From 7a2d96c7c898085428e627f8cc61fd08bcb79fdc Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Tue, 4 Jan 2011 07:42:32 +0000
Subject: [PATCH] ICU-8199 Fix use of out-of-scope object in DigitList, and
 related rounding problems in Formattable::getInt64

X-SVN-Rev: 29259
---
 icu4c/source/i18n/digitlst.cpp         |  36 ++++---
 icu4c/source/i18n/fmtable.cpp          |  24 ++++-
 icu4c/source/test/intltest/numrgts.cpp | 137 ++++++++++++++++++++++++-
 icu4c/source/test/intltest/numrgts.h   |   3 +-
 4 files changed, 177 insertions(+), 23 deletions(-)

diff --git a/icu4c/source/i18n/digitlst.cpp b/icu4c/source/i18n/digitlst.cpp
index 4a146f4428..f3040d8733 100644
--- a/icu4c/source/i18n/digitlst.cpp
+++ b/icu4c/source/i18n/digitlst.cpp
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1997-2010, International Business Machines
+*   Copyright (C) 1997-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
@@ -482,11 +482,11 @@ int32_t DigitList::getLong() /*const*/
 
 
 /**
- *  convert this number to an int64_t.   Round if there is a fractional part.
+ *  convert this number to an int64_t.   Truncate if there is a fractional part.
  *  Return zero if the number cannot be represented.
  */
 int64_t DigitList::getInt64() /*const*/ {
-    // Round if non-integer.   (Truncate or round?)
+    // Truncate if non-integer.
     // Return 0 if out of range.
     // Range of in64_t is -9223372036854775808 to 9223372036854775807  (19 digits)
     //
@@ -494,23 +494,27 @@ int64_t DigitList::getInt64() /*const*/ {
         // Overflow, absolute value too big.
         return 0;
     }
-    decNumber *workingNum = fDecNumber;
 
-    if (fDecNumber->exponent != 0) {
-        // Force to an integer, with zero exponent, rounding if necessary.
-        DigitList copy(*this);
-        DigitList zero;
-        uprv_decNumberQuantize(copy.fDecNumber, copy.fDecNumber, zero.fDecNumber, &fContext);
-        workingNum = copy.fDecNumber;
-    }
+    // The number of integer digits may differ from the number of digits stored
+    //   in the decimal number.
+    //     for 12.345  numIntDigits = 2, number->digits = 5
+    //     for 12E4    numIntDigits = 6, number->digits = 2
+    // The conversion ignores the fraction digits in the first case,
+    // and fakes up extra zero digits in the second.
+    // TODO:  It would be faster to store a table of powers of ten to multiply by
+    //        instead of looping over zero digits, multiplying each time.
 
+    int32_t numIntDigits = fDecNumber->digits + fDecNumber->exponent;
     uint64_t value = 0;
-    int32_t numDigits = workingNum->digits;
-    for (int i = numDigits-1; i>=0 ; --i) {
-        int v = workingNum->lsu[i];
+    for (int32_t i = 0; i < numIntDigits; i++) {
+        // Loop is iterating over digits starting with the most significant.
+        // Numbers are stored with the least significant digit at index zero.
+        int32_t digitIndex = fDecNumber->digits - i - 1;
+        int32_t v = (digitIndex >= 0) ? fDecNumber->lsu[digitIndex] : 0;
         value = value * (uint64_t)10 + (uint64_t)v;
     }
-    if (decNumberIsNegative(workingNum)) {
+
+    if (decNumberIsNegative(fDecNumber)) {
         value = ~value;
         value += 1;
     }
@@ -519,7 +523,7 @@ int64_t DigitList::getInt64() /*const*/ {
     // Check overflow.  It's convenient that the MSD is 9 only on overflow, the amount of
     //                  overflow can't wrap too far.  The test will also fail -0, but
     //                  that does no harm; the right answer is 0.
-    if (numDigits == 19) {
+    if (numIntDigits == 19) {
         if (( decNumberIsNegative(fDecNumber) && svalue>0) ||
             (!decNumberIsNegative(fDecNumber) && svalue<0)) {
             svalue = 0;
diff --git a/icu4c/source/i18n/fmtable.cpp b/icu4c/source/i18n/fmtable.cpp
index 4c755bf5e9..e6568e3a7a 100644
--- a/icu4c/source/i18n/fmtable.cpp
+++ b/icu4c/source/i18n/fmtable.cpp
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 1997-2010, International Business Machines Corporation and    *
+* Copyright (C) 1997-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
@@ -17,6 +17,7 @@
 
 #if !UCONFIG_NO_FORMATTING
 
+#include <math.h>
 #include "unicode/fmtable.h"
 #include "unicode/ustring.h"
 #include "unicode/measure.h"
@@ -427,6 +428,12 @@ Formattable::getLong(UErrorCode& status) const
 }
 
 // -------------------------------------
+// Maximum int that can be represented exactly in a double.  (53 bits)
+//    Larger ints may be rounded to a near-by value as not all are representable.
+// TODO:  move this constant elsewhere, possibly configure it for different
+//        floating point formats, if any non-standard ones are still in use.
+static const int64_t U_DOUBLE_MAX_EXACT_INT = 9007199254740992;
+
 int64_t
 Formattable::getInt64(UErrorCode& status) const
 {
@@ -439,21 +446,28 @@ Formattable::getInt64(UErrorCode& status) const
     case Formattable::kInt64: 
         return fValue.fInt64;
     case Formattable::kDouble:
-        if (fValue.fDouble >= U_INT64_MAX) {
+        if (fValue.fDouble > (double)U_INT64_MAX) {
             status = U_INVALID_FORMAT_ERROR;
             return U_INT64_MAX;
-        } else if (fValue.fDouble <= U_INT64_MIN) {
+        } else if (fValue.fDouble < (double)U_INT64_MIN) {
             status = U_INVALID_FORMAT_ERROR;
             return U_INT64_MIN;
+        } else if (fabs(fValue.fDouble) > U_DOUBLE_MAX_EXACT_INT && fDecimalNum != NULL) {
+            int64_t val = fDecimalNum->getInt64();
+            if (val != 0) {
+                return val;
+            } else {
+                status = U_INVALID_FORMAT_ERROR;
+                return fValue.fDouble > 0 ? U_INT64_MAX : U_INT64_MIN;
+            }
         } else {
             return (int64_t)fValue.fDouble;
-        }
+        } 
     case Formattable::kObject:
         if (fValue.fObject == NULL) {
             status = U_MEMORY_ALLOCATION_ERROR;
             return 0;
         }
-        // TODO Later replace this with instanceof call
         if (instanceOfMeasure(fValue.fObject)) {
             return ((const Measure*) fValue.fObject)->
                 getNumber().getInt64(status);
diff --git a/icu4c/source/test/intltest/numrgts.cpp b/icu4c/source/test/intltest/numrgts.cpp
index ab55bc2d2f..72a2c824b7 100644
--- a/icu4c/source/test/intltest/numrgts.cpp
+++ b/icu4c/source/test/intltest/numrgts.cpp
@@ -1,5 +1,5 @@
 /***********************************************************************
- * Copyright (c) 1997-2010, International Business Machines Corporation
+ * Copyright (c) 1997-2011, International Business Machines Corporation
  * and others. All Rights Reserved.
  ***********************************************************************/
  
@@ -168,6 +168,7 @@ NumberFormatRegressionTest::runIndexedTest( int32_t index, UBool exec, const cha
         CASE(58,Test4243011);
         CASE(59,Test4243108);
         CASE(60,TestJ691);
+        CASE(61,Test8199);
 
         default: name = ""; break;
     }
@@ -2686,4 +2687,138 @@ void NumberFormatRegressionTest::TestJ691(void) {
     delete df;
 }
 
+//---------------------------------------------------------------------------
+//
+//   Error Checking / Reporting macros
+//
+//---------------------------------------------------------------------------
+#define TEST_CHECK_STATUS(status) \
+    if (U_FAILURE(status)) {\
+        errln("File %s, Line %d.  status=%s\n", __FILE__, __LINE__, u_errorName(status));\
+        return;\
+    }
+
+#define TEST_ASSERT(expr) \
+    if ((expr)==FALSE) {\
+        errln("File %s, line %d: Assertion Failed: " #expr "\n", __FILE__, __LINE__);\
+    }
+
+
+// Ticket 8199:  Parse failure for numbers in the range of 1E10 - 1E18
+
+void NumberFormatRegressionTest::Test8199(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    NumberFormat *nf = NumberFormat::createInstance(Locale::getEnglish(), status);
+    TEST_CHECK_STATUS(status);
+
+    // Note:  Retrieving parsed values from a Formattable as a reduced-precision type
+    //        should always truncate, no other rounding scheme.
+
+    UnicodeString numStr = "1000000000.6";   // 9 zeroes
+    Formattable val;
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(1000000000 == val.getInt64(status));
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(1000000000.6 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "100000000000000001.1";   // approx 1E17, parses as a double rather
+                                       //   than int64 because of the fraction
+                                       //   even though int64 is more precise.
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(100000000000000001LL == val.getInt64(status));
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(100000000000000000 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "1E17";  // Parses with the internal decimal number having non-zero exponent
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kInt64 == val.getType());
+    TEST_ASSERT(100000000000000000LL == val.getInt64());
+    TEST_ASSERT(1.0E17 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "9223372036854775807";  // largest int64_t
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kInt64 == val.getType());
+    TEST_ASSERT(9223372036854775807LL == val.getInt64());
+    // In the following check, note that a substantial range of integers will
+    //    convert to the same double value.
+    TEST_ASSERT(9223372036854775810.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "-9223372036854775808";  // smallest int64_t
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kInt64 == val.getType());
+    // TEST_ASSERT(-9223372036854775808LL == val.getInt64()); // Compiler chokes on constant.
+    TEST_ASSERT((int64_t)0x8000000000000000LL == val.getInt64());
+    TEST_ASSERT(-9223372036854775808.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "9223372036854775808";  // largest int64_t + 1
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(9223372036854775807LL == val.getInt64(status));
+    TEST_ASSERT(status == U_INVALID_FORMAT_ERROR);
+    status = U_ZERO_ERROR;
+    TEST_ASSERT(9223372036854775810.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "-9223372036854775809";  // smallest int64_t - 1
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    // TEST_ASSERT(-9223372036854775808LL == val.getInt64(status));  // spurious compiler warnings
+    TEST_ASSERT((int64_t)0x8000000000000000LL == val.getInt64(status));
+    TEST_ASSERT(status == U_INVALID_FORMAT_ERROR);
+    status = U_ZERO_ERROR;
+    TEST_ASSERT(-9223372036854775810.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    // Test values near the limit of where doubles can represent all integers.
+    // The implementation strategy of getInt64() changes at this boundary.
+    // Strings to be parsed include a decimal fraction to force them to be
+    //   parsed as doubles rather than ints.  The fraction is discarded
+    //   from the parsed double value because it is beyond what can be represented.
+
+    status = U_ZERO_ERROR;
+    numStr = "9007199254740991.1";  // largest 53 bit int
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    // printf("getInt64() returns %lld\n", val.getInt64(status));
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(9007199254740991LL == val.getInt64(status));
+    TEST_ASSERT(9007199254740991.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    status = U_ZERO_ERROR;
+    numStr = "9007199254740992.1";  // 54 bits for the int part.
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(9007199254740992LL == val.getInt64(status));
+    TEST_ASSERT(9007199254740992.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    status = U_ZERO_ERROR;
+    numStr = "9007199254740993.1";  // 54 bits for the int part.  Double will round
+    nf->parse(numStr, val, status); //    the ones digit, putting it up to ...994
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(9007199254740993LL == val.getInt64(status));
+    TEST_ASSERT(9007199254740994.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    delete nf;
+}
+
+
 #endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icu4c/source/test/intltest/numrgts.h b/icu4c/source/test/intltest/numrgts.h
index d366676c1a..ec82e35500 100644
--- a/icu4c/source/test/intltest/numrgts.h
+++ b/icu4c/source/test/intltest/numrgts.h
@@ -1,6 +1,6 @@
 /***********************************************************************
  * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation
+ * Copyright (c) 1997-2011, International Business Machines Corporation
  * and others. All Rights Reserved.
  ***********************************************************************/
 
@@ -90,6 +90,7 @@ public:
     void Test4243011(void);
     void Test4243108(void);
     void TestJ691(void);
+    void Test8199(void);
 
 protected:
     UBool failure(UErrorCode status, const UnicodeString& msg, UBool possibleDataError=FALSE);