ICU-8199 Fix use of out-of-scope object in DigitList, and related rounding problems in Formattable::getInt64

X-SVN-Rev: 29259
2011-01-04 07:42:32 +00:00 · 2011-01-04 07:42:32 +00:00 · 7a2d96c7c8
commit 7a2d96c7c8
parent cc9d054eb7
4 changed files with 177 additions and 23 deletions
--- a/icu4c/source/i18n/digitlst.cpp
+++ b/icu4c/source/i18n/digitlst.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1997-2010, International Business Machines
+*   Copyright (C) 1997-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
@ -482,11 +482,11 @@ int32_t DigitList::getLong() /*const*/


 /**
- *  convert this number to an int64_t.   Round if there is a fractional part.
+ *  convert this number to an int64_t.   Truncate if there is a fractional part.
 *  Return zero if the number cannot be represented.
 */
 int64_t DigitList::getInt64() /*const*/ {
-    // Round if non-integer.   (Truncate or round?)
+    // Truncate if non-integer.
    // Return 0 if out of range.
    // Range of in64_t is -9223372036854775808 to 9223372036854775807  (19 digits)
    //
@ -494,23 +494,27 @@ int64_t DigitList::getInt64() /*const*/ {
        // Overflow, absolute value too big.
        return 0;
    }
-    decNumber *workingNum = fDecNumber;

-    if (fDecNumber->exponent != 0) {
-        // Force to an integer, with zero exponent, rounding if necessary.
-        DigitList copy(*this);
-        DigitList zero;
-        uprv_decNumberQuantize(copy.fDecNumber, copy.fDecNumber, zero.fDecNumber, &fContext);
-        workingNum = copy.fDecNumber;
-    }
+    // The number of integer digits may differ from the number of digits stored
+    //   in the decimal number.
+    //     for 12.345  numIntDigits = 2, number->digits = 5
+    //     for 12E4    numIntDigits = 6, number->digits = 2
+    // The conversion ignores the fraction digits in the first case,
+    // and fakes up extra zero digits in the second.
+    // TODO:  It would be faster to store a table of powers of ten to multiply by
+    //        instead of looping over zero digits, multiplying each time.

+    int32_t numIntDigits = fDecNumber->digits + fDecNumber->exponent;
    uint64_t value = 0;
-    int32_t numDigits = workingNum->digits;
-    for (int i = numDigits-1; i>=0 ; --i) {
-        int v = workingNum->lsu[i];
+    for (int32_t i = 0; i < numIntDigits; i++) {
+        // Loop is iterating over digits starting with the most significant.
+        // Numbers are stored with the least significant digit at index zero.
+        int32_t digitIndex = fDecNumber->digits - i - 1;
+        int32_t v = (digitIndex >= 0) ? fDecNumber->lsu[digitIndex] : 0;
        value = value * (uint64_t)10 + (uint64_t)v;
    }
-    if (decNumberIsNegative(workingNum)) {
+
+    if (decNumberIsNegative(fDecNumber)) {
        value = ~value;
        value += 1;
    }
@ -519,7 +523,7 @@ int64_t DigitList::getInt64() /*const*/ {
    // Check overflow.  It's convenient that the MSD is 9 only on overflow, the amount of
    //                  overflow can't wrap too far.  The test will also fail -0, but
    //                  that does no harm; the right answer is 0.
-    if (numDigits == 19) {
+    if (numIntDigits == 19) {
        if (( decNumberIsNegative(fDecNumber) && svalue>0) ||
            (!decNumberIsNegative(fDecNumber) && svalue<0)) {
            svalue = 0;
--- a/icu4c/source/i18n/fmtable.cpp
+++ b/icu4c/source/i18n/fmtable.cpp
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 1997-2010, International Business Machines Corporation and    *
+* Copyright (C) 1997-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
@ -17,6 +17,7 @@

 #if !UCONFIG_NO_FORMATTING

+#include <math.h>
 #include "unicode/fmtable.h"
 #include "unicode/ustring.h"
 #include "unicode/measure.h"
@ -427,6 +428,12 @@ Formattable::getLong(UErrorCode& status) const
 }

 // -------------------------------------
+// Largest magnitude up to which every integer is exactly representable in a double (2^53).
+//    Larger ints may be rounded to a nearby value as not all are representable.
+// TODO:  move this constant elsewhere, possibly configure it for different
+//        floating point formats, if any non-standard ones are still in use.
+static const int64_t U_DOUBLE_MAX_EXACT_INT = 9007199254740992;
+
 int64_t
 Formattable::getInt64(UErrorCode& status) const
 {
@ -439,21 +446,28 @@ Formattable::getInt64(UErrorCode& status) const
    case Formattable::kInt64: 
        return fValue.fInt64;
    case Formattable::kDouble:
-        if (fValue.fDouble >= U_INT64_MAX) {
+        if (fValue.fDouble > (double)U_INT64_MAX) {
            status = U_INVALID_FORMAT_ERROR;
            return U_INT64_MAX;
-        } else if (fValue.fDouble <= U_INT64_MIN) {
+        } else if (fValue.fDouble < (double)U_INT64_MIN) {
            status = U_INVALID_FORMAT_ERROR;
            return U_INT64_MIN;
+        } else if (fabs(fValue.fDouble) > U_DOUBLE_MAX_EXACT_INT && fDecimalNum != NULL) {
+            int64_t val = fDecimalNum->getInt64();
+            if (val != 0) {
+                return val;
+            } else {
+                status = U_INVALID_FORMAT_ERROR;
+                return fValue.fDouble > 0 ? U_INT64_MAX : U_INT64_MIN;
+            }
        } else {
            return (int64_t)fValue.fDouble;
-        }
+        } 
    case Formattable::kObject:
        if (fValue.fObject == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }
-        // TODO Later replace this with instanceof call
        if (instanceOfMeasure(fValue.fObject)) {
            return ((const Measure*) fValue.fObject)->
                getNumber().getInt64(status);
--- a/icu4c/source/test/intltest/numrgts.cpp
+++ b/icu4c/source/test/intltest/numrgts.cpp
@ -1,5 +1,5 @@
 /***********************************************************************
- * Copyright (c) 1997-2010, International Business Machines Corporation
+ * Copyright (c) 1997-2011, International Business Machines Corporation
 * and others. All Rights Reserved.
 ***********************************************************************/
 
@ -168,6 +168,7 @@ NumberFormatRegressionTest::runIndexedTest( int32_t index, UBool exec, const cha
        CASE(58,Test4243011);
        CASE(59,Test4243108);
        CASE(60,TestJ691);
+        CASE(61,Test8199);

        default: name = ""; break;
    }
@ -2686,4 +2687,138 @@ void NumberFormatRegressionTest::TestJ691(void) {
    delete df;
 }

+//---------------------------------------------------------------------------
+//
+//   Error Checking / Reporting macros
+//
+//---------------------------------------------------------------------------
+#define TEST_CHECK_STATUS(status) \
+    if (U_FAILURE(status)) {\
+        errln("File %s, Line %d.  status=%s\n", __FILE__, __LINE__, u_errorName(status));\
+        return;\
+    }
+
+#define TEST_ASSERT(expr) \
+    if ((expr)==FALSE) {\
+        errln("File %s, line %d: Assertion Failed: " #expr "\n", __FILE__, __LINE__);\
+    }
+
+
+// Ticket 8199:  Parse failure for numbers in the range of 1E10 - 1E18
+
+void NumberFormatRegressionTest::Test8199(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    NumberFormat *nf = NumberFormat::createInstance(Locale::getEnglish(), status);
+    TEST_CHECK_STATUS(status);
+
+    // Note:  Retrieving parsed values from a Formattable as a reduced-precision type
+    //        should always truncate; no other rounding scheme is applied.
+
+    UnicodeString numStr = "1000000000.6";   // 9 zeroes
+    Formattable val;
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(1000000000 == val.getInt64(status));
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(1000000000.6 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "100000000000000001.1";   // approx 1E17, parses as a double rather
+                                       //   than int64 because of the fraction
+                                       //   even though int64 is more precise.
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(100000000000000001LL == val.getInt64(status));
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(100000000000000000 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "1E17";  // Parses with the internal decimal number having non-zero exponent
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kInt64 == val.getType());
+    TEST_ASSERT(100000000000000000LL == val.getInt64());
+    TEST_ASSERT(1.0E17 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "9223372036854775807";  // largest int64_t
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kInt64 == val.getType());
+    TEST_ASSERT(9223372036854775807LL == val.getInt64());
+    // In the following check, note that a substantial range of integers will
+    //    convert to the same double value.
+    TEST_ASSERT(9223372036854775810.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "-9223372036854775808";  // smallest int64_t
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kInt64 == val.getType());
+    // TEST_ASSERT(-9223372036854775808LL == val.getInt64()); // Compiler chokes on constant.
+    TEST_ASSERT((int64_t)0x8000000000000000LL == val.getInt64());
+    TEST_ASSERT(-9223372036854775808.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "9223372036854775808";  // largest int64_t + 1
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(9223372036854775807LL == val.getInt64(status));
+    TEST_ASSERT(status == U_INVALID_FORMAT_ERROR);
+    status = U_ZERO_ERROR;
+    TEST_ASSERT(9223372036854775810.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    numStr = "-9223372036854775809";  // smallest int64_t - 1
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    // TEST_ASSERT(-9223372036854775808LL == val.getInt64(status));  // spurious compiler warnings
+    TEST_ASSERT((int64_t)0x8000000000000000LL == val.getInt64(status));
+    TEST_ASSERT(status == U_INVALID_FORMAT_ERROR);
+    status = U_ZERO_ERROR;
+    TEST_ASSERT(-9223372036854775810.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    // Test values near the limit of where doubles can represent all integers.
+    // The implementation strategy of getInt64() changes at this boundary.
+    // Strings to be parsed include a decimal fraction to force them to be
+    //   parsed as doubles rather than ints.  The fraction is discarded
+    //   from the parsed double value because it is beyond what can be represented.
+
+    status = U_ZERO_ERROR;
+    numStr = "9007199254740991.1";  // largest 53 bit int
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    // printf("getInt64() returns %lld\n", val.getInt64(status));
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(9007199254740991LL == val.getInt64(status));
+    TEST_ASSERT(9007199254740991.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    status = U_ZERO_ERROR;
+    numStr = "9007199254740992.1";  // 54 bits for the int part.
+    nf->parse(numStr, val, status);
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(9007199254740992LL == val.getInt64(status));
+    TEST_ASSERT(9007199254740992.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    status = U_ZERO_ERROR;
+    numStr = "9007199254740993.1";  // 54 bits for the int part.  Double will round
+    nf->parse(numStr, val, status); //    the ones digit, putting it up to ...994
+    TEST_CHECK_STATUS(status);
+    TEST_ASSERT(Formattable::kDouble == val.getType());
+    TEST_ASSERT(9007199254740993LL == val.getInt64(status));
+    TEST_ASSERT(9007199254740994.0 == val.getDouble(status));
+    TEST_CHECK_STATUS(status);
+
+    delete nf;
+}
+
+
 #endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/test/intltest/numrgts.h
+++ b/icu4c/source/test/intltest/numrgts.h
@ -1,6 +1,6 @@
 /***********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2009, International Business Machines Corporation
+ * Copyright (c) 1997-2011, International Business Machines Corporation
 * and others. All Rights Reserved.
 ***********************************************************************/

@ -90,6 +90,7 @@ public:
    void Test4243011(void);
    void Test4243108(void);
    void TestJ691(void);
+    void Test8199(void);

 protected:
    UBool failure(UErrorCode status, const UnicodeString& msg, UBool possibleDataError=FALSE);