From 7a2d96c7c898085428e627f8cc61fd08bcb79fdc Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Tue, 4 Jan 2011 07:42:32 +0000 Subject: [PATCH] ICU-8199 Fix use of out-of-scope object in DigitList, and related rounding problems in Formattable::getInt64 X-SVN-Rev: 29259 --- icu4c/source/i18n/digitlst.cpp | 36 ++++--- icu4c/source/i18n/fmtable.cpp | 24 ++++- icu4c/source/test/intltest/numrgts.cpp | 137 ++++++++++++++++++++++++- icu4c/source/test/intltest/numrgts.h | 3 +- 4 files changed, 177 insertions(+), 23 deletions(-) diff --git a/icu4c/source/i18n/digitlst.cpp b/icu4c/source/i18n/digitlst.cpp index 4a146f4428..f3040d8733 100644 --- a/icu4c/source/i18n/digitlst.cpp +++ b/icu4c/source/i18n/digitlst.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1997-2010, International Business Machines +* Copyright (C) 1997-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * @@ -482,11 +482,11 @@ int32_t DigitList::getLong() /*const*/ /** - * convert this number to an int64_t. Round if there is a fractional part. + * convert this number to an int64_t. Truncate if there is a fractional part. * Return zero if the number cannot be represented. */ int64_t DigitList::getInt64() /*const*/ { - // Round if non-integer. (Truncate or round?) + // Truncate if non-integer. // Return 0 if out of range. // Range of in64_t is -9223372036854775808 to 9223372036854775807 (19 digits) // @@ -494,23 +494,27 @@ int64_t DigitList::getInt64() /*const*/ { // Overflow, absolute value too big. return 0; } - decNumber *workingNum = fDecNumber; - if (fDecNumber->exponent != 0) { - // Force to an integer, with zero exponent, rounding if necessary. - DigitList copy(*this); - DigitList zero; - uprv_decNumberQuantize(copy.fDecNumber, copy.fDecNumber, zero.fDecNumber, &fContext); - workingNum = copy.fDecNumber; - } + // The number of integer digits may differ from the number of digits stored + // in the decimal number. + // for 12.345 numIntDigits = 2, number->digits = 5 + // for 12E4 numIntDigits = 6, number->digits = 2 + // The conversion ignores the fraction digits in the first case, + // and fakes up extra zero digits in the second. + // TODO: It would be faster to store a table of powers of ten to multiply by + // instead of looping over zero digits, multiplying each time. + int32_t numIntDigits = fDecNumber->digits + fDecNumber->exponent; uint64_t value = 0; - int32_t numDigits = workingNum->digits; - for (int i = numDigits-1; i>=0 ; --i) { - int v = workingNum->lsu[i]; + for (int32_t i = 0; i < numIntDigits; i++) { + // Loop is iterating over digits starting with the most significant. + // Numbers are stored with the least significant digit at index zero. + int32_t digitIndex = fDecNumber->digits - i - 1; + int32_t v = (digitIndex >= 0) ? fDecNumber->lsu[digitIndex] : 0; value = value * (uint64_t)10 + (uint64_t)v; } - if (decNumberIsNegative(workingNum)) { + + if (decNumberIsNegative(fDecNumber)) { value = ~value; value += 1; } @@ -519,7 +523,7 @@ int64_t DigitList::getInt64() /*const*/ { // Check overflow. It's convenient that the MSD is 9 only on overflow, the amount of // overflow can't wrap too far. The test will also fail -0, but // that does no harm; the right answer is 0. - if (numDigits == 19) { + if (numIntDigits == 19) { if (( decNumberIsNegative(fDecNumber) && svalue>0) || (!decNumberIsNegative(fDecNumber) && svalue<0)) { svalue = 0; diff --git a/icu4c/source/i18n/fmtable.cpp b/icu4c/source/i18n/fmtable.cpp index 4c755bf5e9..e6568e3a7a 100644 --- a/icu4c/source/i18n/fmtable.cpp +++ b/icu4c/source/i18n/fmtable.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 1997-2010, International Business Machines Corporation and * +* Copyright (C) 1997-2011, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * @@ -17,6 +17,7 @@ #if !UCONFIG_NO_FORMATTING +#include #include "unicode/fmtable.h" #include "unicode/ustring.h" #include "unicode/measure.h" @@ -427,6 +428,12 @@ Formattable::getLong(UErrorCode& status) const } // ------------------------------------- +// Maximum int that can be represented exactly in a double. (53 bits) +// Larger ints may be rounded to a near-by value as not all are representable. +// TODO: move this constant elsewhere, possibly configure it for different +// floating point formats, if any non-standard ones are still in use. +static const int64_t U_DOUBLE_MAX_EXACT_INT = 9007199254740992; + int64_t Formattable::getInt64(UErrorCode& status) const { @@ -439,21 +446,28 @@ Formattable::getInt64(UErrorCode& status) const case Formattable::kInt64: return fValue.fInt64; case Formattable::kDouble: - if (fValue.fDouble >= U_INT64_MAX) { + if (fValue.fDouble > (double)U_INT64_MAX) { status = U_INVALID_FORMAT_ERROR; return U_INT64_MAX; - } else if (fValue.fDouble <= U_INT64_MIN) { + } else if (fValue.fDouble < (double)U_INT64_MIN) { status = U_INVALID_FORMAT_ERROR; return U_INT64_MIN; + } else if (fabs(fValue.fDouble) > U_DOUBLE_MAX_EXACT_INT && fDecimalNum != NULL) { + int64_t val = fDecimalNum->getInt64(); + if (val != 0) { + return val; + } else { + status = U_INVALID_FORMAT_ERROR; + return fValue.fDouble > 0 ? U_INT64_MAX : U_INT64_MIN; + } } else { return (int64_t)fValue.fDouble; - } + } case Formattable::kObject: if (fValue.fObject == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return 0; } - // TODO Later replace this with instanceof call if (instanceOfMeasure(fValue.fObject)) { return ((const Measure*) fValue.fObject)-> getNumber().getInt64(status); diff --git a/icu4c/source/test/intltest/numrgts.cpp b/icu4c/source/test/intltest/numrgts.cpp index ab55bc2d2f..72a2c824b7 100644 --- a/icu4c/source/test/intltest/numrgts.cpp +++ b/icu4c/source/test/intltest/numrgts.cpp @@ -1,5 +1,5 @@ /*********************************************************************** - * Copyright (c) 1997-2010, International Business Machines Corporation + * Copyright (c) 1997-2011, International Business Machines Corporation * and others. All Rights Reserved. ***********************************************************************/ @@ -168,6 +168,7 @@ NumberFormatRegressionTest::runIndexedTest( int32_t index, UBool exec, const cha CASE(58,Test4243011); CASE(59,Test4243108); CASE(60,TestJ691); + CASE(61,Test8199); default: name = ""; break; } @@ -2686,4 +2687,138 @@ void NumberFormatRegressionTest::TestJ691(void) { delete df; } +//--------------------------------------------------------------------------- +// +// Error Checking / Reporting macros +// +//--------------------------------------------------------------------------- +#define TEST_CHECK_STATUS(status) \ + if (U_FAILURE(status)) {\ + errln("File %s, Line %d. status=%s\n", __FILE__, __LINE__, u_errorName(status));\ + return;\ + } + +#define TEST_ASSERT(expr) \ + if ((expr)==FALSE) {\ + errln("File %s, line %d: Assertion Failed: " #expr "\n", __FILE__, __LINE__);\ + } + + +// Ticket 8199: Parse failure for numbers in the range of 1E10 - 1E18 + +void NumberFormatRegressionTest::Test8199(void) { + UErrorCode status = U_ZERO_ERROR; + NumberFormat *nf = NumberFormat::createInstance(Locale::getEnglish(), status); + TEST_CHECK_STATUS(status); + + // Note: Retrieving parsed values from a Formattable as a reduced-precision type + // should always truncate, no other rounding scheme. + + UnicodeString numStr = "1000000000.6"; // 9 zeroes + Formattable val; + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kDouble == val.getType()); + TEST_ASSERT(1000000000 == val.getInt64(status)); + TEST_CHECK_STATUS(status); + TEST_ASSERT(1000000000.6 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + numStr = "100000000000000001.1"; // approx 1E17, parses as a double rather + // than int64 because of the fraction + // even though int64 is more precise. + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kDouble == val.getType()); + TEST_ASSERT(100000000000000001LL == val.getInt64(status)); + TEST_CHECK_STATUS(status); + TEST_ASSERT(100000000000000000 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + numStr = "1E17"; // Parses with the internal decimal number having non-zero exponent + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kInt64 == val.getType()); + TEST_ASSERT(100000000000000000LL == val.getInt64()); + TEST_ASSERT(1.0E17 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + numStr = "9223372036854775807"; // largest int64_t + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kInt64 == val.getType()); + TEST_ASSERT(9223372036854775807LL == val.getInt64()); + // In the following check, note that a substantial range of integers will + // convert to the same double value. + TEST_ASSERT(9223372036854775810.0 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + numStr = "-9223372036854775808"; // smallest int64_t + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kInt64 == val.getType()); + // TEST_ASSERT(-9223372036854775808LL == val.getInt64()); // Compiler chokes on constant. + TEST_ASSERT((int64_t)0x8000000000000000LL == val.getInt64()); + TEST_ASSERT(-9223372036854775808.0 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + numStr = "9223372036854775808"; // largest int64_t + 1 + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kDouble == val.getType()); + TEST_ASSERT(9223372036854775807LL == val.getInt64(status)); + TEST_ASSERT(status == U_INVALID_FORMAT_ERROR); + status = U_ZERO_ERROR; + TEST_ASSERT(9223372036854775810.0 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + numStr = "-9223372036854775809"; // smallest int64_t - 1 + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kDouble == val.getType()); + // TEST_ASSERT(-9223372036854775808LL == val.getInt64(status)); // spurious compiler warnings + TEST_ASSERT((int64_t)0x8000000000000000LL == val.getInt64(status)); + TEST_ASSERT(status == U_INVALID_FORMAT_ERROR); + status = U_ZERO_ERROR; + TEST_ASSERT(-9223372036854775810.0 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + // Test values near the limit of where doubles can represent all integers. + // The implementation strategy of getInt64() changes at this boundary. + // Strings to be parsed include a decimal fraction to force them to be + // parsed as doubles rather than ints. The fraction is discarded + // from the parsed double value because it is beyond what can be represented. + + status = U_ZERO_ERROR; + numStr = "9007199254740991.1"; // largest 53 bit int + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + // printf("getInt64() returns %lld\n", val.getInt64(status)); + TEST_ASSERT(Formattable::kDouble == val.getType()); + TEST_ASSERT(9007199254740991LL == val.getInt64(status)); + TEST_ASSERT(9007199254740991.0 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + status = U_ZERO_ERROR; + numStr = "9007199254740992.1"; // 54 bits for the int part. + nf->parse(numStr, val, status); + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kDouble == val.getType()); + TEST_ASSERT(9007199254740992LL == val.getInt64(status)); + TEST_ASSERT(9007199254740992.0 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + status = U_ZERO_ERROR; + numStr = "9007199254740993.1"; // 54 bits for the int part. Double will round + nf->parse(numStr, val, status); // the ones digit, putting it up to ...994 + TEST_CHECK_STATUS(status); + TEST_ASSERT(Formattable::kDouble == val.getType()); + TEST_ASSERT(9007199254740993LL == val.getInt64(status)); + TEST_ASSERT(9007199254740994.0 == val.getDouble(status)); + TEST_CHECK_STATUS(status); + + delete nf; +} + + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/numrgts.h b/icu4c/source/test/intltest/numrgts.h index d366676c1a..ec82e35500 100644 --- a/icu4c/source/test/intltest/numrgts.h +++ b/icu4c/source/test/intltest/numrgts.h @@ -1,6 +1,6 @@ /*********************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation + * Copyright (c) 1997-2011, International Business Machines Corporation * and others. All Rights Reserved. ***********************************************************************/ @@ -90,6 +90,7 @@ public: void Test4243011(void); void Test4243108(void); void TestJ691(void); + void Test8199(void); protected: UBool failure(UErrorCode status, const UnicodeString& msg, UBool possibleDataError=FALSE);