diff --git a/icu4c/source/i18n/dcfmtsym.cpp b/icu4c/source/i18n/dcfmtsym.cpp
index d321a82f8a..6651d8a89a 100644
--- a/icu4c/source/i18n/dcfmtsym.cpp
+++ b/icu4c/source/i18n/dcfmtsym.cpp
@@ -38,6 +38,7 @@
 #include "uresimp.h"
 #include "ureslocs.h"
 #include "charstr.h"
+#include "uassert.h"
 
 // *****************************************************************************
 // class DecimalFormatSymbols
@@ -530,6 +531,8 @@ DecimalFormatSymbols::initialize() {
     fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents
     fIsCustomCurrencySymbol = FALSE;
     fIsCustomIntlCurrencySymbol = FALSE;
+    fCodePointZero = 0x30; // must mirror fSymbols[kZeroDigitSymbol]; checked by the assert below
+    U_ASSERT(fCodePointZero == fSymbols[kZeroDigitSymbol].char32At(0));
 
 }
 
diff --git a/icu4c/source/i18n/unicode/dcfmtsym.h b/icu4c/source/i18n/unicode/dcfmtsym.h
index 0308f2c2fd..c6da623034 100644
--- a/icu4c/source/i18n/unicode/dcfmtsym.h
+++ b/icu4c/source/i18n/unicode/dcfmtsym.h
@@ -392,6 +392,16 @@ public:
     inline UBool isCustomIntlCurrencySymbol() const {
         return fIsCustomIntlCurrencySymbol;
     }
+
+    /**
+     * Returns the cached code point of the zero digit, or -1 when the digit
+     * symbols are not known to be ten consecutive code points and the digit
+     * strings must therefore be used individually.
+     * @internal For ICU use only
+     */
+    inline UChar32 getCodePointZero() const {
+        return fCodePointZero;
+    }
 #endif /* U_HIDE_INTERNAL_API */
 
     /**
@@ -440,6 +450,22 @@ private:
      */
     UnicodeString fNoSymbol;
 
+    /**
+     * Dealing with code points is faster than dealing with strings when formatting. Because of
+     * this, we maintain a value containing the zero code point that is used whenever digitStrings
+     * represents a sequence of ten code points in order.
+     *
+     * <p>If the value stored here is positive, it means that the code point stored in this value
+     * corresponds to the digitStrings array, and codePointZero can be used instead of the
+     * digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does
+     * *not* contain a sequence of code points, and it must be used directly.
+     *
+     * <p>It is assumed that codePointZero always shadows the value in digitStrings. codePointZero
+     * should never be set directly; rather, it should be updated only when digitStrings mutates.
+     * That is, the flow of information is digitStrings -> codePointZero, not the other way.
+     */
+    UChar32 fCodePointZero;
+
     Locale locale;
 
     char actualLocale[ULOC_FULLNAME_CAPACITY];
@@ -493,13 +519,21 @@ DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString
 
     // If the zero digit is being set to a known zero digit according to Unicode,
     // then we automatically set the corresponding 1-9 digits
-    if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) {
+    // Also record updates to fCodePointZero. Be conservative if in doubt.
+    if (symbol == kZeroDigitSymbol) {
         UChar32 sym = value.char32At(0);
-        if ( u_charDigitValue(sym) == 0 ) {
+        if ( propogateDigits && u_charDigitValue(sym) == 0 && value.countChar32() == 1 ) {
+            fCodePointZero = sym;
             for ( int8_t i = 1 ; i<= 9 ; i++ ) {
                 sym++;
                 fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym);
             }
+        } else {
+            fCodePointZero = -1;
         }
-    }
+    } else if (symbol >= kOneDigitSymbol && symbol <= kNineDigitSymbol) {
+        // A digit other than zero was replaced on its own, so the ten digit
+        // symbols can no longer be assumed to be consecutive code points.
+        fCodePointZero = -1;
+    }
 }
diff --git a/icu4c/source/test/intltest/tsdcfmsy.cpp b/icu4c/source/test/intltest/tsdcfmsy.cpp
index 90198e070f..0cbd784ec8 100644
--- a/icu4c/source/test/intltest/tsdcfmsy.cpp
+++ b/icu4c/source/test/intltest/tsdcfmsy.cpp
@@ -23,6 +23,7 @@ void IntlTestDecimalFormatSymbols::runIndexedTest( int32_t index, UBool exec, co
     TESTCASE_AUTO_BEGIN;
     TESTCASE_AUTO(testSymbols);
     TESTCASE_AUTO(testLastResortData);
+    TESTCASE_AUTO(testDigitSymbols);
     TESTCASE_AUTO(testNumberingSystem);
     TESTCASE_AUTO_END;
 }
@@ -249,6 +250,65 @@ void IntlTestDecimalFormatSymbols::testLastResortData() {
     Verify(1234567.25, "#,##0.##", *lastResort, "1,234,567.25");
 }
 
+void IntlTestDecimalFormatSymbols::testDigitSymbols() {
+    // This test does more in ICU4J than in ICU4C right now.
+    // In ICU4C, it is basically just a test for codePointZero.
+    UChar defZero = u'0';
+    UChar32 osmanyaZero = U'\U000104A0';
+    static const UChar* osmanyaDigitStrings[] = {
+        u"\U000104A0", u"\U000104A1", u"\U000104A2", u"\U000104A3", u"\U000104A4",
+        u"\U000104A5", u"\U000104A6", u"\U000104A7", u"\U000104A8", u"\U000104A9"
+    };
+
+    IcuTestErrorCode status(*this, "testDigitSymbols()");
+    DecimalFormatSymbols symbols(Locale("en"), status);
+
+    if (defZero != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be ASCII 0");
+    }
+
+    for (int32_t i=0; i<=9; i++) {
+        // kOneDigitSymbol corresponds to i == 1, hence the "- 1": without it the
+        // loop skips kOneDigitSymbol and writes one enumerator past kNineDigitSymbol.
+        DecimalFormatSymbols::ENumberFormatSymbol key =
+            i == 0
+            ? DecimalFormatSymbols::kZeroDigitSymbol
+            : static_cast<DecimalFormatSymbols::ENumberFormatSymbol>
+                (DecimalFormatSymbols::kOneDigitSymbol + i - 1);
+        symbols.setSymbol(key, UnicodeString(osmanyaDigitStrings[i]), FALSE);
+    }
+    // NOTE: in ICU4J, the calculation of codePointZero is smarter;
+    // in ICU4C, it is more conservative and is only set if propogateDigits is true.
+    if (-1 != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be invalid");
+    }
+
+    // Check Osmanya codePointZero
+    symbols.setSymbol(
+        DecimalFormatSymbols::kZeroDigitSymbol,
+        UnicodeString(osmanyaDigitStrings[0]), TRUE);
+    if (osmanyaZero != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be Osmanya code point zero");
+    }
+
+    // Replacing a single non-zero digit breaks the consecutive run of digits,
+    // so codePointZero must be invalidated.
+    symbols.setSymbol(
+        DecimalFormatSymbols::kOneDigitSymbol,
+        UnicodeString(u"1"), FALSE);
+    if (-1 != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be invalid");
+    }
+
+    // Reset digits to Latin
+    symbols.setSymbol(
+        DecimalFormatSymbols::kZeroDigitSymbol,
+        UnicodeString(defZero));
+    if (defZero != symbols.getCodePointZero()) {
+        errln("ERROR: Code point zero be ASCII 0");
+    }
+}
+
 void IntlTestDecimalFormatSymbols::testNumberingSystem() {
     IcuTestErrorCode errorCode(*this, "testNumberingSystem");
     struct testcase {
diff --git a/icu4c/source/test/intltest/tsdcfmsy.h b/icu4c/source/test/intltest/tsdcfmsy.h
index 1fd1dfdfba..1922941b84 100644
--- a/icu4c/source/test/intltest/tsdcfmsy.h
+++ b/icu4c/source/test/intltest/tsdcfmsy.h
@@ -28,6 +28,7 @@ private:
      */
     void testSymbols(/*char *par*/);
     void testLastResortData();
+    void testDigitSymbols();
     void testNumberingSystem();
 
     /** helper functions**/