diff --git a/icu4c/source/i18n/dcfmtsym.cpp b/icu4c/source/i18n/dcfmtsym.cpp index d321a82f8a..6651d8a89a 100644 --- a/icu4c/source/i18n/dcfmtsym.cpp +++ b/icu4c/source/i18n/dcfmtsym.cpp @@ -38,6 +38,7 @@ #include "uresimp.h" #include "ureslocs.h" #include "charstr.h" +#include "uassert.h" // ***************************************************************************** // class DecimalFormatSymbols @@ -530,6 +531,8 @@ DecimalFormatSymbols::initialize() { fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents fIsCustomCurrencySymbol = FALSE; fIsCustomIntlCurrencySymbol = FALSE; + fCodePointZero = 0x30; + U_ASSERT(fCodePointZero == fSymbols[kZeroDigitSymbol].char32At(0)); } diff --git a/icu4c/source/i18n/unicode/dcfmtsym.h b/icu4c/source/i18n/unicode/dcfmtsym.h index 0308f2c2fd..c6da623034 100644 --- a/icu4c/source/i18n/unicode/dcfmtsym.h +++ b/icu4c/source/i18n/unicode/dcfmtsym.h @@ -392,6 +392,13 @@ public: inline UBool isCustomIntlCurrencySymbol() const { return fIsCustomIntlCurrencySymbol; } + + /** + * @internal For ICU use only + */ + inline UChar32 getCodePointZero() const { + return fCodePointZero; + } #endif /* U_HIDE_INTERNAL_API */ /** @@ -440,6 +447,22 @@ private: */ UnicodeString fNoSymbol; + /** + * Dealing with code points is faster than dealing with strings when formatting. Because of + * this, we maintain a value containing the zero code point that is used whenever digitStrings + * represents a sequence of ten code points in order. + * + *
If the value stored here is positive, it means that the code point stored in this value + * corresponds to the digitStrings array, and codePointZero can be used instead of the + * digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does + * *not* contain a sequence of code points, and it must be used directly. + * + *
It is assumed that codePointZero always shadows the value in digitStrings. codePointZero
+ * should never be set directly; rather, it should be updated only when digitStrings mutates.
+ * That is, the flow of information is digitStrings -> codePointZero, not the other way.
+ */
+ UChar32 fCodePointZero;
+
Locale locale;
char actualLocale[ULOC_FULLNAME_CAPACITY];
@@ -493,13 +516,17 @@ DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString
// If the zero digit is being set to a known zero digit according to Unicode,
// then we automatically set the corresponding 1-9 digits
- if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) {
+ // Also keep fCodePointZero in sync: set it when the zero digit is propagated to 1-9, otherwise reset it to -1 (be conservative if in doubt).
+ if (symbol == kZeroDigitSymbol) {
UChar32 sym = value.char32At(0);
- if ( u_charDigitValue(sym) == 0 ) {
+ if ( propogateDigits && u_charDigitValue(sym) == 0 && value.countChar32() == 1 ) {
+ fCodePointZero = sym;
for ( int8_t i = 1 ; i<= 9 ; i++ ) {
sym++;
fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym);
}
+ } else {
+ fCodePointZero = -1;
}
}
}
diff --git a/icu4c/source/test/intltest/tsdcfmsy.cpp b/icu4c/source/test/intltest/tsdcfmsy.cpp
index 90198e070f..0cbd784ec8 100644
--- a/icu4c/source/test/intltest/tsdcfmsy.cpp
+++ b/icu4c/source/test/intltest/tsdcfmsy.cpp
@@ -23,6 +23,7 @@ void IntlTestDecimalFormatSymbols::runIndexedTest( int32_t index, UBool exec, co
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testSymbols);
TESTCASE_AUTO(testLastResortData);
+ TESTCASE_AUTO(testDigitSymbols);
TESTCASE_AUTO(testNumberingSystem);
TESTCASE_AUTO_END;
}
@@ -249,6 +250,54 @@ void IntlTestDecimalFormatSymbols::testLastResortData() {
Verify(1234567.25, "#,##0.##", *lastResort, "1,234,567.25");
}
+void IntlTestDecimalFormatSymbols::testDigitSymbols() {
+ // This test does more in ICU4J than in ICU4C right now.
+ // In ICU4C, it is basically just a test for codePointZero.
+ UChar defZero = u'0';
+ UChar32 osmanyaZero = U'\U000104A0';
+ static const UChar* osmanyaDigitStrings[] = {
+ u"\U000104A0", u"\U000104A1", u"\U000104A2", u"\U000104A3", u"\U000104A4",
+ u"\U000104A5", u"\U000104A6", u"\U000104A7", u"\U000104A8", u"\U000104A9"
+ };
+
+ IcuTestErrorCode status(*this, "testDigitSymbols()");
+ DecimalFormatSymbols symbols(Locale("en"), status);
+
+ if (defZero != symbols.getCodePointZero()) {
+ errln("ERROR: Code point zero be ASCII 0");
+ }
+
+ for (int32_t i=0; i<=9; i++) {
+ DecimalFormatSymbols::ENumberFormatSymbol key =
+ i == 0
+ ? DecimalFormatSymbols::kZeroDigitSymbol
+ : static_cast