ICU-13574 Adding codePointZero logic to ICU4C, added to ICU4J in r40091.

X-SVN-Rev: 40856
This commit is contained in:
Shane Carr 2018-02-08 01:51:09 +00:00
parent 0d83273be9
commit b18e037531
4 changed files with 82 additions and 2 deletions

View File

@ -38,6 +38,7 @@
#include "uresimp.h"
#include "ureslocs.h"
#include "charstr.h"
#include "uassert.h"
// *****************************************************************************
// class DecimalFormatSymbols
@ -530,6 +531,8 @@ DecimalFormatSymbols::initialize() {
fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents
fIsCustomCurrencySymbol = FALSE;
fIsCustomIntlCurrencySymbol = FALSE;
fCodePointZero = 0x30;
U_ASSERT(fCodePointZero == fSymbols[kZeroDigitSymbol].char32At(0));
}

View File

@ -392,6 +392,13 @@ public:
inline UBool isCustomIntlCurrencySymbol() const {
return fIsCustomIntlCurrencySymbol;
}
/**
* Returns the cached zero-digit code point, fCodePointZero.
*
* Per the documentation on fCodePointZero, a positive value is the code point
* of the zero digit when the ten digit symbols form a sequence of consecutive
* code points, and -1 means the digit symbol strings must be used directly.
*
* @return the value of fCodePointZero (a code point, or -1).
* @internal For ICU use only
*/
inline UChar32 getCodePointZero() const {
return fCodePointZero;
}
#endif /* U_HIDE_INTERNAL_API */
/**
@ -440,6 +447,22 @@ private:
*/
UnicodeString fNoSymbol;
/**
* Dealing with code points is faster than dealing with strings when formatting. Because of
* this, we maintain a value containing the zero code point that is used whenever digitStrings
* represents a sequence of ten code points in order.
*
* <p>If the value stored here is positive, it means that the code point stored in this value
* corresponds to the digitStrings array, and codePointZero can be used instead of the
* digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does
* *not* contain a sequence of code points, and it must be used directly.
*
* <p>It is assumed that codePointZero always shadows the value in digitStrings. codePointZero
* should never be set directly; rather, it should be updated only when digitStrings mutates.
* That is, the flow of information is digitStrings -> codePointZero, not the other way.
*
* <p>NOTE(review): "digitStrings" and "codePointZero" are the ICU4J names. In this class the
* corresponding state appears to be the digit entries of fSymbols (kZeroDigitSymbol and
* kOneDigitSymbol..kNineDigitSymbol) and this field, respectively: initialize() sets this
* field to 0x30 and asserts it equals fSymbols[kZeroDigitSymbol].char32At(0), and setSymbol()
* updates it only when kZeroDigitSymbol is set (to the new zero code point when the digits
* are propagated from a Unicode zero digit, otherwise to -1).
*/
UChar32 fCodePointZero;
Locale locale;
char actualLocale[ULOC_FULLNAME_CAPACITY];
@ -493,13 +516,17 @@ DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString
// If the zero digit is being set to a known zero digit according to Unicode,
// then we automatically set the corresponding 1-9 digits
if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) {
// Also record updates to fCodePointZero. Be conservative if in doubt.
if (symbol == kZeroDigitSymbol) {
UChar32 sym = value.char32At(0);
if ( u_charDigitValue(sym) == 0 ) {
if ( propogateDigits && u_charDigitValue(sym) == 0 && value.countChar32() == 1 ) {
fCodePointZero = sym;
for ( int8_t i = 1 ; i<= 9 ; i++ ) {
sym++;
fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym);
}
} else {
fCodePointZero = -1;
}
}
}

View File

@ -23,6 +23,7 @@ void IntlTestDecimalFormatSymbols::runIndexedTest( int32_t index, UBool exec, co
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testSymbols);
TESTCASE_AUTO(testLastResortData);
TESTCASE_AUTO(testDigitSymbols);
TESTCASE_AUTO(testNumberingSystem);
TESTCASE_AUTO_END;
}
@ -249,6 +250,54 @@ void IntlTestDecimalFormatSymbols::testLastResortData() {
Verify(1234567.25, "#,##0.##", *lastResort, "1,234,567.25");
}
/**
 * Checks that DecimalFormatSymbols::getCodePointZero() tracks the zero digit symbol:
 *   1. a freshly constructed "en" instance reports ASCII '0';
 *   2. setting the digits one by one with propagation disabled yields -1;
 *   3. setting the Osmanya zero digit with propagation enabled yields U+104A0;
 *   4. setting the zero digit back to ASCII '0' (default propagation) yields '0' again.
 */
void IntlTestDecimalFormatSymbols::testDigitSymbols() {
    // This test does more in ICU4J than in ICU4C right now.
    // In ICU4C, it is basically just a test for codePointZero.
    UChar defZero = u'0';
    UChar32 osmanyaZero = U'\U000104A0';
    static const UChar* osmanyaDigitStrings[] = {
        u"\U000104A0", u"\U000104A1", u"\U000104A2", u"\U000104A3", u"\U000104A4",
        u"\U000104A5", u"\U000104A6", u"\U000104A7", u"\U000104A8", u"\U000104A9"
    };

    IcuTestErrorCode status(*this, "testDigitSymbols()");
    DecimalFormatSymbols symbols(Locale("en"), status);

    if (defZero != symbols.getCodePointZero()) {
        errln("ERROR: Code point zero be ASCII 0");
    }

    // Install the ten Osmanya digits individually, without digit propagation.
    for (int32_t i=0; i<=9; i++) {
        // Digit 0 has its own enumerator; digits 1..9 are addressed relative to
        // kOneDigitSymbol, so digit i maps to kOneDigitSymbol + (i - 1).
        // (Using "+ i" would skip kOneDigitSymbol and write one slot past
        // kNineDigitSymbol, clobbering an unrelated symbol.)
        DecimalFormatSymbols::ENumberFormatSymbol key =
            i == 0
            ? DecimalFormatSymbols::kZeroDigitSymbol
            : static_cast<DecimalFormatSymbols::ENumberFormatSymbol>
                (DecimalFormatSymbols::kOneDigitSymbol + i - 1);
        symbols.setSymbol(key, UnicodeString(osmanyaDigitStrings[i]), FALSE);
    }
    // NOTE: in ICU4J, the calculation of codePointZero is smarter;
    // in ICU4C, it is more conservative and is only set if propogateDigits is true.
    if (-1 != symbols.getCodePointZero()) {
        errln("ERROR: Code point zero be invalid");
    }

    // Check Osmanya codePointZero
    symbols.setSymbol(
        DecimalFormatSymbols::kZeroDigitSymbol,
        UnicodeString(osmanyaDigitStrings[0]), TRUE);
    if (osmanyaZero != symbols.getCodePointZero()) {
        errln("ERROR: Code point zero be Osmanya code point zero");
    }

    // Reset digits to Latin
    symbols.setSymbol(
        DecimalFormatSymbols::kZeroDigitSymbol,
        UnicodeString(defZero));
    if (defZero != symbols.getCodePointZero()) {
        errln("ERROR: Code point zero be ASCII 0");
    }
}
void IntlTestDecimalFormatSymbols::testNumberingSystem() {
IcuTestErrorCode errorCode(*this, "testNumberingSystem");
struct testcase {

View File

@ -28,6 +28,7 @@ private:
*/
void testSymbols(/*char *par*/);
void testLastResortData();
void testDigitSymbols();
void testNumberingSystem();
/** helper functions**/