diff --git a/icu4c/source/i18n/plurrule.cpp b/icu4c/source/i18n/plurrule.cpp index 94be778bb4..8f6cd9115e 100644 --- a/icu4c/source/i18n/plurrule.cpp +++ b/icu4c/source/i18n/plurrule.cpp @@ -15,6 +15,7 @@ #include "unicode/uniset.h" #include "unicode/utypes.h" +#include "unicode/ures.h" #include "unicode/plurrule.h" #include "cmemory.h" #include "cstring.h" @@ -26,88 +27,18 @@ #include "ustrfmt.h" #include "locutil.h" +/* +// TODO(claireho): remove stdio +#include "stdio.h" +*/ + #if !UCONFIG_NO_FORMATTING U_NAMESPACE_BEGIN -// gPluralRuleLocaleHash is a global hash table that maps locale name to -// the pointer of PluralRule. gPluralRuleLocaleHash is built only once and -// resides in the memory until end of application. We will remove the -// gPluralRuleLocaleHash table when we move plural rules data to resource -// bundle in ICU4.0 release. If Valgrind reports the memory is still -// reachable, please ignore it. -static Hashtable *gPluralRuleLocaleHash=NULL; #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0]) -// TODO: Plural rule data - will move to ResourceBundle. -#define NUMBER_PLURAL_RULES 13 -static const UChar uCharPluralRules[NUMBER_PLURAL_RULES][128] = { - // other: n/ja,ko,tr,v - {LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,SLASH,LOW_J,LOW_A,COMMA,LOW_K,LOW_O,COMMA,LOW_T, - LOW_R,COMMA,LOW_V,LOW_I, 0}, - // one: n is 1/da,de,el,en,eo,es,et,fi,fo,he,hu,it,nb,nl,nn,no,pt,sv - {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SLASH,LOW_D,LOW_A,COMMA,LOW_D, - LOW_E,COMMA,LOW_E,LOW_L,COMMA,LOW_E,LOW_N,COMMA,LOW_E,LOW_O,COMMA,LOW_E,LOW_S,COMMA,LOW_E,LOW_T, - COMMA,LOW_F,LOW_I,COMMA,LOW_F,LOW_O,COMMA,LOW_H,LOW_E,COMMA,LOW_H,LOW_U,COMMA,LOW_I,LOW_T,COMMA, - LOW_N,LOW_B,COMMA,LOW_N,LOW_L,COMMA,LOW_N,LOW_N,COMMA,LOW_N,LOW_O,COMMA,LOW_P,LOW_T,COMMA,LOW_S, - LOW_V, 0}, - // one: n in 0..1/fr,pt_BR - {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_N,SPACE,U_ZERO,DOT,DOT,U_ONE,SLASH,LOW_F, - LOW_R,COMMA,LOW_P,LOW_T,LOWLINE,CAP_B,CAP_R, 0}, - // zero: n is 0; one: n mod 10 is 1 and n mod 100 is not 11/lv - {LOW_Z,LOW_E,LOW_R,LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ZERO,SEMI_COLON,SPACE,LOW_O, - LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_S,SPACE, - U_ONE,SPACE,LOW_A,LOW_N,LOW_D,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,SPACE, - LOW_I,LOW_S,SPACE,LOW_N,LOW_O,LOW_T,SPACE,U_ONE,U_ONE,SLASH,LOW_L,LOW_V, 0}, - // one: n is 1; two: n is 2/ga - {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_T,LOW_W, - LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_TWO,SLASH,LOW_G,LOW_A, 0}, - // zero: n is 0; one: n is 1; zero: n mod 100 in 1..19/ro - {LOW_Z,LOW_E,LOW_R,LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ZERO,SEMI_COLON,SPACE,LOW_O, - LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_Z,LOW_E,LOW_R, - LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,LOW_I,LOW_N,SPACE, - U_ONE,DOT,DOT,U_ONE,U_NINE,SLASH,LOW_R,LOW_O, 0}, - // other: n mod 100 in 11..19; one: n mod 10 is 1; few: n mod 10 in 2..9/lt - {LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO, - SPACE,LOW_I,LOW_N,SPACE,U_ONE,U_ONE,DOT,DOT,U_ONE,U_NINE,SEMI_COLON,SPACE,LOW_O,LOW_N,LOW_E,COLON, - SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON, - SPACE,LOW_F,LOW_E,LOW_W,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I, - LOW_N,SPACE,U_TWO,DOT,DOT,U_NINE,SLASH,LOW_L,LOW_T, 0}, - // one: n mod 10 is 1 and n mod 100 is not 11; few: n mod 10 in 2..4 - // and n mod 100 not in 12..14/hr,ru,sr,uk - {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_S, - SPACE,U_ONE,SPACE,LOW_A,LOW_N,LOW_D,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO, - SPACE,LOW_I,LOW_S,SPACE,LOW_N,LOW_O,LOW_T,SPACE,U_ONE,U_ONE,SEMI_COLON,SPACE,LOW_F,LOW_E,LOW_W, - COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_N,SPACE,U_TWO,DOT, - DOT,U_FOUR,SPACE,LOW_A,LOW_N,LOW_D,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE, - U_ZERO,U_ZERO,SPACE,LOW_N,LOW_O,LOW_T,SPACE,LOW_I,LOW_N,SPACE,U_ONE,U_TWO,DOT,DOT,U_ONE,U_FOUR, - SLASH,LOW_H,LOW_R,COMMA,LOW_R,LOW_U,COMMA,LOW_S,LOW_R,COMMA,LOW_U,LOW_K, 0}, - // one: n is 1; few: n in 2..4/cs,sk - {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_F,LOW_E, - LOW_W,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_N,SPACE,U_TWO,DOT,DOT,U_FOUR,SLASH,LOW_C,LOW_S,COMMA, - LOW_S,LOW_K, 0}, - // one: n is 1; few: n mod 10 in 2..4 and n mod 100 not in 12..14/pl - {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_F,LOW_E, - LOW_W,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,SPACE,LOW_I,LOW_N,SPACE,U_TWO, - DOT,DOT,U_FOUR,SPACE,LOW_A,LOW_N,LOW_D,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO, - U_ZERO,SPACE,LOW_N,LOW_O,LOW_T,SPACE,LOW_I,LOW_N,SPACE,U_ONE,U_TWO,DOT,DOT,U_ONE,U_FOUR,SLASH, - LOW_P,LOW_L, 0}, - // one: n mod 100 is 1; two: n mod 100 is 2; few: n mod 100 in 3..4/sl - {LOW_O,LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,LOW_I, - LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_T,LOW_W,LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D, - SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,LOW_I,LOW_S,SPACE,U_TWO,SEMI_COLON,SPACE,LOW_F,LOW_E,LOW_W,COLON, - SPACE,LOW_N,SPACE,LOW_M,LOW_O,LOW_D,SPACE,U_ONE,U_ZERO,U_ZERO,SPACE,LOW_I,LOW_N,SPACE,U_THREE,DOT, - DOT,U_FOUR,SLASH,LOW_S,LOW_L, 0}, - // zero: n is 0; one: n is 1; two: n is 2; few: n is 3..10; many: n in 11..99/ar - {LOW_Z,LOW_E,LOW_R,LOW_O,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ZERO,SEMI_COLON,SPACE,LOW_O, - LOW_N,LOW_E,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_ONE,SEMI_COLON,SPACE,LOW_T,LOW_W,LOW_O, - COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_TWO,SEMI_COLON,SPACE,LOW_F,LOW_E,LOW_W,COLON,SPACE, - LOW_N,SPACE,LOW_I,LOW_S,SPACE,U_THREE,DOT,DOT,U_ONE,U_ZERO,SEMI_COLON,SPACE,LOW_M,LOW_A,LOW_N, - LOW_Y,COLON,SPACE,LOW_N,SPACE,LOW_I,LOW_N,SPACE,U_ONE,U_ONE,DOT,DOT,U_NINE,U_NINE,SLASH,LOW_A, - LOW_R, 0}, -}; - static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0}; static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0}; static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0}; @@ -122,25 +53,20 @@ static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0}; static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0}; static const UChar PK_OR[]={LOW_O,LOW_R,0}; static const UChar PK_VAR_N[]={LOW_N,0}; +static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules) UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) PluralRules::PluralRules(UErrorCode& status) : - fLocaleStringsHash(NULL), mRules(NULL), mParser(new RuleParser()) { - initHashtable(status); - if (U_SUCCESS(status)) { - getRuleData(status); - } } PluralRules::PluralRules(const PluralRules& other) : UObject(other), - fLocaleStringsHash(NULL), mRules(NULL), mParser(new RuleParser()) { @@ -160,7 +86,6 @@ PluralRules::clone() const { PluralRules& PluralRules::operator=(const PluralRules& other) { if (this != &other) { - fLocaleStringsHash=other.fLocaleStringsHash; delete mRules; if (other.mRules==NULL) { mRules = NULL; @@ -183,7 +108,7 @@ PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { if ( (newRules != NULL)&& U_SUCCESS(status) ) { newRules->parseDescription((UnicodeString &)description, rules, status); if (U_SUCCESS(status)) { - newRules->addRules(rules, status); + newRules->addRules(rules); } } if (U_FAILURE(status)) { @@ -202,42 +127,28 @@ PluralRules::createDefaultRules(UErrorCode& status) { PluralRules* U_EXPORT2 PluralRules::forLocale(const Locale& locale, UErrorCode& status) { - RuleChain *locRules; - - PluralRules *newRules = new PluralRules(status); - if (U_FAILURE(status)) { - delete newRules; + RuleChain rChain; + status = U_ZERO_ERROR; + PluralRules *newObj = new PluralRules(status); + if (newObj==NULL) { return NULL; } - UnicodeString localeName; - LocaleUtility::initNameFromLocale(locale, localeName); - { - Mutex lock; - locRules = (RuleChain *) (newRules->fLocaleStringsHash->get(localeName)); - } - if (locRules == NULL) { - // Check parent locales. - char parentLocaleName[ULOC_FULLNAME_CAPACITY]; - const char *curLocaleName=locale.getName(); - int32_t localeNameLen=0; - uprv_strcpy(parentLocaleName, curLocaleName); - while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName, - ULOC_FULLNAME_CAPACITY, &status)) > 0) { - localeName = UnicodeString(parentLocaleName, -1, US_INV); - Mutex lock; - locRules = (RuleChain *) (newRules->fLocaleStringsHash->get(localeName)); - if (locRules != NULL) { - break; - } + UnicodeString locRule = newObj->getRuleFromResource(locale, status); + if ((locRule.length() != 0) && U_SUCCESS(status)) { + newObj->parseDescription(locRule, rChain, status); + if (U_SUCCESS(status)) { + newObj->addRules(rChain); } } - if (locRules==NULL) { - delete newRules; // Remove newRules to avoid memory leak since it is not needed anymore. - return createRules(PLURAL_DEFAULT_RULE, status); + if (U_FAILURE(status)||(locRule.length() == 0)) { + // use default plural rule + status = U_ZERO_ERROR; + UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE); + newObj->parseDescription(defRule, rChain, status); + newObj->addRules(rChain); } - - newRules->addRules(*locRules, status); - return newRules; + + return newObj; } UnicodeString @@ -328,34 +239,6 @@ PluralRules::operator==(const PluralRules& other) const { return TRUE; } -void -PluralRules::getRuleData(UErrorCode& status) { - UnicodeString ruleData; - UnicodeString localeData; - UnicodeString localeName; - int32_t i; - UChar cSlash = (UChar)0x002F; - - i=0; - while ( iadd(); break; case tOr: - orNode=rules.ruleHeader; + lastChain = &rules; + while (lastChain->next !=NULL) { + lastChain = lastChain->next; + } + orNode=lastChain->ruleHeader; while (orNode->next != NULL) { orNode = orNode->next; } @@ -482,51 +370,104 @@ PluralRules::getRepeatLimit() const { } } -void -PluralRules::initHashtable(UErrorCode& status) { - if (fLocaleStringsHash!=NULL) { - return; - } - { - Mutex lock; - if (gPluralRuleLocaleHash == NULL) { - // This static PluralRule hashtable residents in memory until end of application. - if ((gPluralRuleLocaleHash = new Hashtable(TRUE, status))!=NULL) { - fLocaleStringsHash = gPluralRuleLocaleHash; - return; - } - } - else { - fLocaleStringsHash = gPluralRuleLocaleHash; - } - } -} void -PluralRules::addRules(RuleChain& rules, UErrorCode& status) { - addRules(mLocaleName, rules, FALSE, status); -} - -void -PluralRules::addRules(const UnicodeString& localeName, RuleChain& rules, UBool addToHash, UErrorCode& status) { +PluralRules::addRules(RuleChain& rules) { RuleChain *newRule = new RuleChain(rules); - if ( addToHash ) - { - { - Mutex lock; - if ( (RuleChain *)fLocaleStringsHash->get(localeName) == NULL ) { - fLocaleStringsHash->put(localeName, newRule, status); - } - else { - delete newRule; - return; + this->mRules=newRule; + newRule->setRepeatLimit(); +} + +UnicodeString +PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) { + UnicodeString emptyStr; + + errCode = U_ZERO_ERROR; + UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode); + if(U_FAILURE(errCode)) { + /* total failure, not even root could be opened */ + return emptyStr; + } + UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode); + if(U_FAILURE(errCode)) { + ures_close(rb); + return emptyStr; + } + int32_t resLen=0; + const char *curLocaleName=locale.getName(); + const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode); + + if (s == NULL) { + // Check parent locales. + UErrorCode status = U_ZERO_ERROR; + char parentLocaleName[ULOC_FULLNAME_CAPACITY]; + const char *curLocaleName=locale.getName(); + int32_t localeNameLen=0; + uprv_strcpy(parentLocaleName, curLocaleName); + + while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName, + ULOC_FULLNAME_CAPACITY, &status)) > 0) { + resLen=0; + s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status); + if (s != NULL) { + errCode = U_ZERO_ERROR; + break; } + status = U_ZERO_ERROR; } } - else { - this->mRules=newRule; + if (s==NULL) { + ures_close(locRes); + ures_close(rb); + return emptyStr; } - newRule->setRepeatLimit(); + + char setKey[256]; + UChar result[256]; + u_UCharsToChars(s, setKey, resLen + 1); + // printf("\n PluralRule: %s\n", setKey); + + + UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode); + if(U_FAILURE(errCode)) { + ures_close(locRes); + ures_close(rb); + return emptyStr; + } + resLen=0; + UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode); + if (U_FAILURE(errCode)) { + ures_close(ruleRes); + ures_close(locRes); + ures_close(rb); + return emptyStr; + } + + int32_t numberKeys = ures_getSize(setRes); + char *key=NULL; + int32_t len=0; + for(int32_t i=0; i