diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp index 34069b78f5..b762ee8c1c 100644 --- a/icu4c/source/i18n/translit.cpp +++ b/icu4c/source/i18n/translit.cpp @@ -957,7 +957,7 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id, UErrorCode ec = U_ZERO_ERROR; TransliteratorAlias* alias = 0; Transliterator* t = 0; - + umtx_init(®istryMutex); umtx_lock(®istryMutex); if (HAVE_REGISTRY) { @@ -968,17 +968,47 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id, if (U_FAILURE(ec)) { delete t; delete alias; - return NULL; + return 0; } - if (alias != 0) { - // Instantiate an alias + // We may have not gotten a transliterator: Because we can't + // instantiate a transliterator from inside TransliteratorRegistry:: + // get() (that would deadlock), we sometimes pass back an alias. This + // contains the data we need to finish the instantiation outside the + // registry mutex. The alias may, in turn, generate another alias, so + // we handle aliases in a loop. The max times through the loop is two. + // [alan] + while (alias != 0) { U_ASSERT(t==0); - t = alias->create(pe, ec); - delete alias; + // Rule-based aliases are handled with TransliteratorAlias:: + // parse(), followed by TransliteratorRegistry::reget(). + // Other aliases are handled with TransliteratorAlias::create(). + if (alias->isRuleBased()) { + // Step 1. parse + TransliteratorParser parser; + alias->parse(parser, pe, ec); + delete alias; + alias = 0; + + // Step 2. reget + umtx_lock(®istryMutex); + if (HAVE_REGISTRY) { + t = registry->reget(id, parser, alias, pe, ec); + } + umtx_unlock(®istryMutex); + + // Step 3. Loop back around! + } else { + t = alias->create(pe, ec); + delete alias; + alias = 0; + break; + } if (U_FAILURE(ec)) { delete t; + delete alias; t = NULL; + break; } } diff --git a/icu4c/source/i18n/transreg.cpp b/icu4c/source/i18n/transreg.cpp index efc19255c9..fc0d99208d 100644 --- a/icu4c/source/i18n/transreg.cpp +++ b/icu4c/source/i18n/transreg.cpp @@ -27,6 +27,7 @@ #include "rbt_pars.h" #include "tridpars.h" #include "charstr.h" +#include "uassert.h" // Enable the following symbol to add debugging code that tracks the // allocation, deletion, and use of Entry objects. BoundsChecker has @@ -64,7 +65,8 @@ TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID) : aliasID(theAliasID), trans(0), compoundFilter(0), - idSplitPoint(-1) { + idSplitPoint(-1), + type(TransliteratorAlias::SIMPLE) { } TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, @@ -76,7 +78,19 @@ TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, aliasID(idBlock), trans(adopted), compoundFilter(cpdFilter), - idSplitPoint(theIDSplitPoint) { + idSplitPoint(theIDSplitPoint), + type(TransliteratorAlias::COMPOUND) { +} + +TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, + const UnicodeString& rules, + UTransDirection dir) : + ID(theID), + aliasID(rules), // bad name -- rename aliasID! + trans(0), + compoundFilter(0), + idSplitPoint((int32_t) dir), // bad name -- rename idSplitPoint! + type(TransliteratorAlias::RULES) { } TransliteratorAlias::~TransliteratorAlias() { @@ -86,10 +100,15 @@ TransliteratorAlias::~TransliteratorAlias() { Transliterator* TransliteratorAlias::create(UParseError& pe, UErrorCode& ec) { + if (U_FAILURE(ec)) { + return 0; + } Transliterator *t; - if (trans == 0) { + switch (type) { + case SIMPLE: t = Transliterator::createInstance(aliasID, UTRANS_FORWARD, pe, ec); - } else { + break; + case COMPOUND: t = new CompoundTransliterator(ID, aliasID, idSplitPoint, trans, ec); /* test for NULL */ @@ -99,12 +118,36 @@ Transliterator* TransliteratorAlias::create(UParseError& pe, } trans = 0; // so we don't delete it later if (compoundFilter) { + // TODO: Is this right? Are we leaking memory here? + // I'm suspicious because of the "trans = 0" line above; + // doesn't seem to fit the cloning here. Don't have time + // to track this down right now. [alan 3.0] t->adoptFilter((UnicodeSet*) compoundFilter->clone()); } + break; + case RULES: + U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE! + break; } return t; } +UBool TransliteratorAlias::isRuleBased() const { + return type == RULES; +} + +void TransliteratorAlias::parse(TransliteratorParser& parser, + UParseError& pe, UErrorCode& ec) const { + U_ASSERT(type == RULES); + if (U_FAILURE(ec)) { + return; + } + + // aliasID is really rules -- rename it! + // idSplitPoint is really UTransDirection -- rename it! + parser.parse(aliasID, (UTransDirection) idSplitPoint, pe, ec); +} + //---------------------------------------------------------------------- // class Spec //---------------------------------------------------------------------- @@ -441,11 +484,85 @@ Transliterator* TransliteratorRegistry::get(const UnicodeString& ID, TransliteratorAlias*& aliasReturn, UParseError& parseError, UErrorCode& status) { + U_ASSERT(aliasReturn == NULL); Entry *entry = find(ID); return (entry == 0) ? 0 : instantiateEntry(ID, entry, aliasReturn, parseError,status); } +Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID, + TransliteratorParser& parser, + TransliteratorAlias*& aliasReturn, + UParseError& parseError, + UErrorCode& status) { + U_ASSERT(aliasReturn == NULL); + Entry *entry = find(ID); + + if (entry == 0) { + // We get to this point if there are two threads, one of which + // is instantiating an ID, and another of which is removing + // the same ID from the registry, and the timing is just right. + return 0; + } + + // The usage model for the caller is that they will first call + // reg->get() inside the mutex, they'll get back an alias, they call + // alias->isRuleBased(), and if they get TRUE, they call alias->parse() + // outside the mutex, then reg->reget() inside the mutex again. A real + // mess, but it gets things working for ICU 3.0. [alan]. + + // Note: It's possible that in between the caller calling + // alias->parse() and reg->reget(), that another thread will have + // called reg->reget(), and the entry will already have been fixed up. + // We have to detect this so we don't stomp over existing entry + // data members and potentially leak memory (u.data and compoundFilter). + + if (entry->entryType == Entry::RULES_FORWARD || + entry->entryType == Entry::RULES_REVERSE || + entry->entryType == Entry::LOCALE_RULES) { + + entry->u.data = parser.orphanData(); + entry->stringArg = parser.idBlock; + entry->intArg = parser.idSplitPoint; + entry->compoundFilter = parser.orphanCompoundFilter(); + + // Reset entry->entryType to encapsulate the parsed data. The + // next time we instantiate this ID (including this very next + // time, at the end of this function) we won't have to parse + // again. + // NOTE: The logic here matches that in + // Transliterator::createFromRules(). + if (entry->stringArg.length() == 0) { + if (entry->u.data == 0) { + // No idBlock, no data -- this is just an + // alias for Null + entry->entryType = Entry::ALIAS; + entry->stringArg = NullTransliterator::ID; + } else { + // No idBlock, data != 0 -- this is an + // ordinary RBT_DATA + entry->entryType = Entry::RBT_DATA; + } + } else { + if (entry->u.data == 0) { + // idBlock, no data -- this is an alias. The ID has + // been munged from reverse into forward mode, if + // necessary, so instantiate the ID in the forward + // direction. + entry->entryType = Entry::ALIAS; + } else { + // idBlock and data -- this is a compound + // RBT + entry->entryType = Entry::COMPOUND_RBT; + } + } + } + + Transliterator *t = + instantiateEntry(ID, entry, aliasReturn, parseError,status); + return t; +} + void TransliteratorRegistry::put(Transliterator* adoptedProto, UBool visible) { Entry *entry = new Entry(); @@ -1030,45 +1147,62 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID TransliteratorAlias* &aliasReturn, UParseError& parseError, UErrorCode& status) { + Transliterator *t = 0; + U_ASSERT(aliasReturn == 0); - for (;;) { - if (entry->entryType == Entry::RBT_DATA) { - return new RuleBasedTransliterator(ID, entry->u.data); - } else if (entry->entryType == Entry::PROTOTYPE) { - return entry->u.prototype->clone(); - } else if (entry->entryType == Entry::ALIAS) { - aliasReturn = new TransliteratorAlias(entry->stringArg); - /* test for NULL */ - if (aliasReturn == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - } - return 0; - } else if (entry->entryType == Entry::FACTORY) { - return entry->u.factory.function(ID, entry->u.factory.context); - } else if (entry->entryType == Entry::COMPOUND_RBT) { + switch (entry->entryType) { + case Entry::RBT_DATA: + t = new RuleBasedTransliterator(ID, entry->u.data); + if (t == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return t; + case Entry::PROTOTYPE: + t = entry->u.prototype->clone(); + if (t == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return t; + case Entry::ALIAS: + aliasReturn = new TransliteratorAlias(entry->stringArg); + if (aliasReturn == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return 0; + case Entry::FACTORY: + t = entry->u.factory.function(ID, entry->u.factory.context); + if (t == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return t; + case Entry::COMPOUND_RBT: + { UnicodeString id("_", ""); Transliterator *t = new RuleBasedTransliterator(id, entry->u.data); - /* test for NULL */ if (t == 0) { status = U_MEMORY_ALLOCATION_ERROR; return 0; } aliasReturn = new TransliteratorAlias(ID, entry->stringArg, t, entry->intArg, entry->compoundFilter); - return 0; } - - TransliteratorParser parser; - - if (entry->entryType == Entry::LOCALE_RULES) { - parser.parse(entry->stringArg, (UTransDirection) entry->intArg, - parseError, status); - } else { - // At this point entry type must be either RULES_FORWARD or - // RULES_REVERSE. We process the rule data into a - // TransliteratorRuleData object, and possibly also into an - // ::id header and/or footer. Then we modify the registry with - // the parsed data and retry. - UBool isReverse = (entry->entryType == Entry::RULES_REVERSE); + if (aliasReturn == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return 0; + case Entry::LOCALE_RULES: + aliasReturn = new TransliteratorAlias(ID, entry->stringArg, + (UTransDirection) entry->intArg); + if (aliasReturn == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return 0; + case Entry::RULES_FORWARD: + case Entry::RULES_REVERSE: + // Process the rule data into a TransliteratorRuleData object, + // and possibly also into an ::id header and/or footer. Then + // we modify the registry with the parsed data and retry. + { + TransliteratorParser parser; // We use the file name, taken from another resource bundle // 2-d array at static init time, as a locale language. We're @@ -1079,64 +1213,34 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID UnicodeString rules = ures_getUnicodeStringByKey(bundle, RB_RULE, &status); ures_close(bundle); - // If the status indicates a failure, then we don't have any - // rules -- there is probably an installation error. The list - // in the root locale should correspond to all the installed - // transliterators; if it lists something that's not - // installed, we'll get an error from ResourceBundle. - - parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD, - parseError, status); - } - - if (U_FAILURE(status)) { - // We have a failure of some kind. Remove the ID from the - // registry so we don't keep trying. NOTE: This will throw off - // anyone who is, at the moment, trying to iterate over the - // available IDs. That's acceptable since we should never - // really get here except under installation, configuration, - // or unrecoverable run time memory failures. - remove(ID); - break; - } - - entry->u.data = parser.orphanData(); - entry->stringArg = parser.idBlock; - entry->intArg = parser.idSplitPoint; - entry->compoundFilter = parser.orphanCompoundFilter(); - - // Reset entry->entryType to something that we process at the - // top of the loop, then loop back to the top. As long as we - // do this, we only loop through twice at most. - // NOTE: The logic here matches that in - // Transliterator::createFromRules(). - if (entry->stringArg.length() == 0) { - if (entry->u.data == 0) { - // No idBlock, no data -- this is just an - // alias for Null - entry->entryType = Entry::ALIAS; - entry->stringArg = NullTransliterator::ID; + if (U_FAILURE(status)) { + // We have a failure of some kind. Remove the ID from the + // registry so we don't keep trying. NOTE: This will throw off + // anyone who is, at the moment, trying to iterate over the + // available IDs. That's acceptable since we should never + // really get here except under installation, configuration, + // or unrecoverable run time memory failures. + remove(ID); } else { - // No idBlock, data != 0 -- this is an - // ordinary RBT_DATA - entry->entryType = Entry::RBT_DATA; - } - } else { - if (entry->u.data == 0) { - // idBlock, no data -- this is an alias. The ID has - // been munged from reverse into forward mode, if - // necessary, so instantiate the ID in the forward - // direction. - entry->entryType = Entry::ALIAS; - } else { - // idBlock and data -- this is a compound - // RBT - entry->entryType = Entry::COMPOUND_RBT; + + // If the status indicates a failure, then we don't have any + // rules -- there is probably an installation error. The list + // in the root locale should correspond to all the installed + // transliterators; if it lists something that's not + // installed, we'll get an error from ResourceBundle. + aliasReturn = new TransliteratorAlias(ID, rules, + ((entry->entryType == Entry::RULES_REVERSE) ? + UTRANS_REVERSE : UTRANS_FORWARD)); + if (aliasReturn == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } } } + return 0; + default: + U_ASSERT(FALSE); // can't get here + return 0; } - - return 0; // failed } U_NAMESPACE_END diff --git a/icu4c/source/i18n/transreg.h b/icu4c/source/i18n/transreg.h index 387e3c7201..2bffabadc9 100644 --- a/icu4c/source/i18n/transreg.h +++ b/icu4c/source/i18n/transreg.h @@ -41,29 +41,61 @@ class UnicodeString; class TransliteratorAlias : public UMemory { public: /** - * Construct a simple alias. + * Construct a simple alias (type == SIMPLE) * @param aliasID the given id. */ TransliteratorAlias(const UnicodeString& aliasID); /** - * Construct a compound RBT alias. + * Construct a compound RBT alias (type == COMPOUND) */ TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlock, Transliterator* adopted, int32_t idSplitPoint, const UnicodeSet* compoundFilter); + /** + * Construct a rules alias (type = RULES) + */ + TransliteratorAlias(const UnicodeString& theID, + const UnicodeString& rules, + UTransDirection dir); + ~TransliteratorAlias(); /** * The whole point of create() is that the caller must invoke * it when the registry mutex is NOT held, to prevent deadlock. * It may only be called once. + * + * Note: Only call create() if isRuleBased() returns FALSE. + * + * This method must be called *outside* of the TransliteratorRegistry + * mutex. */ Transliterator* create(UParseError&, UErrorCode&); + + /** + * Return TRUE if this alias is rule-based. If so, the caller + * must call parse() on it, then call TransliteratorRegistry::reget(). + */ + UBool isRuleBased() const; + + /** + * If isRuleBased() returns TRUE, then the caller must call this + * method, followed by TransliteratorRegistry::reget(). The latter + * method must be called inside the TransliteratorRegistry mutex. + * + * Note: Only call parse() if isRuleBased() returns TRUE. + * + * This method must be called *outside* of the TransliteratorRegistry + * mutex, because it can instantiate Transliterators embedded in + * the rules via the "&Latin-Arabic()" syntax. + */ + void parse(TransliteratorParser& parser, + UParseError& pe, UErrorCode& ec) const; private: - // We actually come in two flavors: + // We actually come in three flavors: // 1. Simple alias // Here aliasID is the alias string. Everything else is // null, zero, empty. @@ -72,11 +104,15 @@ class TransliteratorAlias : public UMemory { // contained RBT, and idSplitPoint is the offet in aliasID // where the contained RBT goes. compoundFilter is the // compound filter, and it is _not_ owned. + // 3. Rules + // Here ID is the ID, aliasID is the rules string. + // idSplitPoint is the UTransDirection. UnicodeString ID; - UnicodeString aliasID; + UnicodeString aliasID; // rename! holds rules for RULES type Transliterator* trans; // owned const UnicodeSet* compoundFilter; // alias - int32_t idSplitPoint; + int32_t idSplitPoint; // rename! holds UTransDirection for RULES type + enum { SIMPLE, COMPOUND, RULES } type; TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class @@ -130,7 +166,8 @@ class TransliteratorRegistry : public UMemory { * filters or compounds, which we do not understand. Caller should * make aliasReturn NULL before calling. * @param ID the given ID - * @param aliasReturn the given TransliteratorAlias + * @param aliasReturn output param to receive TransliteratorAlias; + * should be NULL on entry * @param parseError Struct to recieve information on position * of error if an error is encountered * @param status Output param set to success/failure code. @@ -140,6 +177,27 @@ class TransliteratorRegistry : public UMemory { UParseError& parseError, UErrorCode& status); + /** + * The caller must call this after calling get(), if [a] calling get() + * returns an alias, and [b] the alias is rule based. In that + * situation the caller must call alias->parse() to do the parsing + * OUTSIDE THE REGISTRY MUTEX, then call this method to retry + * instantiating the transliterator. + * + * Note: Another alias might be returned by this method. + * + * This method (like all public methods of this class) must be called + * from within the TransliteratorRegistry mutex. + * + * @param aliasReturn output param to receive TransliteratorAlias; + * should be NULL on entry + */ + Transliterator* reget(const UnicodeString& ID, + TransliteratorParser& parser, + TransliteratorAlias*& aliasReturn, + UParseError& parseError, + UErrorCode& status); + /** * Register a prototype (adopted). This adds an entry to the * dynamic store, or replaces an existing entry. Any entry in the @@ -326,7 +384,7 @@ class TransliteratorRegistry : public UMemory { Entry* adopted, UBool visible); - void registerEntry(const UnicodeString& ID, + void registerEntry(const UnicodeString& ID, const UnicodeString& source, const UnicodeString& target, const UnicodeString& variant,