ICU-3786 rewrite registry and instantiation code to do parsing outside the registry mutex to avoid deadlock for rules of the form &Latin-Arabic(...)

X-SVN-Rev: 15703
2004-06-04 00:52:39 +00:00 · 2004-06-04 00:52:39 +00:00 · 164faf6a3d
commit 164faf6a3d
parent 9e97b3f70e
3 changed files with 293 additions and 101 deletions
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -957,7 +957,7 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id,
    UErrorCode ec = U_ZERO_ERROR;
    TransliteratorAlias* alias = 0;
    Transliterator* t = 0;
-    
+
    umtx_init(&registryMutex);
    umtx_lock(&registryMutex);
    if (HAVE_REGISTRY) {
@ -968,17 +968,47 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id,
    if (U_FAILURE(ec)) {
        delete t;
        delete alias;
-        return NULL;
+        return 0;
    }

-    if (alias != 0) {
-        // Instantiate an alias
+    // We may have not gotten a transliterator:  Because we can't
+    // instantiate a transliterator from inside TransliteratorRegistry::
+    // get() (that would deadlock), we sometimes pass back an alias.  This
+    // contains the data we need to finish the instantiation outside the
+    // registry mutex.  The alias may, in turn, generate another alias, so
+    // we handle aliases in a loop.  The max times through the loop is two.
+    // [alan]
+    while (alias != 0) {
        U_ASSERT(t==0);
-        t = alias->create(pe, ec);
-        delete alias;
+        // Rule-based aliases are handled with TransliteratorAlias::
+        // parse(), followed by TransliteratorRegistry::reget().
+        // Other aliases are handled with TransliteratorAlias::create().
+        if (alias->isRuleBased()) {
+            // Step 1. parse
+            TransliteratorParser parser;
+            alias->parse(parser, pe, ec);
+            delete alias;
+            alias = 0;
+
+            // Step 2. reget
+            umtx_lock(&registryMutex);
+            if (HAVE_REGISTRY) {
+                t = registry->reget(id, parser, alias, pe, ec);
+            }
+            umtx_unlock(&registryMutex);
+
+            // Step 3. Loop back around!
+        } else {
+            t = alias->create(pe, ec);
+            delete alias;
+            alias = 0;
+            break;
+        }
        if (U_FAILURE(ec)) {
            delete t;
+            delete alias;
            t = NULL;
+            break;
        }
    }

--- a/icu4c/source/i18n/transreg.cpp
+++ b/icu4c/source/i18n/transreg.cpp
@ -27,6 +27,7 @@
 #include "rbt_pars.h"
 #include "tridpars.h"
 #include "charstr.h"
+#include "uassert.h"

 // Enable the following symbol to add debugging code that tracks the
 // allocation, deletion, and use of Entry objects.  BoundsChecker has
@ -64,7 +65,8 @@ TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID) :
    aliasID(theAliasID),
    trans(0),
    compoundFilter(0),
-    idSplitPoint(-1) {
+    idSplitPoint(-1),
+    type(TransliteratorAlias::SIMPLE) {
 }

 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
@ -76,7 +78,19 @@ TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
    aliasID(idBlock),
    trans(adopted),
    compoundFilter(cpdFilter),
-    idSplitPoint(theIDSplitPoint) {
+    idSplitPoint(theIDSplitPoint),
+    type(TransliteratorAlias::COMPOUND) {
+}
+
+TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
+                                         const UnicodeString& rules,
+                                         UTransDirection dir) :
+    ID(theID),
+    aliasID(rules), // bad name -- rename aliasID!
+    trans(0),
+    compoundFilter(0),
+    idSplitPoint((int32_t) dir), // bad name -- rename idSplitPoint!
+    type(TransliteratorAlias::RULES) {
 }

 TransliteratorAlias::~TransliteratorAlias() {
@ -86,10 +100,15 @@ TransliteratorAlias::~TransliteratorAlias() {

 Transliterator* TransliteratorAlias::create(UParseError& pe,
                                            UErrorCode& ec) {
+    if (U_FAILURE(ec)) {
+        return 0;
+    }
    Transliterator *t;
-    if (trans == 0) {
+    switch (type) {
+    case SIMPLE:
        t = Transliterator::createInstance(aliasID, UTRANS_FORWARD, pe, ec);
-    } else {
+        break;
+    case COMPOUND:
        t = new CompoundTransliterator(ID, aliasID, idSplitPoint,
                                       trans, ec);
        /* test for NULL */
@ -99,12 +118,36 @@ Transliterator* TransliteratorAlias::create(UParseError& pe,
        }
        trans = 0; // so we don't delete it later
        if (compoundFilter) {
+            // TODO: Is this right? Are we leaking memory here?
+            // I'm suspicious because of the "trans = 0" line above;
+            // doesn't seem to fit the cloning here.  Don't have time
+            // to track this down right now. [alan 3.0]
            t->adoptFilter((UnicodeSet*) compoundFilter->clone());
        }
+        break;
+    case RULES:
+        U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
+        break;
    }
    return t;
 }

+UBool TransliteratorAlias::isRuleBased() const {
+    return type == RULES;
+}
+
+void TransliteratorAlias::parse(TransliteratorParser& parser,
+                                UParseError& pe, UErrorCode& ec) const {
+    U_ASSERT(type == RULES);
+    if (U_FAILURE(ec)) {
+        return;
+    }
+
+    // aliasID is really rules -- rename it!
+    // idSplitPoint is really UTransDirection -- rename it!
+    parser.parse(aliasID, (UTransDirection) idSplitPoint, pe, ec);
+}
+
 //----------------------------------------------------------------------
 // class Spec
 //----------------------------------------------------------------------
@ -441,11 +484,85 @@ Transliterator* TransliteratorRegistry::get(const UnicodeString& ID,
                                            TransliteratorAlias*& aliasReturn,
                                            UParseError& parseError,
                                            UErrorCode& status) {
+    U_ASSERT(aliasReturn == NULL);
    Entry *entry = find(ID);
    return (entry == 0) ? 0
        : instantiateEntry(ID, entry, aliasReturn, parseError,status);
 }

+Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
+                                              TransliteratorParser& parser,
+                                              TransliteratorAlias*& aliasReturn,
+                                              UParseError& parseError,
+                                              UErrorCode& status) {
+    U_ASSERT(aliasReturn == NULL);
+    Entry *entry = find(ID);
+
+    if (entry == 0) {
+        // We get to this point if there are two threads, one of which
+        // is instantiating an ID, and another of which is removing
+        // the same ID from the registry, and the timing is just right.
+        return 0;
+    }
+
+    // The usage model for the caller is that they will first call
+    // reg->get() inside the mutex, they'll get back an alias, they call
+    // alias->isRuleBased(), and if they get TRUE, they call alias->parse()
+    // outside the mutex, then reg->reget() inside the mutex again.  A real
+    // mess, but it gets things working for ICU 3.0. [alan].
+
+    // Note: It's possible that in between the caller calling
+    // alias->parse() and reg->reget(), that another thread will have
+    // called reg->reget(), and the entry will already have been fixed up.
+    // We have to detect this so we don't stomp over existing entry
+    // data members and potentially leak memory (u.data and compoundFilter).
+
+    if (entry->entryType == Entry::RULES_FORWARD ||
+        entry->entryType == Entry::RULES_REVERSE ||
+        entry->entryType == Entry::LOCALE_RULES) {
+        
+        entry->u.data = parser.orphanData();
+        entry->stringArg = parser.idBlock;
+        entry->intArg = parser.idSplitPoint;
+        entry->compoundFilter = parser.orphanCompoundFilter();
+
+        // Reset entry->entryType to encapsulate the parsed data.  The
+        // next time we instantiate this ID (including this very next
+        // time, at the end of this function) we won't have to parse
+        // again.
+        // NOTE: The logic here matches that in
+        // Transliterator::createFromRules().
+        if (entry->stringArg.length() == 0) {
+            if (entry->u.data == 0) {
+                // No idBlock, no data -- this is just an
+                // alias for Null
+                entry->entryType = Entry::ALIAS;
+                entry->stringArg = NullTransliterator::ID;
+            } else {
+                // No idBlock, data != 0 -- this is an
+                // ordinary RBT_DATA
+                entry->entryType = Entry::RBT_DATA;
+            }
+        } else {
+            if (entry->u.data == 0) {
+                // idBlock, no data -- this is an alias.  The ID has
+                // been munged from reverse into forward mode, if
+                // necessary, so instantiate the ID in the forward
+                // direction.
+                entry->entryType = Entry::ALIAS;
+            } else {
+                // idBlock and data -- this is a compound
+                // RBT
+                entry->entryType = Entry::COMPOUND_RBT;
+            }
+        }
+    }
+
+    Transliterator *t =
+        instantiateEntry(ID, entry, aliasReturn, parseError,status);
+    return t;
+}
+
 void TransliteratorRegistry::put(Transliterator* adoptedProto,
                                 UBool visible) {
    Entry *entry = new Entry();
@ -1030,45 +1147,62 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
                                                         TransliteratorAlias* &aliasReturn,
                                                         UParseError& parseError,
                                                         UErrorCode& status) {
+    Transliterator *t = 0;
+    U_ASSERT(aliasReturn == 0);

-    for (;;) {
-        if (entry->entryType == Entry::RBT_DATA) {
-            return new RuleBasedTransliterator(ID, entry->u.data);
-        } else if (entry->entryType == Entry::PROTOTYPE) {
-            return entry->u.prototype->clone();
-        } else if (entry->entryType == Entry::ALIAS) {
-            aliasReturn = new TransliteratorAlias(entry->stringArg);
-            /* test for NULL */
-            if (aliasReturn == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-            }
-            return 0;
-        } else if (entry->entryType == Entry::FACTORY) {
-            return entry->u.factory.function(ID, entry->u.factory.context);
-        } else if (entry->entryType == Entry::COMPOUND_RBT) {
+    switch (entry->entryType) {
+    case Entry::RBT_DATA:
+        t = new RuleBasedTransliterator(ID, entry->u.data);
+        if (t == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return t;
+    case Entry::PROTOTYPE:
+        t = entry->u.prototype->clone();
+        if (t == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return t;
+    case Entry::ALIAS:
+        aliasReturn = new TransliteratorAlias(entry->stringArg);
+        if (aliasReturn == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return 0;
+    case Entry::FACTORY:
+        t = entry->u.factory.function(ID, entry->u.factory.context);
+        if (t == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return t;
+    case Entry::COMPOUND_RBT:
+        {
            UnicodeString id("_", "");
            Transliterator *t = new RuleBasedTransliterator(id, entry->u.data);
-            /* test for NULL */
            if (t == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
            aliasReturn = new TransliteratorAlias(ID, entry->stringArg, t, entry->intArg, entry->compoundFilter);
-            return 0;
        }
-
-        TransliteratorParser parser;
-
-        if (entry->entryType == Entry::LOCALE_RULES) {
-            parser.parse(entry->stringArg, (UTransDirection) entry->intArg,
-                         parseError, status);
-        } else {
-            // At this point entry type must be either RULES_FORWARD or
-            // RULES_REVERSE.  We process the rule data into a
-            // TransliteratorRuleData object, and possibly also into an
-            // ::id header and/or footer.  Then we modify the registry with
-            // the parsed data and retry.
-            UBool isReverse = (entry->entryType == Entry::RULES_REVERSE);
+        if (aliasReturn == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return 0;
+    case Entry::LOCALE_RULES:
+        aliasReturn = new TransliteratorAlias(ID, entry->stringArg,
+                                              (UTransDirection) entry->intArg);
+        if (aliasReturn == 0) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return 0;
+    case Entry::RULES_FORWARD:
+    case Entry::RULES_REVERSE:
+        // Process the rule data into a TransliteratorRuleData object,
+        // and possibly also into an ::id header and/or footer.  Then
+        // we modify the registry with the parsed data and retry.
+        {
+            TransliteratorParser parser;
            
            // We use the file name, taken from another resource bundle
            // 2-d array at static init time, as a locale language.  We're
@ -1079,64 +1213,34 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
            UnicodeString rules = ures_getUnicodeStringByKey(bundle, RB_RULE, &status);
            ures_close(bundle);
            
-            // If the status indicates a failure, then we don't have any
-            // rules -- there is probably an installation error.  The list
-            // in the root locale should correspond to all the installed
-            // transliterators; if it lists something that's not
-            // installed, we'll get an error from ResourceBundle.
-            
-            parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
-                         parseError, status);
-        }
-
-        if (U_FAILURE(status)) {
-            // We have a failure of some kind.  Remove the ID from the
-            // registry so we don't keep trying.  NOTE: This will throw off
-            // anyone who is, at the moment, trying to iterate over the
-            // available IDs.  That's acceptable since we should never
-            // really get here except under installation, configuration,
-            // or unrecoverable run time memory failures.
-            remove(ID);
-            break;
-        }
-
-        entry->u.data = parser.orphanData();
-        entry->stringArg = parser.idBlock;
-        entry->intArg = parser.idSplitPoint;
-        entry->compoundFilter = parser.orphanCompoundFilter();
-
-        // Reset entry->entryType to something that we process at the
-        // top of the loop, then loop back to the top.  As long as we
-        // do this, we only loop through twice at most.
-        // NOTE: The logic here matches that in
-        // Transliterator::createFromRules().
-        if (entry->stringArg.length() == 0) {
-            if (entry->u.data == 0) {
-                // No idBlock, no data -- this is just an
-                // alias for Null
-                entry->entryType = Entry::ALIAS;
-                entry->stringArg = NullTransliterator::ID;
+            if (U_FAILURE(status)) {
+                // We have a failure of some kind.  Remove the ID from the
+                // registry so we don't keep trying.  NOTE: This will throw off
+                // anyone who is, at the moment, trying to iterate over the
+                // available IDs.  That's acceptable since we should never
+                // really get here except under installation, configuration,
+                // or unrecoverable run time memory failures.
+                remove(ID);
            } else {
-                // No idBlock, data != 0 -- this is an
-                // ordinary RBT_DATA
-                entry->entryType = Entry::RBT_DATA;
-            }
-        } else {
-            if (entry->u.data == 0) {
-                // idBlock, no data -- this is an alias.  The ID has
-                // been munged from reverse into forward mode, if
-                // necessary, so instantiate the ID in the forward
-                // direction.
-                entry->entryType = Entry::ALIAS;
-            } else {
-                // idBlock and data -- this is a compound
-                // RBT
-                entry->entryType = Entry::COMPOUND_RBT;
+                
+                // If the status indicates a failure, then we don't have any
+                // rules -- there is probably an installation error.  The list
+                // in the root locale should correspond to all the installed
+                // transliterators; if it lists something that's not
+                // installed, we'll get an error from ResourceBundle.
+                aliasReturn = new TransliteratorAlias(ID, rules,
+                    ((entry->entryType == Entry::RULES_REVERSE) ?
+                     UTRANS_REVERSE : UTRANS_FORWARD));
+                if (aliasReturn == 0) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                }
            }
        }
+        return 0;
+    default:
+        U_ASSERT(FALSE); // can't get here
+        return 0;
    }
-
-    return 0; // failed
 }
 U_NAMESPACE_END

--- a/icu4c/source/i18n/transreg.h
+++ b/icu4c/source/i18n/transreg.h
@ -41,29 +41,61 @@ class UnicodeString;
 class TransliteratorAlias : public UMemory {
 public:
    /**
-     * Construct a simple alias.
+     * Construct a simple alias (type == SIMPLE)
     * @param aliasID the given id.
     */
    TransliteratorAlias(const UnicodeString& aliasID);
    
    /**
-     * Construct a compound RBT alias.
+     * Construct a compound RBT alias (type == COMPOUND)
     */
    TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlock,
                        Transliterator* adopted, int32_t idSplitPoint,
                        const UnicodeSet* compoundFilter);

+    /**
+     * Construct a rules alias (type = RULES)
+     */
+    TransliteratorAlias(const UnicodeString& theID,
+                        const UnicodeString& rules,
+                        UTransDirection dir);
+
    ~TransliteratorAlias();
    
    /**
     * The whole point of create() is that the caller must invoke
     * it when the registry mutex is NOT held, to prevent deadlock.
     * It may only be called once.
+     *
+     * Note: Only call create() if isRuleBased() returns FALSE.
+     *
+     * This method must be called *outside* of the TransliteratorRegistry
+     * mutex.
     */
    Transliterator* create(UParseError&, UErrorCode&);
+
+    /**
+     * Return TRUE if this alias is rule-based.  If so, the caller
+     * must call parse() on it, then call TransliteratorRegistry::reget().
+     */
+    UBool isRuleBased() const;
+
+    /**
+     * If isRuleBased() returns TRUE, then the caller must call this
+     * method, followed by TransliteratorRegistry::reget().  The latter
+     * method must be called inside the TransliteratorRegistry mutex.
+     *
+     * Note: Only call parse() if isRuleBased() returns TRUE.
+     *
+     * This method must be called *outside* of the TransliteratorRegistry
+     * mutex, because it can instantiate Transliterators embedded in
+     * the rules via the "&Latin-Arabic()" syntax.
+     */
+    void parse(TransliteratorParser& parser,
+               UParseError& pe, UErrorCode& ec) const;
    
 private:
-    // We actually come in two flavors:
+    // We actually come in three flavors:
    // 1. Simple alias
    //    Here aliasID is the alias string.  Everything else is
    //    null, zero, empty.
@ -72,11 +104,15 @@ class TransliteratorAlias : public UMemory {
    //    contained RBT, and idSplitPoint is the offet in aliasID
    //    where the contained RBT goes.  compoundFilter is the
    //    compound filter, and it is _not_ owned.
+    // 3. Rules
+    //    Here ID is the ID, aliasID is the rules string.
+    //    idSplitPoint is the UTransDirection.
    UnicodeString ID;
-    UnicodeString aliasID;
+    UnicodeString aliasID; // rename! holds rules for RULES type
    Transliterator* trans; // owned
    const UnicodeSet* compoundFilter; // alias
-    int32_t idSplitPoint;
+    int32_t idSplitPoint; // rename! holds UTransDirection for RULES type
+    enum { SIMPLE, COMPOUND, RULES } type;

    TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
    TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
@ -130,7 +166,8 @@ class TransliteratorRegistry : public UMemory {
     * filters or compounds, which we do not understand.  Caller should
     * make aliasReturn NULL before calling.
     * @param ID          the given ID
-     * @param aliasReturn the given TransliteratorAlias
+     * @param aliasReturn output param to receive TransliteratorAlias;
+     *                    should be NULL on entry
     * @param parseError  Struct to recieve information on position 
     *                    of error if an error is encountered
     * @param status      Output param set to success/failure code.
@ -140,6 +177,27 @@ class TransliteratorRegistry : public UMemory {
                        UParseError& parseError,
                        UErrorCode& status);

+    /**
+     * The caller must call this after calling get(), if [a] calling get()
+     * returns an alias, and [b] the alias is rule based.  In that
+     * situation the caller must call alias->parse() to do the parsing
+     * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
+     * instantiating the transliterator.
+     *
+     * Note: Another alias might be returned by this method.
+     *
+     * This method (like all public methods of this class) must be called
+     * from within the TransliteratorRegistry mutex.
+     *
+     * @param aliasReturn output param to receive TransliteratorAlias;
+     *                    should be NULL on entry
+     */
+    Transliterator* reget(const UnicodeString& ID,
+                          TransliteratorParser& parser,
+                          TransliteratorAlias*& aliasReturn,
+                          UParseError& parseError,
+                          UErrorCode& status);
+
    /**
     * Register a prototype (adopted).  This adds an entry to the
     * dynamic store, or replaces an existing entry.  Any entry in the
@ -326,7 +384,7 @@ class TransliteratorRegistry : public UMemory {
                       Entry* adopted,
                       UBool visible);

-   void registerEntry(const UnicodeString& ID,
+    void registerEntry(const UnicodeString& ID,
                       const UnicodeString& source,
                       const UnicodeString& target,
                       const UnicodeString& variant,