ICU-3786 rewrite registry and instantiation code to do parsing outside the registry mutex to avoid deadlock for rules of the form &Latin-Arabic(...)

X-SVN-Rev: 15703
This commit is contained in:
Alan Liu 2004-06-04 00:52:39 +00:00
parent 9e97b3f70e
commit 164faf6a3d
3 changed files with 293 additions and 101 deletions

View File

@ -957,7 +957,7 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id,
UErrorCode ec = U_ZERO_ERROR;
TransliteratorAlias* alias = 0;
Transliterator* t = 0;
umtx_init(&registryMutex);
umtx_lock(&registryMutex);
if (HAVE_REGISTRY) {
@ -968,17 +968,47 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id,
if (U_FAILURE(ec)) {
delete t;
delete alias;
return NULL;
return 0;
}
if (alias != 0) {
// Instantiate an alias
// We may have not gotten a transliterator: Because we can't
// instantiate a transliterator from inside TransliteratorRegistry::
// get() (that would deadlock), we sometimes pass back an alias. This
// contains the data we need to finish the instantiation outside the
// registry mutex. The alias may, in turn, generate another alias, so
// we handle aliases in a loop. The max times through the loop is two.
// [alan]
while (alias != 0) {
U_ASSERT(t==0);
t = alias->create(pe, ec);
delete alias;
// Rule-based aliases are handled with TransliteratorAlias::
// parse(), followed by TransliteratorRegistry::reget().
// Other aliases are handled with TransliteratorAlias::create().
if (alias->isRuleBased()) {
// Step 1. parse
TransliteratorParser parser;
alias->parse(parser, pe, ec);
delete alias;
alias = 0;
// Step 2. reget
umtx_lock(&registryMutex);
if (HAVE_REGISTRY) {
t = registry->reget(id, parser, alias, pe, ec);
}
umtx_unlock(&registryMutex);
// Step 3. Loop back around!
} else {
t = alias->create(pe, ec);
delete alias;
alias = 0;
break;
}
if (U_FAILURE(ec)) {
delete t;
delete alias;
t = NULL;
break;
}
}

View File

@ -27,6 +27,7 @@
#include "rbt_pars.h"
#include "tridpars.h"
#include "charstr.h"
#include "uassert.h"
// Enable the following symbol to add debugging code that tracks the
// allocation, deletion, and use of Entry objects. BoundsChecker has
@ -64,7 +65,8 @@ TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID) :
aliasID(theAliasID),
trans(0),
compoundFilter(0),
idSplitPoint(-1) {
idSplitPoint(-1),
type(TransliteratorAlias::SIMPLE) {
}
TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
@ -76,7 +78,19 @@ TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
aliasID(idBlock),
trans(adopted),
compoundFilter(cpdFilter),
idSplitPoint(theIDSplitPoint) {
idSplitPoint(theIDSplitPoint),
type(TransliteratorAlias::COMPOUND) {
}
TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
const UnicodeString& rules,
UTransDirection dir) :
ID(theID),
aliasID(rules), // bad name -- rename aliasID!
trans(0),
compoundFilter(0),
idSplitPoint((int32_t) dir), // bad name -- rename idSplitPoint!
type(TransliteratorAlias::RULES) {
}
TransliteratorAlias::~TransliteratorAlias() {
@ -86,10 +100,15 @@ TransliteratorAlias::~TransliteratorAlias() {
Transliterator* TransliteratorAlias::create(UParseError& pe,
UErrorCode& ec) {
if (U_FAILURE(ec)) {
return 0;
}
Transliterator *t;
if (trans == 0) {
switch (type) {
case SIMPLE:
t = Transliterator::createInstance(aliasID, UTRANS_FORWARD, pe, ec);
} else {
break;
case COMPOUND:
t = new CompoundTransliterator(ID, aliasID, idSplitPoint,
trans, ec);
/* test for NULL */
@ -99,12 +118,36 @@ Transliterator* TransliteratorAlias::create(UParseError& pe,
}
trans = 0; // so we don't delete it later
if (compoundFilter) {
// TODO: Is this right? Are we leaking memory here?
// I'm suspicious because of the "trans = 0" line above;
// doesn't seem to fit the cloning here. Don't have time
// to track this down right now. [alan 3.0]
t->adoptFilter((UnicodeSet*) compoundFilter->clone());
}
break;
case RULES:
U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
break;
}
return t;
}
UBool TransliteratorAlias::isRuleBased() const {
return type == RULES;
}
void TransliteratorAlias::parse(TransliteratorParser& parser,
UParseError& pe, UErrorCode& ec) const {
U_ASSERT(type == RULES);
if (U_FAILURE(ec)) {
return;
}
// aliasID is really rules -- rename it!
// idSplitPoint is really UTransDirection -- rename it!
parser.parse(aliasID, (UTransDirection) idSplitPoint, pe, ec);
}
//----------------------------------------------------------------------
// class Spec
//----------------------------------------------------------------------
@ -441,11 +484,85 @@ Transliterator* TransliteratorRegistry::get(const UnicodeString& ID,
TransliteratorAlias*& aliasReturn,
UParseError& parseError,
UErrorCode& status) {
U_ASSERT(aliasReturn == NULL);
Entry *entry = find(ID);
return (entry == 0) ? 0
: instantiateEntry(ID, entry, aliasReturn, parseError,status);
}
Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
TransliteratorParser& parser,
TransliteratorAlias*& aliasReturn,
UParseError& parseError,
UErrorCode& status) {
U_ASSERT(aliasReturn == NULL);
Entry *entry = find(ID);
if (entry == 0) {
// We get to this point if there are two threads, one of which
// is instantiating an ID, and another of which is removing
// the same ID from the registry, and the timing is just right.
return 0;
}
// The usage model for the caller is that they will first call
// reg->get() inside the mutex, they'll get back an alias, they call
// alias->isRuleBased(), and if they get TRUE, they call alias->parse()
// outside the mutex, then reg->reget() inside the mutex again. A real
// mess, but it gets things working for ICU 3.0. [alan].
// Note: It's possible that in between the caller calling
// alias->parse() and reg->reget(), that another thread will have
// called reg->reget(), and the entry will already have been fixed up.
// We have to detect this so we don't stomp over existing entry
// data members and potentially leak memory (u.data and compoundFilter).
if (entry->entryType == Entry::RULES_FORWARD ||
entry->entryType == Entry::RULES_REVERSE ||
entry->entryType == Entry::LOCALE_RULES) {
entry->u.data = parser.orphanData();
entry->stringArg = parser.idBlock;
entry->intArg = parser.idSplitPoint;
entry->compoundFilter = parser.orphanCompoundFilter();
// Reset entry->entryType to encapsulate the parsed data. The
// next time we instantiate this ID (including this very next
// time, at the end of this function) we won't have to parse
// again.
// NOTE: The logic here matches that in
// Transliterator::createFromRules().
if (entry->stringArg.length() == 0) {
if (entry->u.data == 0) {
// No idBlock, no data -- this is just an
// alias for Null
entry->entryType = Entry::ALIAS;
entry->stringArg = NullTransliterator::ID;
} else {
// No idBlock, data != 0 -- this is an
// ordinary RBT_DATA
entry->entryType = Entry::RBT_DATA;
}
} else {
if (entry->u.data == 0) {
// idBlock, no data -- this is an alias. The ID has
// been munged from reverse into forward mode, if
// necessary, so instantiate the ID in the forward
// direction.
entry->entryType = Entry::ALIAS;
} else {
// idBlock and data -- this is a compound
// RBT
entry->entryType = Entry::COMPOUND_RBT;
}
}
}
Transliterator *t =
instantiateEntry(ID, entry, aliasReturn, parseError,status);
return t;
}
void TransliteratorRegistry::put(Transliterator* adoptedProto,
UBool visible) {
Entry *entry = new Entry();
@ -1030,45 +1147,62 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
TransliteratorAlias* &aliasReturn,
UParseError& parseError,
UErrorCode& status) {
Transliterator *t = 0;
U_ASSERT(aliasReturn == 0);
for (;;) {
if (entry->entryType == Entry::RBT_DATA) {
return new RuleBasedTransliterator(ID, entry->u.data);
} else if (entry->entryType == Entry::PROTOTYPE) {
return entry->u.prototype->clone();
} else if (entry->entryType == Entry::ALIAS) {
aliasReturn = new TransliteratorAlias(entry->stringArg);
/* test for NULL */
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return 0;
} else if (entry->entryType == Entry::FACTORY) {
return entry->u.factory.function(ID, entry->u.factory.context);
} else if (entry->entryType == Entry::COMPOUND_RBT) {
switch (entry->entryType) {
case Entry::RBT_DATA:
t = new RuleBasedTransliterator(ID, entry->u.data);
if (t == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return t;
case Entry::PROTOTYPE:
t = entry->u.prototype->clone();
if (t == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return t;
case Entry::ALIAS:
aliasReturn = new TransliteratorAlias(entry->stringArg);
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return 0;
case Entry::FACTORY:
t = entry->u.factory.function(ID, entry->u.factory.context);
if (t == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return t;
case Entry::COMPOUND_RBT:
{
UnicodeString id("_", "");
Transliterator *t = new RuleBasedTransliterator(id, entry->u.data);
/* test for NULL */
if (t == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
aliasReturn = new TransliteratorAlias(ID, entry->stringArg, t, entry->intArg, entry->compoundFilter);
return 0;
}
TransliteratorParser parser;
if (entry->entryType == Entry::LOCALE_RULES) {
parser.parse(entry->stringArg, (UTransDirection) entry->intArg,
parseError, status);
} else {
// At this point entry type must be either RULES_FORWARD or
// RULES_REVERSE. We process the rule data into a
// TransliteratorRuleData object, and possibly also into an
// ::id header and/or footer. Then we modify the registry with
// the parsed data and retry.
UBool isReverse = (entry->entryType == Entry::RULES_REVERSE);
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return 0;
case Entry::LOCALE_RULES:
aliasReturn = new TransliteratorAlias(ID, entry->stringArg,
(UTransDirection) entry->intArg);
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return 0;
case Entry::RULES_FORWARD:
case Entry::RULES_REVERSE:
// Process the rule data into a TransliteratorRuleData object,
// and possibly also into an ::id header and/or footer. Then
// we modify the registry with the parsed data and retry.
{
TransliteratorParser parser;
// We use the file name, taken from another resource bundle
// 2-d array at static init time, as a locale language. We're
@ -1079,64 +1213,34 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
UnicodeString rules = ures_getUnicodeStringByKey(bundle, RB_RULE, &status);
ures_close(bundle);
// If the status indicates a failure, then we don't have any
// rules -- there is probably an installation error. The list
// in the root locale should correspond to all the installed
// transliterators; if it lists something that's not
// installed, we'll get an error from ResourceBundle.
parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
parseError, status);
}
if (U_FAILURE(status)) {
// We have a failure of some kind. Remove the ID from the
// registry so we don't keep trying. NOTE: This will throw off
// anyone who is, at the moment, trying to iterate over the
// available IDs. That's acceptable since we should never
// really get here except under installation, configuration,
// or unrecoverable run time memory failures.
remove(ID);
break;
}
entry->u.data = parser.orphanData();
entry->stringArg = parser.idBlock;
entry->intArg = parser.idSplitPoint;
entry->compoundFilter = parser.orphanCompoundFilter();
// Reset entry->entryType to something that we process at the
// top of the loop, then loop back to the top. As long as we
// do this, we only loop through twice at most.
// NOTE: The logic here matches that in
// Transliterator::createFromRules().
if (entry->stringArg.length() == 0) {
if (entry->u.data == 0) {
// No idBlock, no data -- this is just an
// alias for Null
entry->entryType = Entry::ALIAS;
entry->stringArg = NullTransliterator::ID;
if (U_FAILURE(status)) {
// We have a failure of some kind. Remove the ID from the
// registry so we don't keep trying. NOTE: This will throw off
// anyone who is, at the moment, trying to iterate over the
// available IDs. That's acceptable since we should never
// really get here except under installation, configuration,
// or unrecoverable run time memory failures.
remove(ID);
} else {
// No idBlock, data != 0 -- this is an
// ordinary RBT_DATA
entry->entryType = Entry::RBT_DATA;
}
} else {
if (entry->u.data == 0) {
// idBlock, no data -- this is an alias. The ID has
// been munged from reverse into forward mode, if
// necessary, so instantiate the ID in the forward
// direction.
entry->entryType = Entry::ALIAS;
} else {
// idBlock and data -- this is a compound
// RBT
entry->entryType = Entry::COMPOUND_RBT;
// If the status indicates a failure, then we don't have any
// rules -- there is probably an installation error. The list
// in the root locale should correspond to all the installed
// transliterators; if it lists something that's not
// installed, we'll get an error from ResourceBundle.
aliasReturn = new TransliteratorAlias(ID, rules,
((entry->entryType == Entry::RULES_REVERSE) ?
UTRANS_REVERSE : UTRANS_FORWARD));
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
}
return 0;
default:
U_ASSERT(FALSE); // can't get here
return 0;
}
return 0; // failed
}
U_NAMESPACE_END

View File

@ -41,29 +41,61 @@ class UnicodeString;
class TransliteratorAlias : public UMemory {
public:
/**
* Construct a simple alias.
* Construct a simple alias (type == SIMPLE)
* @param aliasID the given id.
*/
TransliteratorAlias(const UnicodeString& aliasID);
/**
* Construct a compound RBT alias.
* Construct a compound RBT alias (type == COMPOUND)
*/
TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlock,
Transliterator* adopted, int32_t idSplitPoint,
const UnicodeSet* compoundFilter);
/**
* Construct a rules alias (type = RULES)
*/
TransliteratorAlias(const UnicodeString& theID,
const UnicodeString& rules,
UTransDirection dir);
~TransliteratorAlias();
/**
* The whole point of create() is that the caller must invoke
* it when the registry mutex is NOT held, to prevent deadlock.
* It may only be called once.
*
* Note: Only call create() if isRuleBased() returns FALSE.
*
* This method must be called *outside* of the TransliteratorRegistry
* mutex.
*/
Transliterator* create(UParseError&, UErrorCode&);
/**
* Return TRUE if this alias is rule-based. If so, the caller
* must call parse() on it, then call TransliteratorRegistry::reget().
*/
UBool isRuleBased() const;
/**
* If isRuleBased() returns TRUE, then the caller must call this
* method, followed by TransliteratorRegistry::reget(). The latter
* method must be called inside the TransliteratorRegistry mutex.
*
* Note: Only call parse() if isRuleBased() returns TRUE.
*
* This method must be called *outside* of the TransliteratorRegistry
* mutex, because it can instantiate Transliterators embedded in
* the rules via the "&Latin-Arabic()" syntax.
*/
void parse(TransliteratorParser& parser,
UParseError& pe, UErrorCode& ec) const;
private:
// We actually come in two flavors:
// We actually come in three flavors:
// 1. Simple alias
// Here aliasID is the alias string. Everything else is
// null, zero, empty.
@ -72,11 +104,15 @@ class TransliteratorAlias : public UMemory {
// contained RBT, and idSplitPoint is the offet in aliasID
// where the contained RBT goes. compoundFilter is the
// compound filter, and it is _not_ owned.
// 3. Rules
// Here ID is the ID, aliasID is the rules string.
// idSplitPoint is the UTransDirection.
UnicodeString ID;
UnicodeString aliasID;
UnicodeString aliasID; // rename! holds rules for RULES type
Transliterator* trans; // owned
const UnicodeSet* compoundFilter; // alias
int32_t idSplitPoint;
int32_t idSplitPoint; // rename! holds UTransDirection for RULES type
enum { SIMPLE, COMPOUND, RULES } type;
TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
@ -130,7 +166,8 @@ class TransliteratorRegistry : public UMemory {
* filters or compounds, which we do not understand. Caller should
* make aliasReturn NULL before calling.
* @param ID the given ID
* @param aliasReturn the given TransliteratorAlias
* @param aliasReturn output param to receive TransliteratorAlias;
* should be NULL on entry
* @param parseError Struct to recieve information on position
* of error if an error is encountered
* @param status Output param set to success/failure code.
@ -140,6 +177,27 @@ class TransliteratorRegistry : public UMemory {
UParseError& parseError,
UErrorCode& status);
/**
* The caller must call this after calling get(), if [a] calling get()
* returns an alias, and [b] the alias is rule based. In that
* situation the caller must call alias->parse() to do the parsing
* OUTSIDE THE REGISTRY MUTEX, then call this method to retry
* instantiating the transliterator.
*
* Note: Another alias might be returned by this method.
*
* This method (like all public methods of this class) must be called
* from within the TransliteratorRegistry mutex.
*
* @param aliasReturn output param to receive TransliteratorAlias;
* should be NULL on entry
*/
Transliterator* reget(const UnicodeString& ID,
TransliteratorParser& parser,
TransliteratorAlias*& aliasReturn,
UParseError& parseError,
UErrorCode& status);
/**
* Register a prototype (adopted). This adds an entry to the
* dynamic store, or replaces an existing entry. Any entry in the
@ -326,7 +384,7 @@ class TransliteratorRegistry : public UMemory {
Entry* adopted,
UBool visible);
void registerEntry(const UnicodeString& ID,
void registerEntry(const UnicodeString& ID,
const UnicodeString& source,
const UnicodeString& target,
const UnicodeString& variant,