ICU-5161 Reduce memory consumption.
Allow more memory allocation errors to be reported. Reduce memory fragmentation. Speed up initialization slightly. X-SVN-Rev: 19547
This commit is contained in:
parent
573f662749
commit
e88c000399
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -37,19 +37,19 @@ void RuleBasedTransliterator::_construct(const UnicodeString& rules,
|
||||
return;
|
||||
}
|
||||
|
||||
TransliteratorParser parser;
|
||||
TransliteratorParser parser(status);
|
||||
parser.parse(rules, direction, parseError, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (parser.idBlockVector->size() != 0 ||
|
||||
if (parser.idBlockVector.size() != 0 ||
|
||||
parser.compoundFilter != NULL) {
|
||||
status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
|
||||
return;
|
||||
}
|
||||
|
||||
fData = (TransliterationRuleData*)parser.dataVector->orphanElementAt(0);
|
||||
fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
|
||||
setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -22,21 +22,13 @@
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
TransliterationRuleData::TransliterationRuleData(UErrorCode& status)
|
||||
: UMemory(), ruleSet(status),
|
||||
variableNames(0), variables(0), variablesAreOwned(TRUE)
|
||||
: UMemory(), ruleSet(status), variableNames(status),
|
||||
variables(0), variablesAreOwned(TRUE)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
variableNames = new Hashtable(status);
|
||||
/* test for NULL */
|
||||
if (variableNames == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
if (U_SUCCESS(status)) {
|
||||
variableNames->setValueDeleter(uhash_deleteUnicodeString);
|
||||
}
|
||||
variableNames.setValueDeleter(uhash_deleteUnicodeString);
|
||||
variables = 0;
|
||||
variablesLength = 0;
|
||||
}
|
||||
@ -48,16 +40,13 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData&
|
||||
variablesLength(other.variablesLength)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
variableNames = new Hashtable(status);
|
||||
if (U_SUCCESS(status)) {
|
||||
variableNames->setValueDeleter(uhash_deleteUnicodeString);
|
||||
int32_t pos = -1;
|
||||
const UHashElement *e;
|
||||
while ((e = other.variableNames->nextElement(pos)) != 0) {
|
||||
UnicodeString* value =
|
||||
new UnicodeString(*(const UnicodeString*)e->value.pointer);
|
||||
variableNames->put(*(UnicodeString*)e->key.pointer, value, status);
|
||||
}
|
||||
variableNames.setValueDeleter(uhash_deleteUnicodeString);
|
||||
int32_t pos = -1;
|
||||
const UHashElement *e;
|
||||
while ((e = other.variableNames.nextElement(pos)) != 0) {
|
||||
UnicodeString* value =
|
||||
new UnicodeString(*(const UnicodeString*)e->value.pointer);
|
||||
variableNames.put(*(UnicodeString*)e->key.pointer, value, status);
|
||||
}
|
||||
|
||||
variables = 0;
|
||||
@ -78,7 +67,6 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData&
|
||||
}
|
||||
|
||||
TransliterationRuleData::~TransliterationRuleData() {
|
||||
delete variableNames;
|
||||
if (variablesAreOwned && variables != 0) {
|
||||
for (int32_t i=0; i<variablesLength; ++i) {
|
||||
delete variables[i];
|
||||
|
@ -1,5 +1,7 @@
|
||||
/*
|
||||
* Copyright (C) 1999-2005, International Business Machines Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2006, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 11/17/99 aliu Creation.
|
||||
@ -15,14 +17,13 @@
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "rbt_set.h"
|
||||
#include "hash.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeFunctor;
|
||||
class UnicodeString;
|
||||
class UnicodeMatcher;
|
||||
class UnicodeReplacer;
|
||||
class Hashtable;
|
||||
|
||||
/**
|
||||
* The rule data for a RuleBasedTransliterator. RBT objects hold
|
||||
@ -61,7 +62,7 @@ public:
|
||||
* data.variables. The stand-in also represents the UnicodeMatcher in
|
||||
* the stored rules.
|
||||
*/
|
||||
Hashtable* variableNames;
|
||||
Hashtable variableNames;
|
||||
|
||||
/**
|
||||
* Map category variable (UChar) to set (UnicodeFunctor).
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -807,32 +807,33 @@ UBool RuleHalf::isValidInput(TransliteratorParser& transParser) {
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
TransliteratorParser::TransliteratorParser() {
|
||||
dataVector = NULL;
|
||||
idBlockVector = NULL;
|
||||
TransliteratorParser::TransliteratorParser(UErrorCode &statusReturn) :
|
||||
status(U_ZERO_ERROR),
|
||||
dataVector(status),
|
||||
idBlockVector(status),
|
||||
variablesVector(status),
|
||||
segmentObjects(status)
|
||||
{
|
||||
idBlockVector.setDeleter(uhash_deleteUnicodeString);
|
||||
curData = NULL;
|
||||
compoundFilter = NULL;
|
||||
parseData = NULL;
|
||||
variablesVector = NULL;
|
||||
variableNames = NULL;
|
||||
segmentObjects = NULL;
|
||||
variableNames.setValueDeleter(uhash_deleteUnicodeString);
|
||||
if (U_FAILURE(status)) {
|
||||
statusReturn = status;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
TransliteratorParser::~TransliteratorParser() {
|
||||
while (dataVector != NULL && !dataVector->isEmpty())
|
||||
delete (TransliterationRuleData*)(dataVector->orphanElementAt(0));
|
||||
delete dataVector;
|
||||
delete idBlockVector;
|
||||
while (!dataVector.isEmpty())
|
||||
delete (TransliterationRuleData*)(dataVector.orphanElementAt(0));
|
||||
delete compoundFilter;
|
||||
delete parseData;
|
||||
while (variablesVector != NULL && !variablesVector->isEmpty())
|
||||
delete (UnicodeFunctor*)variablesVector->orphanElementAt(0);
|
||||
delete variablesVector;
|
||||
delete variableNames;
|
||||
delete segmentObjects;
|
||||
while (!variablesVector.isEmpty())
|
||||
delete (UnicodeFunctor*)variablesVector.orphanElementAt(0);
|
||||
}
|
||||
|
||||
void
|
||||
@ -878,46 +879,26 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
||||
UBool parsingIDs = TRUE;
|
||||
int32_t ruleCount = 0;
|
||||
|
||||
if (dataVector == NULL)
|
||||
dataVector = new UVector(status);
|
||||
else {
|
||||
while (!dataVector->isEmpty())
|
||||
delete (TransliterationRuleData*)(dataVector->orphanElementAt(0));
|
||||
while (!dataVector.isEmpty()) {
|
||||
delete (TransliterationRuleData*)(dataVector.orphanElementAt(0));
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (idBlockVector == NULL) {
|
||||
idBlockVector = new UVector(status);
|
||||
idBlockVector->setDeleter(uhash_deleteUnicodeString);
|
||||
}
|
||||
else
|
||||
idBlockVector->removeAllElements();
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
idBlockVector.removeAllElements();
|
||||
curData = NULL;
|
||||
|
||||
direction = theDirection;
|
||||
ruleCount = 0;
|
||||
|
||||
delete compoundFilter;
|
||||
compoundFilter = NULL;
|
||||
|
||||
if (variablesVector == NULL) {
|
||||
variablesVector = new UVector(status);
|
||||
} else {
|
||||
while (!variablesVector->isEmpty())
|
||||
delete (UnicodeFunctor*)variablesVector->orphanElementAt(0);
|
||||
while (!variablesVector.isEmpty()) {
|
||||
delete (UnicodeFunctor*)variablesVector.orphanElementAt(0);
|
||||
}
|
||||
if (variableNames == NULL) {
|
||||
variableNames = new Hashtable(status);
|
||||
variableNames->setValueDeleter(uhash_deleteUnicodeString);
|
||||
} else {
|
||||
variableNames->removeAll();
|
||||
}
|
||||
parseData = new ParseData(0, variablesVector, variableNames);
|
||||
variableNames.removeAll();
|
||||
parseData = new ParseData(0, &variablesVector, &variableNames);
|
||||
if (parseData == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
@ -979,9 +960,9 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
||||
if (!parsingIDs) {
|
||||
if (curData != NULL) {
|
||||
if (direction == UTRANS_FORWARD)
|
||||
dataVector->addElement(curData, status);
|
||||
dataVector.addElement(curData, status);
|
||||
else
|
||||
dataVector->insertElementAt(curData, 0, status);
|
||||
dataVector.insertElementAt(curData, 0, status);
|
||||
curData = NULL;
|
||||
}
|
||||
parsingIDs = TRUE;
|
||||
@ -1029,9 +1010,9 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
||||
} else {
|
||||
if (parsingIDs) {
|
||||
if (direction == UTRANS_FORWARD)
|
||||
idBlockVector->addElement(new UnicodeString(idBlockResult), status);
|
||||
idBlockVector.addElement(new UnicodeString(idBlockResult), status);
|
||||
else
|
||||
idBlockVector->insertElementAt(new UnicodeString(idBlockResult), 0, status);
|
||||
idBlockVector.insertElementAt(new UnicodeString(idBlockResult), 0, status);
|
||||
idBlockResult.remove();
|
||||
parsingIDs = FALSE;
|
||||
curData = new TransliterationRuleData(status);
|
||||
@ -1059,23 +1040,23 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
||||
|
||||
if (parsingIDs && idBlockResult.length() > 0) {
|
||||
if (direction == UTRANS_FORWARD)
|
||||
idBlockVector->addElement(new UnicodeString(idBlockResult), status);
|
||||
idBlockVector.addElement(new UnicodeString(idBlockResult), status);
|
||||
else
|
||||
idBlockVector->insertElementAt(new UnicodeString(idBlockResult), 0, status);
|
||||
idBlockVector.insertElementAt(new UnicodeString(idBlockResult), 0, status);
|
||||
}
|
||||
else if (!parsingIDs && curData != NULL) {
|
||||
if (direction == UTRANS_FORWARD)
|
||||
dataVector->addElement(curData, status);
|
||||
dataVector.addElement(curData, status);
|
||||
else
|
||||
dataVector->insertElementAt(curData, 0, status);
|
||||
dataVector.insertElementAt(curData, 0, status);
|
||||
}
|
||||
|
||||
if (U_SUCCESS(status)) {
|
||||
// Convert the set vector to an array
|
||||
int32_t i, dataVectorSize = dataVector->size();
|
||||
int32_t i, dataVectorSize = dataVector.size();
|
||||
for (i = 0; i < dataVectorSize; i++) {
|
||||
TransliterationRuleData* data = (TransliterationRuleData*)dataVector->elementAt(i);
|
||||
data->variablesLength = variablesVector->size();
|
||||
TransliterationRuleData* data = (TransliterationRuleData*)dataVector.elementAt(i);
|
||||
data->variablesLength = variablesVector.size();
|
||||
if (data->variablesLength == 0) {
|
||||
data->variables = 0;
|
||||
} else {
|
||||
@ -1085,19 +1066,19 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
||||
|
||||
for (int32_t j = 0; j < data->variablesLength; j++) {
|
||||
data->variables[j] =
|
||||
((UnicodeSet*)variablesVector->elementAt(j));
|
||||
((UnicodeSet*)variablesVector.elementAt(j));
|
||||
}
|
||||
|
||||
data->variableNames->removeAll();
|
||||
data->variableNames.removeAll();
|
||||
int32_t pos = -1;
|
||||
const UHashElement* he = variableNames->nextElement(pos);
|
||||
const UHashElement* he = variableNames.nextElement(pos);
|
||||
while (he != NULL) {
|
||||
data->variableNames->put(*((UnicodeString*)(he->key.pointer)),
|
||||
data->variableNames.put(*((UnicodeString*)(he->key.pointer)),
|
||||
((UnicodeString*)(he->value.pointer))->clone(), status);
|
||||
he = variableNames->nextElement(pos);
|
||||
he = variableNames.nextElement(pos);
|
||||
}
|
||||
}
|
||||
variablesVector->removeAllElements(); // keeps them from getting deleted when we succeed
|
||||
variablesVector.removeAllElements(); // keeps them from getting deleted when we succeed
|
||||
|
||||
// Index the rules
|
||||
if (compoundFilter != NULL) {
|
||||
@ -1108,11 +1089,11 @@ void TransliteratorParser::parseRules(const UnicodeString& rule,
|
||||
}
|
||||
|
||||
for (i = 0; i < dataVectorSize; i++) {
|
||||
TransliterationRuleData* data = (TransliterationRuleData*)dataVector->elementAt(i);
|
||||
TransliterationRuleData* data = (TransliterationRuleData*)dataVector.elementAt(i);
|
||||
data->ruleSet.freeze(parseError, status);
|
||||
}
|
||||
if (idBlockVector->size() == 1 && ((UnicodeString*)idBlockVector->elementAt(0))->isEmpty()) {
|
||||
idBlockVector->removeElementAt(0);
|
||||
if (idBlockVector.size() == 1 && ((UnicodeString*)idBlockVector.elementAt(0))->isEmpty()) {
|
||||
idBlockVector.removeElementAt(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1127,7 +1108,7 @@ void TransliteratorParser::setVariableRange(int32_t start, int32_t end) {
|
||||
}
|
||||
|
||||
curData->variablesBase = (UChar) start;
|
||||
if (dataVector->size() == 0) {
|
||||
if (dataVector.size() == 0) {
|
||||
variableNext = (UChar) start;
|
||||
variableLimit = (UChar) (end + 1);
|
||||
}
|
||||
@ -1250,11 +1231,7 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
||||
|
||||
// Set up segments data
|
||||
segmentStandins.truncate(0);
|
||||
if (segmentObjects == NULL) {
|
||||
segmentObjects = new UVector(status);
|
||||
} else {
|
||||
segmentObjects->removeAllElements();
|
||||
}
|
||||
segmentObjects.removeAllElements();
|
||||
|
||||
// Use pointers to automatics to make swapping possible.
|
||||
RuleHalf _left(*this), _right(*this);
|
||||
@ -1328,7 +1305,7 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
||||
}
|
||||
// We allow anything on the right, including an empty string.
|
||||
UnicodeString* value = new UnicodeString(right->text);
|
||||
variableNames->put(undefinedVariableName, value, status);
|
||||
variableNames.put(undefinedVariableName, value, status);
|
||||
++variableLimit;
|
||||
return pos;
|
||||
}
|
||||
@ -1342,7 +1319,7 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
||||
}
|
||||
|
||||
// Verify segments
|
||||
if (segmentStandins.length() > segmentObjects->size()) {
|
||||
if (segmentStandins.length() > segmentObjects.size()) {
|
||||
syntaxError(U_UNDEFINED_SEGMENT_REFERENCE, rule, start);
|
||||
}
|
||||
for (i=0; i<segmentStandins.length(); ++i) {
|
||||
@ -1350,8 +1327,8 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
||||
syntaxError(U_INTERNAL_TRANSLITERATOR_ERROR, rule, start); // will never happen
|
||||
}
|
||||
}
|
||||
for (i=0; i<segmentObjects->size(); ++i) {
|
||||
if (segmentObjects->elementAt(i) == NULL) {
|
||||
for (i=0; i<segmentObjects.size(); ++i) {
|
||||
if (segmentObjects.elementAt(i) == NULL) {
|
||||
syntaxError(U_INTERNAL_TRANSLITERATOR_ERROR, rule, start); // will never happen
|
||||
}
|
||||
}
|
||||
@ -1410,16 +1387,16 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos,
|
||||
|
||||
// Flatten segment objects vector to an array
|
||||
UnicodeFunctor** segmentsArray = NULL;
|
||||
if (segmentObjects->size() > 0) {
|
||||
segmentsArray = (UnicodeFunctor **)uprv_malloc(segmentObjects->size() * sizeof(UnicodeFunctor *));
|
||||
segmentObjects->toArray((void**) segmentsArray);
|
||||
if (segmentObjects.size() > 0) {
|
||||
segmentsArray = (UnicodeFunctor **)uprv_malloc(segmentObjects.size() * sizeof(UnicodeFunctor *));
|
||||
segmentObjects.toArray((void**) segmentsArray);
|
||||
}
|
||||
|
||||
curData->ruleSet.addRule(new TransliterationRule(
|
||||
left->text, left->ante, left->post,
|
||||
right->text, right->cursor, right->cursorOffset,
|
||||
segmentsArray,
|
||||
segmentObjects->size(),
|
||||
segmentObjects.size(),
|
||||
left->anchorStart, left->anchorEnd,
|
||||
curData,
|
||||
status), status);
|
||||
@ -1484,8 +1461,8 @@ UChar TransliteratorParser::generateStandInFor(UnicodeFunctor* adopted) {
|
||||
|
||||
// Look up previous stand-in, if any. This is a short list
|
||||
// (typical n is 0, 1, or 2); linear search is optimal.
|
||||
for (int32_t i=0; i<variablesVector->size(); ++i) {
|
||||
if (variablesVector->elementAt(i) == adopted) { // [sic] pointer comparison
|
||||
for (int32_t i=0; i<variablesVector.size(); ++i) {
|
||||
if (variablesVector.elementAt(i) == adopted) { // [sic] pointer comparison
|
||||
return (UChar) (curData->variablesBase + i);
|
||||
}
|
||||
}
|
||||
@ -1495,7 +1472,7 @@ UChar TransliteratorParser::generateStandInFor(UnicodeFunctor* adopted) {
|
||||
status = U_VARIABLE_RANGE_EXHAUSTED;
|
||||
return 0;
|
||||
}
|
||||
variablesVector->addElement(adopted, status);
|
||||
variablesVector.addElement(adopted, status);
|
||||
return variableNext++;
|
||||
}
|
||||
|
||||
@ -1518,7 +1495,7 @@ UChar TransliteratorParser::getSegmentStandin(int32_t seg) {
|
||||
// Set a placeholder in the master variables vector that will be
|
||||
// filled in later by setSegmentObject(). We know that we will get
|
||||
// called first because setSegmentObject() will call us.
|
||||
variablesVector->addElement((void*) NULL, status);
|
||||
variablesVector.addElement((void*) NULL, status);
|
||||
segmentStandins.setCharAt(seg-1, c);
|
||||
}
|
||||
return c;
|
||||
@ -1532,18 +1509,18 @@ void TransliteratorParser::setSegmentObject(int32_t seg, StringMatcher* adopted)
|
||||
// segments will result in segment i+1 getting parsed
|
||||
// and stored before segment i; be careful with the
|
||||
// vector handling here.
|
||||
if (segmentObjects->size() < seg) {
|
||||
segmentObjects->setSize(seg);
|
||||
if (segmentObjects.size() < seg) {
|
||||
segmentObjects.setSize(seg);
|
||||
}
|
||||
int32_t index = getSegmentStandin(seg) - curData->variablesBase;
|
||||
if (segmentObjects->elementAt(seg-1) != NULL ||
|
||||
variablesVector->elementAt(index) != NULL) {
|
||||
if (segmentObjects.elementAt(seg-1) != NULL ||
|
||||
variablesVector.elementAt(index) != NULL) {
|
||||
// should never happen
|
||||
status = U_INTERNAL_TRANSLITERATOR_ERROR;
|
||||
return;
|
||||
}
|
||||
segmentObjects->setElementAt(adopted, seg-1);
|
||||
variablesVector->setElementAt(adopted, index);
|
||||
segmentObjects.setElementAt(adopted, seg-1);
|
||||
variablesVector.setElementAt(adopted, index);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1563,7 +1540,7 @@ UChar TransliteratorParser::getDotStandIn() {
|
||||
*/
|
||||
void TransliteratorParser::appendVariableDef(const UnicodeString& name,
|
||||
UnicodeString& buf) {
|
||||
const UnicodeString* s = (const UnicodeString*) variableNames->get(name);
|
||||
const UnicodeString* s = (const UnicodeString*) variableNames.get(name);
|
||||
if (s == NULL) {
|
||||
// We allow one undefined variable so that variable definition
|
||||
// statements work. For the first undefined variable we return
|
||||
|
@ -1,5 +1,7 @@
|
||||
/*
|
||||
* Copyright (C) 1999-2005, International Business Machines Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2006, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 11/17/99 aliu Creation.
|
||||
@ -17,6 +19,8 @@
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "rbt.h"
|
||||
#include "hash.h"
|
||||
#include "uvector.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
@ -25,25 +29,31 @@ class UnicodeFunctor;
|
||||
class ParseData;
|
||||
class RuleHalf;
|
||||
class ParsePosition;
|
||||
class UVector;
|
||||
class Hashtable;
|
||||
class StringMatcher;
|
||||
|
||||
class TransliteratorParser : public UMemory {
|
||||
|
||||
private:
|
||||
/**
|
||||
* We use a single error code during parsing. Rather than pass it
|
||||
* through each API, we keep it here.
|
||||
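* THIS MUST BE DEFINED FIRST! The constructor passes it to the UVector member initializers, and members are initialized in declaration order.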
|
||||
*/
|
||||
UErrorCode status;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* A Vector of TransliterationRuleData objects, one for each discrete group
|
||||
* of rules in the rule set
|
||||
*/
|
||||
UVector* dataVector;
|
||||
UVector dataVector;
|
||||
|
||||
/**
|
||||
* PUBLIC data member.
|
||||
* A Vector of UnicodeStrings containing all of the ID blocks in the rule set
|
||||
*/
|
||||
UVector* idBlockVector;
|
||||
UVector idBlockVector;
|
||||
|
||||
/**
|
||||
* PUBLIC data member containing the parsed compound filter, if any.
|
||||
@ -59,12 +69,6 @@ class TransliteratorParser : public UMemory {
|
||||
|
||||
UTransDirection direction;
|
||||
|
||||
/**
|
||||
* We use a single error code during parsing. Rather than pass it
|
||||
* through each API, we keep it here.
|
||||
*/
|
||||
UErrorCode status;
|
||||
|
||||
/**
|
||||
* Parse error information.
|
||||
*/
|
||||
@ -80,13 +84,13 @@ class TransliteratorParser : public UMemory {
|
||||
* is copied into the array data.variables. As with data.variables,
|
||||
* element 0 corresponds to character data.variablesBase.
|
||||
*/
|
||||
UVector* variablesVector;
|
||||
UVector variablesVector;
|
||||
|
||||
/**
|
||||
* Temporary table of variable names. When parsing is complete, this is
|
||||
* copied into data.variableNames.
|
||||
*/
|
||||
Hashtable* variableNames;
|
||||
Hashtable variableNames;
|
||||
|
||||
/**
|
||||
* String of standins for segments. Used during the parsing of a single
|
||||
@ -101,7 +105,7 @@ class TransliteratorParser : public UMemory {
|
||||
* segmentStandins.charAt(0) is the standin for "$1" and corresponds
|
||||
* to StringMatcher object segmentObjects.elementAt(0), etc.
|
||||
*/
|
||||
UVector* segmentObjects;
|
||||
UVector segmentObjects;
|
||||
|
||||
/**
|
||||
* The next available stand-in for variables. This starts at some point in
|
||||
@ -139,7 +143,7 @@ public:
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
TransliteratorParser();
|
||||
TransliteratorParser(UErrorCode &statusReturn);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2005, International Business Machines
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -994,7 +994,7 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id,
|
||||
// Other aliases are handled with TransliteratorAlias::create().
|
||||
if (alias->isRuleBased()) {
|
||||
// Step 1. parse
|
||||
TransliteratorParser parser;
|
||||
TransliteratorParser parser(ec);
|
||||
alias->parse(parser, pe, ec);
|
||||
delete alias;
|
||||
alias = 0;
|
||||
@ -1045,7 +1045,7 @@ Transliterator::createFromRules(const UnicodeString& ID,
|
||||
{
|
||||
Transliterator* t = NULL;
|
||||
|
||||
TransliteratorParser parser;
|
||||
TransliteratorParser parser(status);
|
||||
parser.parse(rules, dir, parseError, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
@ -1053,13 +1053,13 @@ Transliterator::createFromRules(const UnicodeString& ID,
|
||||
}
|
||||
|
||||
// NOTE: The logic here matches that in TransliteratorRegistry.
|
||||
if (parser.idBlockVector->size() == 0 && parser.dataVector->size() == 0) {
|
||||
if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) {
|
||||
t = new NullTransliterator();
|
||||
}
|
||||
else if (parser.idBlockVector->size() == 0 && parser.dataVector->size() == 1) {
|
||||
t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector->orphanElementAt(0), TRUE);
|
||||
else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) {
|
||||
t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector.orphanElementAt(0), TRUE);
|
||||
}
|
||||
else if (parser.idBlockVector->size() == 1 && parser.dataVector->size() == 0) {
|
||||
else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) {
|
||||
// idBlock, no data -- this is an alias. The ID has
|
||||
// been munged from reverse into forward mode, if
|
||||
// necessary, so instantiate the ID in the forward
|
||||
@ -1068,10 +1068,10 @@ Transliterator::createFromRules(const UnicodeString& ID,
|
||||
UnicodeString filterPattern;
|
||||
parser.compoundFilter->toPattern(filterPattern, FALSE);
|
||||
t = createInstance(filterPattern + UnicodeString(ID_DELIM)
|
||||
+ *((UnicodeString*)parser.idBlockVector->elementAt(0)), UTRANS_FORWARD, parseError, status);
|
||||
+ *((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status);
|
||||
}
|
||||
else
|
||||
t = createInstance(*((UnicodeString*)parser.idBlockVector->elementAt(0)), UTRANS_FORWARD, parseError, status);
|
||||
t = createInstance(*((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status);
|
||||
|
||||
|
||||
if (t != NULL) {
|
||||
@ -1082,13 +1082,13 @@ Transliterator::createFromRules(const UnicodeString& ID,
|
||||
UVector transliterators(status);
|
||||
int32_t passNumber = 1;
|
||||
|
||||
int32_t limit = parser.idBlockVector->size();
|
||||
if (parser.dataVector->size() > limit)
|
||||
limit = parser.dataVector->size();
|
||||
int32_t limit = parser.idBlockVector.size();
|
||||
if (parser.dataVector.size() > limit)
|
||||
limit = parser.dataVector.size();
|
||||
|
||||
for (int32_t i = 0; i < limit; i++) {
|
||||
if (i < parser.idBlockVector->size()) {
|
||||
UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector->elementAt(i);
|
||||
if (i < parser.idBlockVector.size()) {
|
||||
UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
|
||||
if (!idBlock->isEmpty()) {
|
||||
Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status);
|
||||
if (temp != NULL && temp->getDynamicClassID() != NullTransliterator::getStaticClassID())
|
||||
@ -1097,8 +1097,8 @@ Transliterator::createFromRules(const UnicodeString& ID,
|
||||
delete temp;
|
||||
}
|
||||
}
|
||||
if (!parser.dataVector->isEmpty()) {
|
||||
TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector->orphanElementAt(0);
|
||||
if (!parser.dataVector.isEmpty()) {
|
||||
TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
|
||||
transliterators.addElement(
|
||||
new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + (passNumber++),
|
||||
data, TRUE), status);
|
||||
@ -1251,7 +1251,7 @@ void U_EXPORT2 Transliterator::registerAlias(const UnicodeString& aliasID,
|
||||
|
||||
void Transliterator::_registerAlias(const UnicodeString& aliasID,
|
||||
const UnicodeString& realID) {
|
||||
registry->put(aliasID, realID, TRUE);
|
||||
registry->put(aliasID, realID, FALSE, TRUE);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1507,13 +1507,13 @@ UBool Transliterator::initializeRegistry() {
|
||||
(ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) ==
|
||||
0x0046 /*F*/) ?
|
||||
UTRANS_FORWARD : UTRANS_REVERSE;
|
||||
registry->put(id, UnicodeString(TRUE, resString, len), dir, visible);
|
||||
registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible);
|
||||
}
|
||||
break;
|
||||
case 0x61: // 'a'
|
||||
// 'alias'; row[2]=createInstance argument
|
||||
resString = ures_getString(res, &len, &status);
|
||||
registry->put(id, UnicodeString(TRUE, resString, len), TRUE);
|
||||
registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -562,17 +562,17 @@ Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
|
||||
entry->entryType == Entry::RULES_REVERSE ||
|
||||
entry->entryType == Entry::LOCALE_RULES) {
|
||||
|
||||
if (parser.idBlockVector->isEmpty() && parser.dataVector->isEmpty()) {
|
||||
if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
|
||||
entry->u.data = 0;
|
||||
entry->entryType = Entry::ALIAS;
|
||||
entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
|
||||
}
|
||||
else if (parser.idBlockVector->isEmpty() && parser.dataVector->size() == 1) {
|
||||
entry->u.data = (TransliterationRuleData*)parser.dataVector->orphanElementAt(0);
|
||||
else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
|
||||
entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
|
||||
entry->entryType = Entry::RBT_DATA;
|
||||
}
|
||||
else if (parser.idBlockVector->size() == 1 && parser.dataVector->isEmpty()) {
|
||||
entry->stringArg = *(UnicodeString*)(parser.idBlockVector->elementAt(0));
|
||||
else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
|
||||
entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0));
|
||||
entry->compoundFilter = parser.orphanCompoundFilter();
|
||||
entry->entryType = Entry::ALIAS;
|
||||
}
|
||||
@ -582,18 +582,18 @@ Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
|
||||
entry->u.dataVector = new UVector(status);
|
||||
entry->stringArg.remove();
|
||||
|
||||
int32_t limit = parser.idBlockVector->size();
|
||||
if (parser.dataVector->size() > limit)
|
||||
limit = parser.dataVector->size();
|
||||
int32_t limit = parser.idBlockVector.size();
|
||||
if (parser.dataVector.size() > limit)
|
||||
limit = parser.dataVector.size();
|
||||
|
||||
for (int32_t i = 0; i < limit; i++) {
|
||||
if (i < parser.idBlockVector->size()) {
|
||||
UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector->elementAt(i);
|
||||
if (i < parser.idBlockVector.size()) {
|
||||
UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
|
||||
if (!idBlock->isEmpty())
|
||||
entry->stringArg += *idBlock;
|
||||
}
|
||||
if (!parser.dataVector->isEmpty()) {
|
||||
TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector->orphanElementAt(0);
|
||||
if (!parser.dataVector.isEmpty()) {
|
||||
TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
|
||||
entry->u.dataVector->addElement(data, status);
|
||||
entry->stringArg += (UChar)0xffff; // use U+FFFF to mark position of RBTs in ID block
|
||||
}
|
||||
@ -625,20 +625,32 @@ void TransliteratorRegistry::put(const UnicodeString& ID,
|
||||
void TransliteratorRegistry::put(const UnicodeString& ID,
|
||||
const UnicodeString& resourceName,
|
||||
UTransDirection dir,
|
||||
UBool readonlyResourceAlias,
|
||||
UBool visible) {
|
||||
Entry *entry = new Entry();
|
||||
entry->entryType = (dir == UTRANS_FORWARD) ? Entry::RULES_FORWARD
|
||||
: Entry::RULES_REVERSE;
|
||||
entry->stringArg = resourceName;
|
||||
if (readonlyResourceAlias) {
|
||||
entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1);
|
||||
}
|
||||
else {
|
||||
entry->stringArg = resourceName;
|
||||
}
|
||||
registerEntry(ID, entry, visible);
|
||||
}
|
||||
|
||||
void TransliteratorRegistry::put(const UnicodeString& ID,
|
||||
const UnicodeString& alias,
|
||||
UBool readonlyAliasAlias,
|
||||
UBool visible) {
|
||||
Entry *entry = new Entry();
|
||||
entry->entryType = Entry::ALIAS;
|
||||
entry->stringArg = alias;
|
||||
if (readonlyAliasAlias) {
|
||||
entry->stringArg.setTo(TRUE, alias.getBuffer(), -1);
|
||||
}
|
||||
else {
|
||||
entry->stringArg = alias;
|
||||
}
|
||||
registerEntry(ID, entry, visible);
|
||||
}
|
||||
|
||||
@ -1251,7 +1263,7 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
|
||||
// and possibly also into an ::id header and/or footer. Then
|
||||
// we modify the registry with the parsed data and retry.
|
||||
{
|
||||
TransliteratorParser parser;
|
||||
TransliteratorParser parser(status);
|
||||
|
||||
// We use the file name, taken from another resource bundle
|
||||
// 2-d array at static init time, as a locale language. We're
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2005, International Business Machines
|
||||
* Copyright (c) 2001-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
@ -222,6 +222,7 @@ class TransliteratorRegistry : public UMemory {
|
||||
void put(const UnicodeString& ID,
|
||||
const UnicodeString& resourceName,
|
||||
UTransDirection dir,
|
||||
UBool readonlyResourceAlias,
|
||||
UBool visible);
|
||||
|
||||
/**
|
||||
@ -231,6 +232,7 @@ class TransliteratorRegistry : public UMemory {
|
||||
*/
|
||||
void put(const UnicodeString& ID,
|
||||
const UnicodeString& alias,
|
||||
UBool readonlyAliasAlias,
|
||||
UBool visible);
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user