ICU-96 performance improvements

X-SVN-Rev: 247
This commit is contained in:
Steven R. Loomis 1999-11-23 22:49:29 +00:00
parent c372fee921
commit c07aed7913
5 changed files with 609 additions and 117 deletions

View File

@ -35,6 +35,7 @@
// 6/20/97 helena Java class name change.
// 04/23/99 stephen Removed EDecompositionMode, merged with
// Normalizer::EMode
// 11/23/9 srl Inlining of some critical functions
//=============================================================================
#include "colcache.h"
@ -140,11 +141,6 @@ Collator::greater(const UnicodeString& source,
return (compare(source, target) == Collator::GREATER);
}
Collator::ECollationStrength
Collator::getStrength() const
{
return strength;
}
void
Collator::setStrength(Collator::ECollationStrength newStrength)
@ -152,11 +148,6 @@ Collator::setStrength(Collator::ECollationStrength newStrength)
strength = newStrength;
}
Normalizer::EMode
Collator::getDecomposition() const
{
return decmp;
}
void
Collator::setDecomposition(Normalizer::EMode decompositionMode)
{

View File

@ -34,6 +34,10 @@
// 02/10/98 damiba Added compare() with length as parameter.
// 04/23/99 stephen Removed EDecompositionMode, merged with
// Normalizer::EMode.
// 11/02/99 helena Collator performance enhancements. Eliminates the
// UnicodeString construction and special case for NO_OP.
// 11/23/99 srl More performance enhancements. Inlining of
// critical accessors.
//=============================================================================
#ifndef COLL_H
@ -299,6 +303,38 @@ public:
int32_t length) const = 0;
/**
* The comparison function compares the character data stored in two
* different string arrays. Returns information about whether a string
* array is less than, greater than or equal to another string array.
* <p>Example of use:
* <pre>
* . UErrorCode status = U_ZERO_ERROR;
* . Collator *myCollation = Collator::createInstance(Locale::US, status);
* . if (U_FAILURE(status)) return;
* . myCollation->setStrength(Collator::PRIMARY);
* . // result would be Collator::EQUAL ("abc" == "ABC")
* . // (no primary difference between "abc" and "ABC")
* . Collator::EComparisonResult result = myCollation->compare(L"abc", 3, L"ABC", 3);
* . myCollation->setStrength(Collator::TERTIARY);
* . // result would be Collator::LESS (abc" &lt;&lt;&lt; "ABC")
* . // (with tertiary difference between "abc" and "ABC")
* . Collator::EComparisonResult result = myCollation->compare(L"abc", 3, L"ABC", 3);
* </pre>
* @param source the source string array to be compared with.
* @param sourceLength the length of the source string array. If this value
* is equal to -1, the string array is null-terminated.
* @param target the string that is to be compared with the source string.
* @param targetLength the length of the target string array. If this value
* is equal to -1, the string array is null-terminated.
* @return Returns a byte value. GREATER if source is greater
* than target; EQUAL if source is equal to target; LESS if source is less
* than target
**/
virtual EComparisonResult compare( const UChar* source,
int32_t sourceLength,
const UChar* target,
int32_t targetLength) const = 0;
/** Transforms the string into a series of characters that can be compared
* with CollationKey::compareTo. It is not possible to restore the original
@ -339,6 +375,24 @@ public:
virtual CollationKey& getCollationKey(const UnicodeString& source,
CollationKey& key,
UErrorCode& status) const = 0;
/** Transforms the string into a series of characters that can be compared
* with CollationKey::compareTo. It is not possible to restore the original
* string from the chars in the sort key. The generated sort key handles
* only a limited number of ignorable characters.
* <p>Use CollationKey::equals or CollationKey::compare to compare the
* generated sort keys.
* <p>If the source string is null, a null collation key will be returned.
* @param source the source string to be transformed into a sort key.
* @param sourceLength length of the collation key
* @param key the collation key to be filled in
* @return the collation key of the string based on the collation rules.
* @see CollationKey#compare
*/
virtual CollationKey& getCollationKey(const UChar *source,
int32_t sourceLength,
CollationKey& key,
UErrorCode& status) const = 0;
/**
* Generates the hash code for the collation object
*/
@ -503,4 +557,17 @@ Collator::operator!=(const Collator& other) const
return result;
}
inline Collator::ECollationStrength
Collator::getStrength() const
{
return strength;
}
inline Normalizer::EMode
Collator::getDecomposition() const
{
return decmp;
}
#endif

View File

@ -43,7 +43,10 @@
* Normalizer::EMode
* 06/14/99 stephen Removed kResourceBundleSuffix
* 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
* files are no longer used.
* files are no longer used.
* 11/02/99 helena Collator performance enhancements. Special case
* for NO_OP situations.
* 11/17/99 srl More performance enhancements. Inlined some internal functions.
*******************************************************************************
*/
@ -69,6 +72,8 @@
#include <string.h>
#include <ustring.h>
class RuleBasedCollatorStreamer
{
@ -129,14 +134,124 @@ const int16_t RuleBasedCollator::FILEID = 0x5443; // unique f
const char* RuleBasedCollator::kFilenameSuffix = ".col"; // binary collation file extension
char RuleBasedCollator::fgClassID = 0; // Value is irrelevant // class id
//================ Some inline definitions of implementation functions........ ========
// Get the character order in the mapping table
inline int32_t
RuleBasedCollator::getUnicodeOrder(UChar ch) const
{
return ucmp32_get(data->mapping, ch);
}
inline int32_t
RuleBasedCollator::strengthOrder(int32_t value) const
{
if (getStrength() == PRIMARY)
{
return (value & PRIMARYDIFFERENCEONLY);
} else if (getStrength() == SECONDARY)
{
return (value & SECONDARYDIFFERENCEONLY);
}
return value;
}
inline int32_t
RuleBasedCollator::getStrengthOrder(NormalizerIterator* cursor,
UErrorCode status) const
{
if (U_FAILURE(status))
{
return CollationElementIterator::NULLORDER;
}
if (cursor->bufferAlias != NULL)
{
// bufferAlias needs a bit of an explanation.
// When we hit an expanding character in the text, we call the order's
// getExpandValues method to retrieve an array of the orderings for all
// of the characters in the expansion (see the end of this method).
// The first ordering is returned, and an alias to the orderings array
// is saved so that the remaining orderings can be returned on subsequent
// calls to next. So, if the expanding buffer is not exhausted,
// all we have to do here is return the next ordering in the buffer.
if (cursor->expIndex < cursor->bufferAlias->size())
{
//_L((stderr, "next from [%08X] from bufferAlias\n", this));
return strengthOrder(cursor->bufferAlias->at(cursor->expIndex++));
}
else
{
cursor->bufferAlias = NULL;
cursor->expIndex = 0;
}
}
else if (cursor->swapOrder != 0)
{
// If we find a character with no order, we return the marking
// flag, UNMAPPEDCHARVALUE, 0x7fff0000, and then the character
// itself shifted left 16 bits as orders. At this point, the
// UNMAPPEDCHARVALUE flag has already been returned by the code
// below, so just return the shifted character here.
int32_t order = cursor->swapOrder << 16;
//_L((stderr, "next from [%08X] swaporder..\n", this));
cursor->swapOrder = 0;
return order;
}
UChar ch = cursor->current();
cursor->next();
//_L((stderr, "Next from [%08X] = [%04X], [%c]\n", cursor, (int)ch & 0xFFFF, (char)(ch & 0xFF)));
if (ch == Normalizer::DONE) {
return CollationElementIterator::NULLORDER;
}
// Ask the collator for this character's ordering.
int32_t value = getUnicodeOrder(ch);
if (value == UNMAPPED)
{
// Returned an "unmapped" flag and save the character so it can be
// returned next time this method is called.
if (ch == 0x0000) return ch;
cursor->swapOrder = ch; // \u0000 is not valid in C++'s UnicodeString
return CollationElementIterator::UNMAPPEDCHARVALUE;
}
if (value >= CONTRACTCHARINDEX)
{
value = nextContractChar(cursor, ch, status);
}
if (value >= EXPANDCHARINDEX)
{
cursor->bufferAlias = getExpandValueList(value);
cursor->expIndex = 0;
value = cursor->bufferAlias->at(cursor->expIndex++);
}
int32_t str = strengthOrder(value);
return strengthOrder(value);
}
// ==================== End inlines ============================================
//===============================================================================
RuleBasedCollator::RuleBasedCollator()
: Collator(),
isOverIgnore(FALSE),
mPattern(0),
sourceCursor(0),
targetCursor(0),
// sourceCursor(0),
//targetCursor(0),
cursor1(0),
cursor2(0),
data(0),
dataIsOwned(FALSE)
{
@ -146,8 +261,10 @@ RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
: Collator(that),
isOverIgnore(that.isOverIgnore),
mPattern(0),
sourceCursor(0),
targetCursor(0),
// sourceCursor(0),
//targetCursor(0),
cursor1(0),
cursor2(0),
dataIsOwned(FALSE),
data(that.data) // Alias the data pointer
{
@ -214,8 +331,10 @@ RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
: Collator(),
isOverIgnore(FALSE),
mPattern(0),
sourceCursor(0),
targetCursor(0),
// sourceCursor(0),
/// targetCursor(0),
cursor1(0),
cursor2(0),
data(0),
dataIsOwned(FALSE)
{
@ -233,8 +352,10 @@ RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
: Collator(collationStrength, Normalizer::NO_OP),
isOverIgnore(FALSE),
mPattern(0),
sourceCursor(0),
targetCursor(0),
// sourceCursor(0),
// targetCursor(0),
cursor1(0),
cursor2(0),
data(0),
dataIsOwned(FALSE)
{
@ -242,7 +363,6 @@ RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
{
return;
}
constructFromRules(rules, status);
}
@ -252,8 +372,10 @@ RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
: Collator(TERTIARY, decompositionMode),
isOverIgnore(FALSE),
mPattern(0),
sourceCursor(0),
targetCursor(0),
// sourceCursor(0),
// targetCursor(0),
cursor1(0),
cursor2(0),
data(0),
dataIsOwned(FALSE)
{
@ -272,8 +394,10 @@ RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
: Collator(collationStrength, decompositionMode),
isOverIgnore(FALSE),
mPattern(0),
sourceCursor(0),
targetCursor(0),
// sourceCursor(0),
//targetCursor(0),
cursor1(0),
cursor2(0),
data(0),
dataIsOwned(FALSE)
{
@ -392,10 +516,14 @@ RuleBasedCollator::RuleBasedCollator( const Locale& desiredLocale,
isOverIgnore(FALSE),
dataIsOwned(FALSE),
data(0),
sourceCursor(0),
targetCursor(0),
// sourceCursor(0),
//targetCursor(0),
cursor1(0),
cursor2(0),
mPattern(0)
{
if (U_FAILURE(status))
{
return;
@ -447,6 +575,18 @@ RuleBasedCollator::RuleBasedCollator( const Locale& desiredLocale,
return;
}
// srl write out default.col
{
UnicodeString defLocaleName = ResourceBundle::kDefaultFilename;
char *binaryFilePath = createPathName(Locale::getDataDirectory(),
defLocaleName, kFilenameSuffix);
bool_t ok = writeToFile(binaryFilePath);
delete [] binaryFilePath;
#ifdef COLLDEBUG
cerr << defLocaleName << " [default] binary write " << (ok? "OK" : "Failed") << endl;
#endif
}
data->desiredLocale = desiredLocale;
desiredLocale.getName(localeName);
data->realLocaleName = localeName;
@ -567,7 +707,7 @@ RuleBasedCollator::constructFromFile( const Locale& locale,
// Try to load up the collation from a binary file first
constructFromFile(binaryFilePath, status);
#ifdef COLLDEBUG
cerr << localeFileName << " binary load " << errorName(status) << endl;
cerr << localeFileName << kFilenameSuffix << " binary load " << errorName(status) << endl;
#endif
if(U_SUCCESS(status) || status == U_MEMORY_ALLOCATION_ERROR)
return;
@ -629,7 +769,7 @@ RuleBasedCollator::constructFromFile( const Locale& locale,
}
#ifdef COLLDEBUG
cerr << localeFileName << " ascii load " << (U_SUCCESS(status) ? "OK" : "Failed") << endl;
cerr << localeFileName << " ascii load " << (U_SUCCESS(status) ? "OK" : "Failed") << " - try= " << (tryBinaryFile?"true":"false") << endl;
#endif
if(U_SUCCESS(status) && tryBinaryFile) {
@ -655,11 +795,20 @@ RuleBasedCollator::~RuleBasedCollator()
data = 0;
delete sourceCursor;
sourceCursor = 0;
// delete sourceCursor;
// sourceCursor = 0;
delete targetCursor;
targetCursor = 0;
// delete targetCursor;
// targetCursor = 0;
if (cursor1 != NULL) {
delete cursor1;
cursor1 = 0;
}
if (cursor2 != NULL) {
delete cursor2;
cursor2 = 0;
}
delete mPattern;
mPattern = 0;
@ -742,13 +891,13 @@ RuleBasedCollator::getRules() const
data->isRuleTableLoaded = TRUE;
#ifdef _DEBUG
// the following is useful for specific debugging purposes
// UnicodeString name;
// cerr << "Table collation rules loaded dynamically for "
// << data->desiredLocale.getName(name)
// << " at "
// << data->realLocaleName
// << ", " << dec << data->ruleTable.size() << " characters"
// << endl;
UnicodeString name;
cerr << "Table collation rules loaded dynamically for "
<< data->desiredLocale.getName(name)
<< " at "
<< data->realLocaleName
<< ", " << dec << data->ruleTable.size() << " characters"
<< endl;
#endif
}
else
@ -762,6 +911,16 @@ RuleBasedCollator::getRules() const
<< endl;
cerr << "Status " << errorName(status) << ", mPattern " << temp.mPattern << endl;
#endif
/* SRL have to add this because we now have the situation where
DEFAULT is loaded from a binary file w/ no rules. */
UErrorCode intStatus = U_ZERO_ERROR;
temp.constructFromRules(RuleBasedCollator::DEFAULTRULES, intStatus);
if(U_SUCCESS(intStatus) && (temp.mPattern != 0))
{
data->ruleTable = temp.getRules();
data->isRuleTableLoaded = TRUE;
}
}
}
@ -783,14 +942,15 @@ RuleBasedCollator::compare( const UnicodeString& source,
return (RuleBasedCollator::compare(source_togo, target_togo));
}
// Compare two strings using this collator
Collator::EComparisonResult
RuleBasedCollator::compare(const UnicodeString& source,
const UnicodeString& target) const
RuleBasedCollator::compare(const UChar* source,
int32_t sourceLength,
const UChar* target,
int32_t targetLength) const
{
// check if source and target are valid strings
if (source.isBogus() || target.isBogus())
if (((source == 0) && (target == 0)) ||
((sourceLength == 0) && (targetLength == 0)))
{
return Collator::EQUAL;
}
@ -798,55 +958,36 @@ RuleBasedCollator::compare(const UnicodeString& source,
Collator::EComparisonResult result = Collator::EQUAL;
UErrorCode status = U_ZERO_ERROR;
// The basic algorithm here is that we use CollationElementIterators
// to step through both the source and target strings. We compare each
// collation element in the source string against the corresponding one
// in the target, checking for differences.
//
// If a difference is found, we set <result> to LESS or GREATER to
// indicate whether the source string is less or greater than the target.
//
// However, it's not that simple. If we find a tertiary difference
// (e.g. 'A' vs. 'a') near the beginning of a string, it can be
// overridden by a primary difference (e.g. "A" vs. "B") later in
// the string. For example, "AA" < "aB", even though 'A' > 'a'.
//
// To keep track of this, we use checkSecTer and checkTertiary to keep
// track of the strength of the most significant difference that has been
// found so far. When we find a difference whose strength is greater than
// the previous ones, it overrides the last difference (if any) that
// was found.
//
if (sourceCursor == NULL)
if (cursor1 == NULL)
{
((RuleBasedCollator *)this)->sourceCursor = createCollationElementIterator(source);
((RuleBasedCollator *)this)->cursor1 = new NormalizerIterator(source, sourceLength, getDecomposition());
}
else
{
sourceCursor->setText(source, status);
cursor1->setModeAndText(getDecomposition(), source, sourceLength, status);
}
if (sourceCursor == NULL || U_FAILURE(status))
if ( /*cursor1->cursor == NULL ||*/ U_FAILURE(status))
{
return Collator::EQUAL;
}
if (targetCursor == NULL)
if (cursor2 == NULL)
{
((RuleBasedCollator *)this)->targetCursor = createCollationElementIterator(target);
((RuleBasedCollator *)this)->cursor2 = new NormalizerIterator(target, targetLength, getDecomposition());
}
else
{
targetCursor->setText(target, status);
cursor2->setModeAndText(getDecomposition(), target, targetLength, status);
}
if (targetCursor == NULL || U_FAILURE(status))
if (/*cursor2 == NULL ||*/ U_FAILURE(status))
{
return Collator::EQUAL;
}
int32_t sOrder, tOrder;
// int32_t sOrder = CollationElementIterator::NULLORDER, tOrder = CollationElementIterator::NULLORDER;
bool_t gets = TRUE, gett = TRUE;
bool_t initialCheckSecTer = getStrength() >= Collator::SECONDARY;
bool_t checkSecTer = initialCheckSecTer;
@ -860,7 +1001,7 @@ RuleBasedCollator::compare(const UnicodeString& source,
// we've been requested to skip it.
if (gets)
{
sOrder = sourceCursor->next(status);
sOrder = getStrengthOrder((NormalizerIterator*)cursor1, status);
if (U_FAILURE(status))
{
@ -872,7 +1013,7 @@ RuleBasedCollator::compare(const UnicodeString& source,
if (gett)
{
tOrder = targetCursor->next(status);
tOrder = getStrengthOrder((NormalizerIterator*)cursor2, status);
if (U_FAILURE(status))
{
@ -1036,7 +1177,7 @@ RuleBasedCollator::compare(const UnicodeString& source,
}
}
}
while ((sOrder = sourceCursor->next(status)) != CollationElementIterator::NULLORDER);
while ((sOrder = getStrengthOrder(cursor1, status)) != CollationElementIterator::NULLORDER);
}
else if (tOrder != CollationElementIterator::NULLORDER)
{
@ -1060,7 +1201,7 @@ RuleBasedCollator::compare(const UnicodeString& source,
}
}
}
while ((tOrder = targetCursor->next(status)) != CollationElementIterator::NULLORDER);
while ((tOrder = getStrengthOrder(cursor2, status)) != CollationElementIterator::NULLORDER);
}
@ -1070,15 +1211,46 @@ RuleBasedCollator::compare(const UnicodeString& source,
// puts the result of the string comparison directly into result
if (result == Collator::EQUAL && getStrength() == IDENTICAL)
{
UnicodeString sourceDecomp, targetDecomp;
#if 0
// ******** for the UChar normalization interface.
// It doesn't work much faster, and the code was broken
// so it's commented out. --srl
// UChar sourceDecomp[1024], targetDecomp[1024];
// int32_t sourceDecompLength = 1024;
// int32_t targetDecompLength = 1024;
// int8_t comparison;
// Normalizer::EMode decompMode = getDecomposition();
// if (decompMode != Normalizer::NO_OP)
// {
// Normalizer::normalize(source, sourceLength, decompMode,
// 0, sourceDecomp, sourceDecompLength, status);
// Normalizer::normalize(target, targetLength, decompMode,
// 0, targetDecomp, targetDecompLength, status);
// comparison = u_strcmp(sourceDecomp,targetDecomp);
// }
// else
// {
// comparison = u_strcmp(source, target); /* ! */
// }
#else
UnicodeString sourceDecomp, targetDecomp;
int8_t comparison;
Normalizer::normalize(source, getDecomposition(),
0, sourceDecomp, status);
0, sourceDecomp, status);
Normalizer::normalize(target, getDecomposition(),
0, targetDecomp, status);
0, targetDecomp, status);
comparison = sourceDecomp.compare(targetDecomp);
#endif
if (comparison < 0)
{
@ -1097,6 +1269,49 @@ RuleBasedCollator::compare(const UnicodeString& source,
return result;
}
int32_t
RuleBasedCollator::nextContractChar(NormalizerIterator *cursor,
UChar ch,
UErrorCode& status) const
{
// First get the ordering of this single character
VectorOfPToContractElement *list = getContractValues(ch);
EntryPair *pair = (EntryPair *)list->at(0);
int32_t order = pair->value;
// Now iterate through the chars following it and
// look for the longest match
((UnicodeString&)key).remove();
((UnicodeString&)key) += ch;
while ((ch = cursor->current()) != Normalizer::DONE)
{
((UnicodeString&)key) += ch;
int32_t n = getEntry(list, key, TRUE);
if (n == UNMAPPED)
{
break;
}
cursor->next();
pair = (EntryPair *)list->at(n);
order = pair->value;
}
return order;
}
// Compare two strings using this collator
Collator::EComparisonResult
RuleBasedCollator::compare(const UnicodeString& source,
const UnicodeString& target) const
{
return compare(source.getUChars(), source.length(), target.getUChars(), target.length());
}
// Retrieve a collation key for the specified string
// The key can be compared with other collation keys using a bitwise comparison
// (e.g. memcmp) to find the ordering of their respective source strings.
@ -1134,6 +1349,15 @@ CollationKey&
RuleBasedCollator::getCollationKey( const UnicodeString& source,
CollationKey& sortkey,
UErrorCode& status) const
{
return RuleBasedCollator::getCollationKey(source.getUChars(), source.size(), sortkey, status);
}
CollationKey&
RuleBasedCollator::getCollationKey( const UChar* source,
int32_t sourceLen,
CollationKey& sortkey,
UErrorCode& status) const
{
if (U_FAILURE(status))
{
@ -1141,27 +1365,21 @@ RuleBasedCollator::getCollationKey( const UnicodeString& source,
return sortkey.setToBogus();
}
if (source.isBogus())
{
status = U_MEMORY_ALLOCATION_ERROR;
return sortkey.setToBogus();
}
if (source.size() == 0)
if ((!source) || (sourceLen == 0))
{
return sortkey.reset();
}
if (sourceCursor == NULL)
if (cursor1 == NULL)
{
((RuleBasedCollator *)this)->sourceCursor = createCollationElementIterator(source);
((RuleBasedCollator *)this)->cursor1 = new NormalizerIterator(source, sourceLen, getDecomposition());
}
else
{
sourceCursor->setText(source, status);
cursor1->setModeAndText(getDecomposition(), source,sourceLen, status);
}
if (sourceCursor == NULL || U_FAILURE(status))
if (U_FAILURE(status))
{
return sortkey.setToBogus();
}
@ -1177,7 +1395,8 @@ RuleBasedCollator::getCollationKey( const UnicodeString& source,
UnicodeString decomp;
// iterate over the source, counting primary, secondary, and tertiary entries
while((order = sourceCursor->next(status)) != CollationElementIterator::NULLORDER)
while((order = getStrengthOrder((NormalizerIterator*)cursor1, status)) !=
CollationElementIterator::NULLORDER)
{
int32_t secOrder = CollationElementIterator::secondaryOrder(order);
int32_t terOrder = CollationElementIterator::tertiaryOrder(order);
@ -1230,7 +1449,7 @@ RuleBasedCollator::getCollationKey( const UnicodeString& source,
if (compareIdent)
{
Normalizer::normalize(source, getDecomposition(),
Normalizer::normalize(source, getDecomposition(), // SRL: ??
0, decomp, status);
if (U_SUCCESS(status))
@ -1259,10 +1478,10 @@ RuleBasedCollator::getCollationKey( const UnicodeString& source,
int32_t identCursor = terCursor + (2 * totalTer);
// reset source to the beginning
sourceCursor->reset();
cursor1->reset();
// now iterate over the source computing the actual entries
while((order = sourceCursor->next(status)) != CollationElementIterator::NULLORDER)
while((order = getStrengthOrder((NormalizerIterator*)cursor1, status)) != CollationElementIterator::NULLORDER)
{
if (U_FAILURE(status))
{
@ -1336,6 +1555,14 @@ RuleBasedCollator::getCollationKey( const UnicodeString& source,
sortkey.storeUnicodeString(identCursor, decomp);
}
// Debugging - print out the sortkey [--srl]
// {
// const uint8_t *bytes;
// int32_t xcount;
// bytes = sortkey.getByteArray(xcount);
// // fprintf(stderr, "\n\n- [%02X] [%02X]\n\n", (int)(bytes[0]&0xFF), (int)(bytes[1]&0xFF) );
// }
return sortkey;
}
@ -1615,6 +1842,8 @@ RuleBasedCollator::increment(Collator::ECollationStrength aStrength, int32_t las
data->maxTerOrder += 1;
}
break;
// case IDENTICAL?
}
return lastValue;
@ -2017,12 +2246,6 @@ VectorOfInt *RuleBasedCollator::getExpandValueList(int32_t order) const
return data->expandTable->at(order - EXPANDCHARINDEX);
}
// Get the character order in the mapping table
int32_t
RuleBasedCollator::getUnicodeOrder(UChar ch) const
{
return ucmp32_get(data->mapping, ch);
}
void RuleBasedCollatorStreamer::streamIn(RuleBasedCollator* collator, FileStream* is)
@ -2117,7 +2340,7 @@ bool_t RuleBasedCollator::writeToFile(const char* fileName) const
#ifdef COLLDEBUG
fprintf(stderr, "binary write %s size %d %s\n", fileName, T_FileStream_size(ofs),
(!T_FileStream_error(ofs) ? ", OK" : ", FAIL");
(!T_FileStream_error(ofs) ? ", OK" : ", FAIL"));
#endif
bool_t err = T_FileStream_error(ofs) == 0;

View File

@ -37,7 +37,10 @@
* 04/23/99 stephen Removed EDecompositionMode, merged with
* Normalizer::EMode
* 06/14/99 stephen Removed kResourceBundleSuffix
*
* 11/02/99 helena Collator performance enhancements. Eliminates the
* UnicodeString construction and special case for NO_OP.
* 11/23/99 srl More performance enhancements. Updates to NormalizerIterator
* internal state management.
*******************************************************************************
*/
@ -420,6 +423,39 @@ public:
const UnicodeString& target,
int32_t length) const;
/**
* The comparison function compares the character data stored in two
* different string arrays. Returns information about whether a string
* array is less than, greater than or equal to another string array.
* <p>Example of use:
* <pre>
* . UErrorCode status = U_ZERO_ERROR;
* . Collator *myCollation = Collator::createInstance(Locale::US, status);
* . if (U_FAILURE(status)) return;
* . myCollation->setStrength(Collator::PRIMARY);
* . // result would be Collator::EQUAL ("abc" == "ABC")
* . // (no primary difference between "abc" and "ABC")
* . Collator::EComparisonResult result = myCollation->compare(L"abc", 3, L"ABC", 3);
* . myCollation->setStrength(Collator::TERTIARY);
* . // result would be Collator::LESS (abc" &lt;&lt;&lt; "ABC")
* . // (with tertiary difference between "abc" and "ABC")
* . Collator::EComparisonResult result = myCollation->compare(L"abc", 3, L"ABC", 3);
* </pre>
* @param source the source string array to be compared with.
* @param sourceLength the length of the source string array. If this value
* is equal to -1, the string array is null-terminated.
* @param target the string that is to be compared with the source string.
* @param targetLength the length of the target string array. If this value
* is equal to -1, the string array is null-terminated.
* @return Returns a byte value. GREATER if source is greater
* than target; EQUAL if source is equal to target; LESS if source is less
* than target
**/
virtual EComparisonResult compare( const UChar* source,
int32_t sourceLength,
const UChar* target,
int32_t targetLength) const ;
/** Transforms a specified region of the string into a series of characters
* that can be compared with CollationKey.compare. Use a CollationKey when
* you need to do repeated comparisions on the same string. For a single comparison
@ -433,6 +469,13 @@ public:
virtual CollationKey& getCollationKey( const UnicodeString& source,
CollationKey& key,
UErrorCode& status) const;
virtual CollationKey& getCollationKey(const UChar *source,
int32_t sourceLength,
CollationKey& key,
UErrorCode& status) const;
/**
* Generates the hash code for the rule-based collation object.
* @return the hash code.
@ -705,11 +748,41 @@ private:
const UnicodeString& name,
const UnicodeString& suffix);
/**
* Chops off the last portion of the locale name. For example, from "en_US_CA"
* to "en_US" and "en_US" to "en".
* @param localeName the locale name.
/* Internal class for quick iteration over the text.
100% pure inline code
*/
class NormalizerIterator {
public:
Normalizer *cursor;
VectorOfInt *bufferAlias;
int32_t swapOrder;
UChar* text;
int32_t expIndex;
int32_t textLen;
UTextOffset currentOffset;
NormalizerIterator(void);
NormalizerIterator(const UChar* source, int32_t length, Normalizer::EMode mode);
~NormalizerIterator(void);
void setText(const UChar* source, int32_t length, UErrorCode& status);
void setModeAndText(Normalizer::EMode mode, const UChar* source, int32_t length, UErrorCode& status);
UChar current(void) const;
UChar next(void);
void reset(void);
};
int32_t getStrengthOrder(NormalizerIterator* cursor,
UErrorCode status) const;
int32_t strengthOrder(int32_t value) const ;
int32_t nextContractChar(NormalizerIterator *cursor,
UChar ch,
UErrorCode& status) const;
/**
* Chops off the last portion of the locale name. For example, from "en_US_CA"
* to "en_US" and "en_US" to "en".
* @param localeName the locale name.
*/
static void chopLocale(UnicodeString& localeName);
//--------------------------------------------------------------------------
@ -751,12 +824,151 @@ private:
UnicodeString sbuffer;
UnicodeString tbuffer;
UnicodeString key;
CollationElementIterator *sourceCursor;
CollationElementIterator *targetCursor;
NormalizerIterator *cursor1;
NormalizerIterator *cursor2;
bool_t dataIsOwned;
TableCollationData* data;
};
inline
RuleBasedCollator::NormalizerIterator::NormalizerIterator() :
cursor(0),
bufferAlias(0),
swapOrder(0),
text(0),
textLen(0),
currentOffset(0),
expIndex(0)
{
}
inline
RuleBasedCollator::NormalizerIterator::NormalizerIterator(const UChar* source, int32_t length, Normalizer::EMode mode) :
cursor(0),
bufferAlias(0),
swapOrder(0),
text(0),
textLen(0),
currentOffset(0),
expIndex(0)
{
if (mode == Normalizer::NO_OP) {
text = (UChar*)source;
textLen = length;
currentOffset = 0;
} else {
cursor = new Normalizer(source, length, mode);
}
}
inline
RuleBasedCollator::NormalizerIterator::~NormalizerIterator()
{
if (cursor != 0) {
delete cursor;
cursor = 0;
}
}
inline
void
RuleBasedCollator::NormalizerIterator::setText(const UChar* source, int32_t length, UErrorCode& status)
{
if (cursor == 0) {
text = (UChar*)source;
textLen = length;
currentOffset = 0;
} else {
text = 0;
cursor->setText(source, length, status);
}
bufferAlias = 0;
swapOrder = 0;
expIndex = 0;
currentOffset = 0;
}
/* You can only set mode after the comparision of two strings is completed.
Setting the mode in the middle of a comparison is not allowed.
*/
inline
void
RuleBasedCollator::NormalizerIterator::setModeAndText(Normalizer::EMode mode, const UChar* source, int32_t length, UErrorCode& status)
{
if (cursor != NULL) {
if (mode != Normalizer::NO_OP) {
cursor->setMode(mode);
cursor->setText(source, length, status);
} else {
delete cursor;
cursor = 0;
text = (UChar*)source;
textLen = length;
currentOffset = 0;
}
} else {
if(mode == Normalizer::NO_OP)
{
text = (UChar*)source;
textLen = length;
currentOffset = 0;
}
else
{
cursor = new Normalizer(source, length, mode);
}
}
bufferAlias = 0;
swapOrder = 0;
expIndex = 0;
}
inline
UChar
RuleBasedCollator::NormalizerIterator::current(void) const
{
if (text != 0) {
if(currentOffset >= textLen)
{
return Normalizer::DONE;
}
else
{
return text[currentOffset];
}
}
return cursor->current();
}
inline
UChar
RuleBasedCollator::NormalizerIterator::next(void)
{
if (text != 0) {
return ((currentOffset < textLen) ? text[++currentOffset] : Normalizer::DONE);
}
return cursor->next();
}
inline
void
RuleBasedCollator::NormalizerIterator::reset(void)
{
currentOffset = 0;
if(cursor)
{
cursor->reset();
}
}
inline bool_t
RuleBasedCollator::operator!=(const Collator& other) const
@ -772,4 +984,7 @@ RuleBasedCollator::addContractOrder(const UnicodeString &groupChars,
addContractOrder(groupChars, anOrder, TRUE, status);
}
#endif

View File

@ -138,11 +138,7 @@ ucol_strcoll( const UCollator *coll,
const UChar *target,
int32_t targetLength)
{
int32_t srcLen = (sourceLength == -1 ? u_strlen(source) : sourceLength);
const UnicodeString tempSource((UChar*)source, sourceLength, sourceLength);
int32_t targLen = (targetLength == -1 ? u_strlen(target) : targetLength);
const UnicodeString tempTarget((UChar*)target, targLen, targLen);
return (UCollationResult) ((Collator*)coll)->compare(tempSource, tempTarget);
return (UCollationResult) ((Collator*)coll)->compare(source,sourceLength,target,targetLength);
}
U_CAPI bool_t
@ -290,12 +286,12 @@ ucol_getSortKey(const UCollator *coll,
const uint8_t* bytes = NULL;
CollationKey key;
int32_t copyLen;
int32_t len = (sourceLength == -1 ? u_strlen(source)
int32_t len = (sourceLength == -1 ? u_strlen(source)
: sourceLength);
UnicodeString string((UChar*)source, len, len);
// UnicodeString string((UChar*)source, len, len);
UErrorCode status = U_ZERO_ERROR;
((Collator*)coll)->getCollationKey(string, key, status);
((Collator*)coll)->getCollationKey(source, len, key, status);
if(U_FAILURE(status))
return 0;