df6d1549a1
X-SVN-Rev: 4884
803 lines
27 KiB
C++
803 lines
27 KiB
C++
/*
|
||
******************************************************************************
|
||
* Copyright (C) {1996-2001}, International Business Machines Corporation and *
|
||
* others. All Rights Reserved. *
|
||
******************************************************************************
|
||
*/
|
||
|
||
/**
|
||
* File tblcoll.cpp
|
||
*
|
||
* Created by: Helena Shih
|
||
*
|
||
* Modification History:
|
||
*
|
||
* Date Name Description
|
||
* 2/5/97 aliu Added streamIn and streamOut methods. Added
|
||
* constructor which reads RuleBasedCollator object from
|
||
* a binary file. Added writeToFile method which streams
|
||
* RuleBasedCollator out to a binary file. The streamIn
|
||
* and streamOut methods use istream and ostream objects
|
||
* in binary mode.
|
||
* 2/11/97 aliu Moved declarations out of for loop initializer.
|
||
* Added Mac compatibility #ifdef for ios::nocreate.
|
||
* 2/12/97 aliu Modified to use TableCollationData sub-object to
|
||
* hold invariant data.
|
||
* 2/13/97 aliu Moved several methods into this class from Collation.
|
||
* Added a private RuleBasedCollator(Locale&) constructor,
|
||
* to be used by Collator::getInstance(). General
|
||
* clean up. Made use of UErrorCode variables consistent.
|
||
* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
|
||
* constructor and getDynamicClassID.
|
||
* 3/5/97 aliu Changed compaction cycle to improve performance. We
|
||
* use the maximum allowable value which is kBlockCount.
|
||
* Modified getRules() to load rules dynamically. Changed
|
||
* constructFromFile() call to accommodate this (added
|
||
* parameter to specify whether binary loading is to
|
||
* take place).
|
||
* 05/06/97 helena Added memory allocation error check.
|
||
* 6/20/97 helena Java class name change.
|
||
* 6/23/97 helena Adding comments to make code more readable.
|
||
* 09/03/97 helena Added createCollationKeyValues().
|
||
* 06/26/98 erm Changes for CollationKeys using byte arrays.
|
||
* 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java
|
||
* 04/23/99 stephen Removed EDecompositionMode, merged with
|
||
* Normalizer::EMode
|
||
* 06/14/99 stephen Removed kResourceBundleSuffix
|
||
* 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
|
||
* files are no longer used.
|
||
* 11/02/99 helena Collator performance enhancements. Special case
|
||
* for NO_OP situations.
|
||
* 11/17/99 srl More performance enhancements. Inlined some internal functions.
|
||
* 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
|
||
* to implementation file.
|
||
* 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
|
||
*/
|
||
|
||
#include "ucol_imp.h"
|
||
#include "unicode/tblcoll.h"
|
||
#include "unicode/coleitr.h"
|
||
#include "uhash.h"
|
||
#include "unicode/resbund.h"
|
||
#include "cmemory.h"
|
||
|
||
#ifdef _DEBUG
|
||
#include "unistrm.h"
|
||
#endif
|
||
|
||
/* global variable ---------------------------------------------------------- */
|
||
|
||
/*
|
||
synwee : using another name for this
|
||
const uint32_t tblcoll_STACK_BUFFER_LENGTH_ = 1024;
|
||
*/
|
||
#define STACK_BUFFER_LENGTH_ 1024
|
||
|
||
/* forward declarations ----------------------------------------------------- */
|
||
|
||
U_CAPI UChar forwardCharIteratorGlue(void *iterator);
|
||
|
||
/* RuleBasedCollator declaration ----------------------------------------- */
|
||
|
||
/* ---------------------------------------------------------------------------
|
||
The following diagram shows the data structure of the RuleBasedCollator object.
|
||
Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
|
||
"a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O;
|
||
'o-umlaut'/E, 'O-umlaut'/E ...".
|
||
The rule sorts the 'ch' ligatures and 'c' with only a tertiary
|
||
difference and sorts 'o-umlaut' as if it's always expanded with 'e'.
|
||
|
||
mapping table contracting list expanding list
|
||
(contains all unicode
|
||
char entries) ___ ____________ _________________________
|
||
________ |=>|_*_|->|'c' |v('c') | |=>|v('o')|v('umlaut')|v('e')|
|
||
|_\u0001_|-> v('\u0001') | |_:_| |------------| | |-------------------------|
|
||
|_\u0002_|-> v('\u0002') | |_:_| |'ch'|v('ch')| | | : |
|
||
|____:___| | |_:_| |------------| | |-------------------------|
|
||
|____:___| | |'cH'|v('cH')| | | : |
|
||
|__'a'___|-> v('a') | |------------| | |-------------------------|
|
||
|__'b'___|-> v('b') | |'Ch'|v('Ch')| | | : |
|
||
|____:___| | |------------| | |-------------------------|
|
||
|____:___| | |'CH'|v('CH')| | | : |
|
||
|___'c'__|---------------- ------------ | |-------------------------|
|
||
|____:___| | | : |
|
||
|o-umlaut|---------------------------------------- |_________________________|
|
||
|____:___|
|
||
|
||
--------------------------------------------------------------------------- */
|
||
|
||
/* public RuleBasedCollator constructor ---------------------------------- */
|
||
|
||
/**
|
||
* Copy constructor
|
||
*/
|
||
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) :
|
||
Collator(that), dataIsOwned(FALSE), ucollator(that.ucollator),
|
||
urulestring(that.urulestring)
|
||
{
|
||
}
|
||
|
||
/**
 * Builds a collator from a rule string, using the default normalization mode
 * and the default strength.
 *
 * The pointer members are initialized to NULL up front so that the object is
 * in a defined state when construction bails out early or ucol_openRules()
 * fails.
 *
 * @param rules  the collation rules to compile
 * @param status in/out error code; nothing is done when it already signals a
 *               failure, otherwise it receives the result of ucol_openRules()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     UErrorCode& status) :
    dataIsOwned(FALSE), ucollator(NULL), urulestring(NULL)
{
    if (U_FAILURE(status))
        return;

    int32_t length = rules.length();

    /*
     * The C API needs a NUL-terminated UChar array. Short rules are copied
     * into a stack buffer, longer ones into a temporary heap buffer.
     */
    UChar  ucharrules[STACK_BUFFER_LENGTH_];
    UChar *pucharrules = ucharrules;

    if (length >= STACK_BUFFER_LENGTH_)
        pucharrules = new UChar[length + 1];

    rules.extract(0, length, pucharrules);
    pucharrules[length] = 0;

    ucollator = ucol_openRules(pucharrules, length, UCOL_DEFAULT_NORMALIZATION,
                               UCOL_DEFAULT_STRENGTH, &status);

    if (U_SUCCESS(status))
    {
        /* keep a C++ copy of the rules as reported by the new collator */
        const UChar *r = ucol_getRules(ucollator, &length);
        if (length > 0) {
            urulestring = new UnicodeString(r, length);
        }
        else {
            urulestring = new UnicodeString();
        }

        dataIsOwned = TRUE;
    }

    if (pucharrules != ucharrules)
        delete[] pucharrules;
}
|
||
|
||
/**
 * Builds a collator from a rule string with an explicit strength and the
 * default normalization mode.
 *
 * The pointer members are initialized to NULL up front so that the object is
 * in a defined state when construction bails out early or ucol_openRules()
 * fails.
 *
 * @param rules             the collation rules to compile
 * @param collationStrength the strength, converted with
 *                          getUCollationStrength() before being passed on
 * @param status            in/out error code; nothing is done when it already
 *                          signals a failure
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     ECollationStrength collationStrength,
                                     UErrorCode& status) :
    dataIsOwned(FALSE), ucollator(NULL), urulestring(NULL)
{
    if (U_FAILURE(status))
        return;

    int32_t length = rules.length();

    /*
     * The C API needs a NUL-terminated UChar array. Short rules are copied
     * into a stack buffer, longer ones into a temporary heap buffer.
     */
    UChar  ucharrules[STACK_BUFFER_LENGTH_];
    UChar *pucharrules = ucharrules;

    if (length >= STACK_BUFFER_LENGTH_)
        pucharrules = new UChar[length + 1];

    rules.extract(0, length, pucharrules);
    pucharrules[length] = 0;

    UCollationStrength strength = getUCollationStrength(collationStrength);
    ucollator = ucol_openRules(pucharrules, length, UCOL_DEFAULT_NORMALIZATION,
                               strength, &status);

    if (U_SUCCESS(status))
    {
        /* keep a C++ copy of the rules as reported by the new collator */
        const UChar *r = ucol_getRules(ucollator, &length);
        if (length > 0) {
            urulestring = new UnicodeString(r, length);
        }
        else {
            urulestring = new UnicodeString();
        }
        dataIsOwned = TRUE;
    }

    if (pucharrules != ucharrules)
        delete[] pucharrules;
}
|
||
|
||
/**
 * Builds a collator from a rule string with an explicit decomposition mode
 * and the default strength.
 *
 * The pointer members are initialized to NULL up front so that the object is
 * in a defined state when construction bails out early or ucol_openRules()
 * fails.
 *
 * @param rules             the collation rules to compile
 * @param decompositionMode the normalization mode, converted with
 *                          Normalizer::getUNormalizationMode()
 * @param status            in/out error code; nothing is done when it already
 *                          signals a failure
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     Normalizer::EMode decompositionMode,
                                     UErrorCode& status) :
    dataIsOwned(FALSE), ucollator(NULL), urulestring(NULL)
{
    if (U_FAILURE(status))
        return;

    int32_t length = rules.length();

    /*
     * The C API needs a NUL-terminated UChar array. Short rules are copied
     * into a stack buffer, longer ones into a temporary heap buffer.
     */
    UChar  ucharrules[STACK_BUFFER_LENGTH_];
    UChar *pucharrules = ucharrules;

    if (length >= STACK_BUFFER_LENGTH_)
        pucharrules = new UChar[length + 1];

    rules.extract(0, length, pucharrules);
    pucharrules[length] = 0;

    UNormalizationMode mode = Normalizer::getUNormalizationMode(
                                                   decompositionMode, status);
    ucollator = ucol_openRules(pucharrules, length, mode,
                               UCOL_DEFAULT_STRENGTH, &status);

    if (U_SUCCESS(status))
    {
        /* keep a C++ copy of the rules as reported by the new collator */
        const UChar *r = ucol_getRules(ucollator, &length);
        if (length > 0) {
            urulestring = new UnicodeString(r, length);
        }
        else {
            urulestring = new UnicodeString();
        }
        dataIsOwned = TRUE;
    }

    if (pucharrules != ucharrules)
        delete[] pucharrules;
}
|
||
|
||
/**
 * Builds a collator from a rule string with an explicit strength and an
 * explicit decomposition mode.
 *
 * The pointer members are initialized to NULL up front so that the object is
 * in a defined state when construction bails out early or ucol_openRules()
 * fails.
 *
 * @param rules             the collation rules to compile
 * @param collationStrength the strength, converted with
 *                          getUCollationStrength()
 * @param decompositionMode the normalization mode, converted with
 *                          Normalizer::getUNormalizationMode()
 * @param status            in/out error code; nothing is done when it already
 *                          signals a failure
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     ECollationStrength collationStrength,
                                     Normalizer::EMode decompositionMode,
                                     UErrorCode& status) :
    dataIsOwned(FALSE), ucollator(NULL), urulestring(NULL)
{
    if (U_FAILURE(status))
        return;

    int32_t length = rules.length();

    /*
     * The C API needs a NUL-terminated UChar array. Short rules are copied
     * into a stack buffer, longer ones into a temporary heap buffer.
     */
    UChar  ucharrules[STACK_BUFFER_LENGTH_];
    UChar *pucharrules = ucharrules;

    if (length >= STACK_BUFFER_LENGTH_)
        pucharrules = new UChar[length + 1];

    rules.extract(0, length, pucharrules);
    pucharrules[length] = 0;

    UCollationStrength strength = getUCollationStrength(collationStrength);
    UNormalizationMode mode = Normalizer::getUNormalizationMode(
                                                   decompositionMode, status);
    ucollator = ucol_openRules(pucharrules, length, mode, strength, &status);

    if (U_SUCCESS(status))
    {
        /* keep a C++ copy of the rules as reported by the new collator */
        const UChar *r = ucol_getRules(ucollator, &length);
        if (length > 0) {
            urulestring = new UnicodeString(r, length);
        }
        else {
            urulestring = new UnicodeString();
        }
        dataIsOwned = TRUE;
    }

    if (pucharrules != ucharrules)
        delete[] pucharrules;
}
|
||
|
||
|
||
|
||
/* RuleBasedCollator public destructor ----------------------------------- */
|
||
|
||
/**
 * Destructor. The UCollator handle and the rule string are released only
 * when this object owns them; the handle is cleared in either case.
 */
RuleBasedCollator::~RuleBasedCollator()
{
    if (!dataIsOwned)
    {
        /* shared data is left untouched */
        ucollator = NULL;
        return;
    }

    ucol_close(ucollator);
    delete urulestring;
    ucollator = NULL;
}
|
||
|
||
/* RuleBasedCollator public methods -------------------------------------- */
|
||
|
||
/**
 * Equality test.
 *
 * @param that the collator to compare with
 * @return TRUE when the base class comparison succeeds, or when `that` has
 *         the same dynamic class ID and uses the very same UCollator handle;
 *         FALSE otherwise
 */
UBool RuleBasedCollator::operator==(const Collator& that) const
{
    /* the base class only checks for address equality */
    if (Collator::operator==(that))
        return TRUE;

    /* objects of different classes never compare equal */
    if (getDynamicClassID() != that.getDynamicClassID())
        return FALSE;

    /*
      synwee : the original code does not check for data compatibility,
      only the collator handles are compared
    */
    const RuleBasedCollator& other = (const RuleBasedCollator&)that;
    return (UBool)(ucollator == other.ucollator);
}
|
||
|
||
/**
 * Assignment operator.
 *
 * Any data owned by this object is released first. Afterwards this object
 * shares the UCollator handle and the rule string pointer of `that` without
 * owning them.
 *
 * @param that the collator to alias
 * @return this object
 */
RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
{
    /* self-assignment changes nothing */
    if (this == &that)
        return *this;

    if (dataIsOwned)
    {
        ucol_close(ucollator);
        delete urulestring;
    }

    ucollator   = that.ucollator;
    urulestring = that.urulestring;
    dataIsOwned = FALSE;

    return *this;
}
|
||
|
||
/**
 * Creates a heap-allocated copy of this collator through the copy
 * constructor.
 *
 * @return the new collator
 */
Collator* RuleBasedCollator::clone() const
{
    RuleBasedCollator *copy = new RuleBasedCollator(*this);
    return copy;
}
|
||
|
||
/**
|
||
* Create a CollationElementIterator object that will iterator over the
|
||
* elements in a string, using the collation rules defined in this
|
||
* RuleBasedCollator
|
||
*/
|
||
CollationElementIterator* RuleBasedCollator::createCollationElementIterator
|
||
(const UnicodeString& source) const
|
||
{
|
||
UErrorCode status = U_ZERO_ERROR;
|
||
CollationElementIterator *result = new CollationElementIterator(source, this,
|
||
status);
|
||
|
||
if (U_FAILURE(status))
|
||
return NULL;
|
||
|
||
return result;
|
||
}
|
||
|
||
/**
|
||
* Create a CollationElementIterator object that will iterator over the
|
||
* elements in a string, using the collation rules defined in this
|
||
* RuleBasedCollator
|
||
*/
|
||
CollationElementIterator* RuleBasedCollator::createCollationElementIterator
|
||
(const CharacterIterator& source) const
|
||
{
|
||
UErrorCode status = U_ZERO_ERROR;
|
||
CollationElementIterator *result = new CollationElementIterator(source, this,
|
||
status);
|
||
|
||
if (U_FAILURE(status))
|
||
return NULL;
|
||
|
||
return result;
|
||
}
|
||
|
||
/**
|
||
* Return a string representation of this collator's rules. The string can
|
||
* later be passed to the constructor that takes a UnicodeString argument,
|
||
* which will construct a collator that's functionally identical to this one.
|
||
* You can also allow users to edit the string in order to change the collation
|
||
* data, or you can print it out for inspection, or whatever.
|
||
*/
|
||
const UnicodeString& RuleBasedCollator::getRules() const
|
||
{
|
||
return (*urulestring);
|
||
}
|
||
|
||
/**
 * Compares the leading parts of two strings.
 *
 * From each string, at most `length` code units starting at offset 0 are
 * extracted; the two extracts are then compared with
 * compare(const UnicodeString&, const UnicodeString&).
 *
 * @param source the first string
 * @param target the second string
 * @param length the upper bound on the number of code units taken from each
 *               string
 * @return the result of comparing the two extracts
 */
Collator::EComparisonResult RuleBasedCollator::compare(
                                           const UnicodeString& source,
                                           const UnicodeString& target,
                                           int32_t length) const
{
    const UTextOffset start = 0;

    UnicodeString sourcePrefix;
    UnicodeString targetPrefix;

    source.extract(start, uprv_min(length, source.length()), sourcePrefix);
    target.extract(start, uprv_min(length, target.length()), targetPrefix);

    return compare(sourcePrefix, targetPrefix);
}
|
||
|
||
/**
 * Compares two UChar arrays by handing them to ucol_strcoll() and converting
 * its result with getEComparisonResult().
 *
 * @param source       the first string
 * @param sourceLength the length passed to ucol_strcoll() for `source`
 * @param target       the second string
 * @param targetLength the length passed to ucol_strcoll() for `target`
 * @return the comparison result
 */
Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
                                                       int32_t sourceLength,
                                                       const UChar* target,
                                                       int32_t targetLength)
                                                       const
{
    const UCollationResult order = ucol_strcoll(ucollator,
                                                source, sourceLength,
                                                target, targetLength);
    return getEComparisonResult(order);
}
|
||
|
||
/**
|
||
* Compare two strings using this collator
|
||
*/
|
||
Collator::EComparisonResult RuleBasedCollator::compare(
|
||
const UnicodeString& source,
|
||
const UnicodeString& target) const
|
||
{
|
||
UChar uSstart[STACK_BUFFER_LENGTH_];
|
||
UChar uTstart[STACK_BUFFER_LENGTH_];
|
||
UChar *uSource = uSstart;
|
||
UChar *uTarget = uTstart;
|
||
uint32_t sourceLen = source.length();
|
||
uint32_t targetLen = target.length();
|
||
|
||
if(sourceLen >= STACK_BUFFER_LENGTH_)
|
||
uSource = new UChar[sourceLen+1];
|
||
|
||
if(targetLen >= STACK_BUFFER_LENGTH_)
|
||
uTarget = new UChar[targetLen+1];
|
||
|
||
source.extract(0, sourceLen, uSource);
|
||
uSource[sourceLen] = 0;
|
||
target.extract(0, targetLen, uTarget);
|
||
uTarget[targetLen] = 0;
|
||
EComparisonResult result = compare(uSource, sourceLen, uTarget, targetLen);
|
||
|
||
if(uSstart != uSource)
|
||
delete[] uSource;
|
||
|
||
if(uTstart != uTarget)
|
||
delete[] uTarget;
|
||
|
||
return result;
|
||
}
|
||
|
||
/**
|
||
* Retrieve a collation key for the specified string. The key can be compared
|
||
* with other collation keys using a bitwise comparison (e.g. memcmp) to find
|
||
* the ordering of their respective source strings. This is handy when doing a
|
||
* sort, where each sort key must be compared many times.
|
||
*
|
||
* The basic algorithm here is to find all of the collation elements for each
|
||
* character in the source string, convert them to an ASCII representation, and
|
||
* put them into the collation key. But it's trickier than that. Each
|
||
* collation element in a string has three components: primary ('A' vs 'B'),
|
||
* secondary ('u' vs '<27>'), and tertiary ('A' vs 'a'), and a primary difference
|
||
* at the end of a string takes precedence over a secondary or tertiary
|
||
* difference earlier in the string.
|
||
*
|
||
* To account for this, we put all of the primary orders at the beginning of
|
||
* the string, followed by the secondary and tertiary orders. Each set of
|
||
* orders is terminated by nulls so that a key for a string which is a initial
|
||
* substring of another key will compare less without any special case.
|
||
*
|
||
* Here's a hypothetical example, with the collation element represented as a
|
||
* three-digit number, one digit for primary, one for secondary, etc.
|
||
*
|
||
* String: A a B <20>
|
||
* Collation Elements: 101 100 201 511
|
||
* Collation Key: 1125<null>0001<null>1011<null>
|
||
*
|
||
* To make things even trickier, secondary differences (accent marks) are
|
||
* compared starting at the *end* of the string in languages with French
|
||
* secondary ordering. But when comparing the accent marks on a single base
|
||
* character, they are compared from the beginning. To handle this, we reverse
|
||
* all of the accents that belong to each base character, then we reverse the
|
||
* entire string of secondary orderings at the end.
|
||
*/
|
||
CollationKey& RuleBasedCollator::getCollationKey(
|
||
const UnicodeString& source,
|
||
CollationKey& sortkey,
|
||
UErrorCode& status) const
|
||
{
|
||
UChar sStart[STACK_BUFFER_LENGTH_];
|
||
UChar *uSource = sStart;
|
||
uint32_t sourceLen = source.length();
|
||
|
||
if(sourceLen >= STACK_BUFFER_LENGTH_)
|
||
uSource = new UChar[sourceLen+1];
|
||
|
||
source.extract(0, sourceLen, uSource);
|
||
uSource[sourceLen] = 0;
|
||
CollationKey& result = getCollationKey(uSource, sourceLen, sortkey, status);
|
||
if(sStart != uSource)
|
||
delete[] uSource;
|
||
|
||
return result;
|
||
}
|
||
|
||
/**
 * Builds the collation key for a UChar array with ucol_getSortKey() and hands
 * the resulting byte buffer over to `sortkey`.
 *
 * @param source    the string to build a key for
 * @param sourceLen the length passed to ucol_getSortKey() for `source`
 * @param sortkey   the key object that receives the result
 * @param status    in/out error code. An incoming failure is replaced by
 *                  U_ILLEGAL_ARGUMENT_ERROR and the key is set to bogus; a
 *                  failed buffer allocation sets U_MEMORY_ALLOCATION_ERROR
 *                  and also yields a bogus key.
 * @return `sortkey`; it is reset when `source` is NULL or `sourceLen` is 0
 */
CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
                                                 int32_t sourceLen,
                                                 CollationKey& sortkey,
                                                 UErrorCode& status) const
{
    if (U_FAILURE(status))
    {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return sortkey.setToBogus();
    }

    if ((!source) || (sourceLen == 0))
        return sortkey.reset();

    /*
     * have to use malloc, lowest denomination, since adopt can be used by
     * a c return value.
     */
    int32_t  capacity = UCOL_MAX_BUFFER;
    uint8_t *result   = (uint8_t *)uprv_malloc(capacity * sizeof(uint8_t));
    if (result == NULL)
    {
        status = U_MEMORY_ALLOCATION_ERROR;
        return sortkey.setToBogus();
    }

    int32_t resLen = ucol_getSortKey(ucollator, source, sourceLen, result,
                                     capacity);

    if (resLen > capacity)
    {
        /*
         * The key did not fit: ucol_getSortKey() reports the size it needs.
         * Retry once with a buffer of exactly that size, so that the length
         * handed to adopt() never exceeds the allocated buffer.
         */
        uprv_free(result);
        capacity = resLen;
        result   = (uint8_t *)uprv_malloc(capacity * sizeof(uint8_t));
        if (result == NULL)
        {
            status = U_MEMORY_ALLOCATION_ERROR;
            return sortkey.setToBogus();
        }
        resLen = ucol_getSortKey(ucollator, source, sourceLen, result,
                                 capacity);
    }

    sortkey.adopt(result, resLen);

    return sortkey;
}
|
||
|
||
/**
|
||
* Return the maximum length of any expansion sequences that end with the
|
||
* specified comparison order.
|
||
* @param order a collation order returned by previous or next.
|
||
* @return the maximum length of any expansion seuences ending with the
|
||
* specified order or 1 if collation order does not occur at the end of any
|
||
* expansion sequence.
|
||
* @see CollationElementIterator#getMaxExpansion
|
||
*/
|
||
int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
|
||
{
|
||
uint8_t result;
|
||
UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
|
||
return result;
|
||
}
|
||
|
||
/**
 * Forwards to ucol_cloneRuleData() for the underlying collator.
 *
 * @param length passed by address to ucol_cloneRuleData()
 * @param status passed by address to ucol_cloneRuleData()
 * @return the pointer returned by ucol_cloneRuleData()
 */
uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
                                          UErrorCode &status)
{
    uint8_t *ruleData = ucol_cloneRuleData(ucollator, &length, &status);
    return ruleData;
}
|
||
|
||
/**
 * Sets an attribute of the underlying collator through ucol_setAttribute().
 *
 * @param attr   the attribute to set
 * @param value  the new attribute value
 * @param status in/out error code; nothing is done when it already signals a
 *               failure
 */
void RuleBasedCollator::setAttribute(UColAttribute attr,
                                     UColAttributeValue value,
                                     UErrorCode &status)
{
    if (U_SUCCESS(status))
    {
        ucol_setAttribute(ucollator, attr, value, &status);
    }
}
|
||
|
||
/**
 * Reads an attribute of the underlying collator through ucol_getAttribute().
 *
 * @param attr   the attribute to query
 * @param status in/out error code
 * @return UCOL_DEFAULT when `status` already signals a failure, otherwise
 *         the value reported by ucol_getAttribute()
 */
UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
                                                   UErrorCode &status)
{
    return U_FAILURE(status) ? UCOL_DEFAULT
                             : ucol_getAttribute(ucollator, attr, &status);
}
|
||
|
||
/**
 * Creates an independent collator: the underlying UCollator is duplicated
 * with ucol_safeClone() and the rule string is copied, and the returned
 * object owns both.
 *
 * @return the new collator, or NULL when ucol_safeClone() reported a failure
 */
Collator* RuleBasedCollator::safeClone(void)
{
    UErrorCode intStatus = U_ZERO_ERROR;
    int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
    UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
                                     &intStatus);
    if (U_FAILURE(intStatus))
        return NULL;

    /*
     * ucol_getRules() writes the rule length through its second argument.
     * Fetch the rules in a statement of their own: reading `length` in the
     * same argument list that calls ucol_getRules() has unspecified order and
     * could pick up the stale value 0.
     */
    int32_t length = 0;
    const UChar *rules = ucol_getRules(ucollator, &length);
    UnicodeString *r = new UnicodeString(rules, length);

    RuleBasedCollator *result = new RuleBasedCollator(ucol, r);
    result->dataIsOwned = TRUE;
    return result;
}
|
||
|
||
/**
 * Compares the text delivered by two forward character iterators.
 *
 * Both iterators are passed to ucol_strcollinc() together with
 * forwardCharIteratorGlue as the callback that fetches characters from them.
 *
 * @param source iterator over the first text
 * @param target iterator over the second text
 * @return the comparison result
 */
Collator::EComparisonResult RuleBasedCollator::compare(
                                       ForwardCharacterIterator &source,
                                       ForwardCharacterIterator &target)
{
    const UCollationResult order =
        ucol_strcollinc(ucollator, forwardCharIteratorGlue, &source,
                        forwardCharIteratorGlue, &target);
    return getEComparisonResult(order);
}
|
||
|
||
/**
 * Writes the sort key of a string into a caller-supplied buffer.
 *
 * The string is copied into a NUL-terminated UChar buffer (on the stack when
 * shorter than STACK_BUFFER_LENGTH_, on the heap otherwise) and passed to
 * ucol_getSortKey().
 *
 * @param source       the string to build a key for
 * @param result       the destination buffer
 * @param resultLength the size of `result` as passed to ucol_getSortKey()
 * @return the value returned by ucol_getSortKey()
 */
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
                                      uint8_t *result, int32_t resultLength)
                                      const
{
    const uint32_t sourceLen = source.length();

    UChar  stackChars[STACK_BUFFER_LENGTH_];
    /* fall back to the heap when the string does not fit the stack buffer */
    UChar *sourceChars = (sourceLen < STACK_BUFFER_LENGTH_)
                             ? stackChars : new UChar[sourceLen + 1];

    source.extract(0, sourceLen, sourceChars);
    sourceChars[sourceLen] = 0;

    const int32_t keyLength = ucol_getSortKey(ucollator, sourceChars,
                                              sourceLen, result,
                                              resultLength);

    if (sourceChars != stackChars)
        delete[] sourceChars;

    return keyLength;
}
|
||
|
||
/**
 * Writes the sort key of a UChar array into a caller-supplied buffer by
 * forwarding all arguments to ucol_getSortKey().
 *
 * @param source       the string to build a key for
 * @param sourceLength the length passed on for `source`
 * @param result       the destination buffer
 * @param resultLength the size of `result` as passed to ucol_getSortKey()
 * @return the value returned by ucol_getSortKey()
 */
int32_t RuleBasedCollator::getSortKey(const UChar *source,
                                      int32_t sourceLength, uint8_t *result,
                                      int32_t resultLength) const
{
    const int32_t keyLength = ucol_getSortKey(ucollator, source, sourceLength,
                                              result, resultLength);
    return keyLength;
}
|
||
|
||
/**
 * Reads the UCOL_STRENGTH attribute of the underlying collator and converts
 * it with getECollationStrength(). The error code of the attribute query is
 * not reported to the caller.
 *
 * @return the collation strength
 */
Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
{
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    const UColAttributeValue current = ucol_getAttribute(ucollator,
                                                         UCOL_STRENGTH,
                                                         &ignoredStatus);
    return getECollationStrength(current);
}
|
||
|
||
/**
 * Converts the strength with getUCollationStrength() and stores it as the
 * UCOL_STRENGTH attribute of the underlying collator. The error code of the
 * attribute update is not reported to the caller.
 *
 * @param newStrength the new collation strength
 */
void RuleBasedCollator::setStrength(ECollationStrength newStrength)
{
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    ucol_setAttribute(ucollator, UCOL_STRENGTH,
                      getUCollationStrength(newStrength), &ignoredStatus);
}
|
||
|
||
/**
|
||
* Create a hash code for this collation. Just hash the main rule table -- that
|
||
* should be good enough for almost any use.
|
||
*/
|
||
int32_t RuleBasedCollator::hashCode() const
|
||
{
|
||
int32_t length;
|
||
const UChar *rules = ucol_getRules(ucollator, &length);
|
||
return uhash_hashUCharsN(rules, length);
|
||
}
|
||
|
||
/**
|
||
* Set the decomposition mode of the Collator object. success is equal to
|
||
* U_ILLEGAL_ARGUMENT_ERROR if error occurs.
|
||
* @param the new decomposition mode
|
||
* @see Collator#getDecomposition
|
||
*/
|
||
void RuleBasedCollator::setDecomposition(Normalizer::EMode mode)
|
||
{
|
||
UErrorCode status = U_ZERO_ERROR;
|
||
ucol_setNormalization(ucollator, Normalizer::getUNormalizationMode(mode,
|
||
status));
|
||
}
|
||
|
||
/**
|
||
* Get the decomposition mode of the Collator object.
|
||
* @return the decomposition mode
|
||
* @see Collator#setDecomposition
|
||
*/
|
||
Normalizer::EMode RuleBasedCollator::getDecomposition(void) const
|
||
{
|
||
UErrorCode status = U_ZERO_ERROR;
|
||
return Normalizer::getNormalizerEMode(ucol_getNormalization(ucollator),
|
||
status);
|
||
}
|
||
|
||
// RuleBasedCollator private constructors -----------------------------------
|
||
|
||
/**
 * Default constructor. Creates an empty collator that owns nothing; both the
 * UCollator handle and the rule string pointer start out as null so that
 * neither is ever read uninitialized.
 */
RuleBasedCollator::RuleBasedCollator() :
    dataIsOwned(FALSE), ucollator(0), urulestring(0)
{
}
|
||
|
||
/**
 * Wraps an existing UCollator handle and rule string. The new object stores
 * both pointers but is marked as not owning them; a caller that wants the
 * object to release them sets dataIsOwned afterwards, as safeClone() does.
 *
 * @param collator the UCollator handle to wrap
 * @param rule     the rule string to store
 */
RuleBasedCollator::RuleBasedCollator(UCollator *collator,
                                     UnicodeString *rule)
    : dataIsOwned(FALSE),
      ucollator(collator),
      urulestring(rule)
{
}
|
||
|
||
/**
 * Builds the collator for a locale.
 *
 * The collation data is loaded with setUCollator() for the desired locale;
 * when that fails, a second attempt is made with
 * ResourceBundle::kDefaultFilename. When both attempts fail, the failure of
 * the second attempt is left in `status` and the object stays empty (it owns
 * nothing and both pointer members are null), instead of reporting a mere
 * warning and then reading rules from a collator that was never loaded.
 *
 * @param desiredLocale the locale whose collation rules are wanted
 * @param status        in/out error code; nothing is done when it already
 *                      signals a failure
 */
RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
                                     UErrorCode& status) :
    dataIsOwned(FALSE), ucollator(0), urulestring(0)
{
    if (U_FAILURE(status))
        return;

    /*
    Try to load, in order:
     1. The desired locale's collation.
     2. A fallback of the desired locale.
     3. The default locale's collation.
     4. A fallback of the default locale.
     5. The default collation rules, which contains en_US collation rules.

     To reiterate, we try:
     Specific:
      language+country+variant
      language+country
      language
     Default:
      language+country+variant
      language+country
      language
     Root: (aka DEFAULTRULES)
     Steps 1-5 are handled by the resource bundle fallback mechanism.
     In the very improbable situation that no resource bundle data exists,
     step 5 is repeated below with the default resource bundle name.
    */

    setUCollator(desiredLocale, status);

    if (U_FAILURE(status))
    {
        /* second and last attempt: the default resource bundle */
        status = U_ZERO_ERROR;
        setUCollator(ResourceBundle::kDefaultFilename, status);

        if (U_FAILURE(status))
        {
            /* nothing could be loaded; report the failure to the caller */
            return;
        }
    }

    if (U_SUCCESS(status))
    {
        /* keep a C++ copy of the rules as reported by the loaded collator */
        int32_t length;
        const UChar *r = ucol_getRules(ucollator, &length);
        if (length > 0) {
            urulestring = new UnicodeString(r, length);
        }
        else {
            urulestring = new UnicodeString();
        }
        dataIsOwned = TRUE;
    }

    return;
}
|
||
|
||
/* RuleBasedCollator private data members -------------------------------- */
|
||
|
||
/* need look up in .commit() */
|
||
const int32_t RuleBasedCollator::CHARINDEX = 0x70000000;
|
||
/* Expand index follows */
|
||
const int32_t RuleBasedCollator::EXPANDCHARINDEX = 0x7E000000;
|
||
/* contract indexes follows */
|
||
const int32_t RuleBasedCollator::CONTRACTCHARINDEX = 0x7F000000;
|
||
/* unmapped character values */
|
||
const int32_t RuleBasedCollator::UNMAPPED = 0xFFFFFFFF;
|
||
/* primary strength increment */
|
||
const int32_t RuleBasedCollator::PRIMARYORDERINCREMENT = 0x00010000;
|
||
/* secondary strength increment */
|
||
const int32_t RuleBasedCollator::SECONDARYORDERINCREMENT = 0x00000100;
|
||
/* tertiary strength increment */
|
||
const int32_t RuleBasedCollator::TERTIARYORDERINCREMENT = 0x00000001;
|
||
/* mask off anything but primary order */
|
||
const int32_t RuleBasedCollator::PRIMARYORDERMASK = 0xffff0000;
|
||
/* mask off anything but secondary order */
|
||
const int32_t RuleBasedCollator::SECONDARYORDERMASK = 0x0000ff00;
|
||
/* mask off anything but tertiary order */
|
||
const int32_t RuleBasedCollator::TERTIARYORDERMASK = 0x000000ff;
|
||
/* mask off ignorable char order */
|
||
const int32_t RuleBasedCollator::IGNORABLEMASK = 0x0000ffff;
|
||
/* use only the primary difference */
|
||
const int32_t RuleBasedCollator::PRIMARYDIFFERENCEONLY = 0xffff0000;
|
||
/* use only the primary and secondary difference */
|
||
const int32_t RuleBasedCollator::SECONDARYDIFFERENCEONLY = 0xffffff00;
|
||
/* primary order shift */
|
||
const int32_t RuleBasedCollator::PRIMARYORDERSHIFT = 16;
|
||
/* secondary order shift */
|
||
const int32_t RuleBasedCollator::SECONDARYORDERSHIFT = 8;
|
||
/* starting value for collation elements */
|
||
const int32_t RuleBasedCollator::COLELEMENTSTART = 0x02020202;
|
||
/* testing mask for primary low element */
|
||
const int32_t RuleBasedCollator::PRIMARYLOWZEROMASK = 0x00FF0000;
|
||
/* reseting value for secondaries and tertiaries */
|
||
const int32_t RuleBasedCollator::RESETSECONDARYTERTIARY = 0x00000202;
|
||
/* reseting value for tertiaries */
|
||
const int32_t RuleBasedCollator::RESETTERTIARY = 0x00000002;
|
||
|
||
const int32_t RuleBasedCollator::PRIMIGNORABLE = 0x0202;
|
||
|
||
/* unique file id for parity check */
|
||
const int16_t RuleBasedCollator::FILEID = 0x5443;
|
||
/* binary collation file extension */
|
||
const char* RuleBasedCollator::kFilenameSuffix = ".col";
|
||
/* class id ? Value is irrelevant */
|
||
char RuleBasedCollator::fgClassID = 0;
|
||
|
||
/* other methods not belonging to any classes ------------------------------- */
|
||
|
||
/**
 * Callback handed to ucol_strcollinc(): fetches the next character from a
 * ForwardCharacterIterator.
 *
 * @param iterator a ForwardCharacterIterator, passed as an untyped pointer
 * @return the character returned by nextPostInc(), or 0xFFFF when that
 *         character equals ForwardCharacterIterator::DONE
 */
U_CAPI UChar forwardCharIteratorGlue(void *iterator)
{
    ForwardCharacterIterator *chars = (ForwardCharacterIterator *)iterator;
    const UChar next = chars->nextPostInc();
    return (next == ForwardCharacterIterator::DONE) ? (UChar)0xFFFF : next;
}
|