1999-08-16 21:50:52 +00:00
|
|
|
/*
|
2007-07-27 03:12:12 +00:00
|
|
|
******************************************************************************
|
2013-09-13 19:25:01 +00:00
|
|
|
* Copyright (C) 1996-2013, International Business Machines Corporation and
|
2010-05-19 17:29:33 +00:00
|
|
|
* others. All Rights Reserved.
|
2007-07-27 03:12:12 +00:00
|
|
|
******************************************************************************
|
|
|
|
*/
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
|
2007-07-27 03:12:12 +00:00
|
|
|
* File tblcoll.cpp
|
|
|
|
*
|
|
|
|
* Created by: Helena Shih
|
|
|
|
*
|
|
|
|
* Modification History:
|
|
|
|
*
|
|
|
|
* Date Name Description
|
|
|
|
* 2/5/97 aliu Added streamIn and streamOut methods. Added
|
|
|
|
* constructor which reads RuleBasedCollator object from
|
|
|
|
* a binary file. Added writeToFile method which streams
|
|
|
|
* RuleBasedCollator out to a binary file. The streamIn
|
|
|
|
* and streamOut methods use istream and ostream objects
|
|
|
|
* in binary mode.
|
|
|
|
* 2/11/97 aliu Moved declarations out of for loop initializer.
|
|
|
|
* Added Mac compatibility #ifdef for ios::nocreate.
|
|
|
|
* 2/12/97 aliu Modified to use TableCollationData sub-object to
|
|
|
|
* hold invariant data.
|
|
|
|
* 2/13/97 aliu Moved several methods into this class from Collation.
|
|
|
|
* Added a private RuleBasedCollator(Locale&) constructor,
|
|
|
|
* to be used by Collator::getInstance(). General
|
|
|
|
* clean up. Made use of UErrorCode variables consistent.
|
|
|
|
* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
|
|
|
|
* constructor and getDynamicClassID.
|
|
|
|
* 3/5/97 aliu Changed compaction cycle to improve performance. We
|
|
|
|
* use the maximum allowable value which is kBlockCount.
|
|
|
|
* Modified getRules() to load rules dynamically. Changed
|
|
|
|
* constructFromFile() call to accommodate this (added
|
|
|
|
* parameter to specify whether binary loading is to
|
|
|
|
* take place).
|
|
|
|
* 05/06/97 helena Added memory allocation error check.
|
|
|
|
* 6/20/97 helena Java class name change.
|
|
|
|
* 6/23/97 helena Adding comments to make code more readable.
|
|
|
|
* 09/03/97 helena Added createCollationKeyValues().
|
|
|
|
* 06/26/98 erm Changes for CollationKeys using byte arrays.
|
|
|
|
* 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java
|
|
|
|
* 04/23/99 stephen Removed EDecompositionMode, merged with
|
|
|
|
* Normalizer::EMode
|
|
|
|
* 06/14/99 stephen Removed kResourceBundleSuffix
|
|
|
|
* 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
|
|
|
|
* files are no longer used.
|
|
|
|
* 11/02/99 helena Collator performance enhancements. Special case
|
|
|
|
* for NO_OP situations.
|
|
|
|
* 11/17/99 srl More performance enhancements. Inlined some internal functions.
|
|
|
|
* 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
|
|
|
|
* to implementation file.
|
|
|
|
* 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
|
|
|
|
*/
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
|
|
|
#if !UCONFIG_NO_COLLATION
|
|
|
|
|
2000-12-14 01:11:11 +00:00
|
|
|
#include "unicode/tblcoll.h"
|
1999-12-28 23:57:50 +00:00
|
|
|
#include "unicode/coleitr.h"
|
2004-09-03 15:24:16 +00:00
|
|
|
#include "unicode/ures.h"
|
2002-09-04 06:08:04 +00:00
|
|
|
#include "unicode/uset.h"
|
2002-07-12 21:42:24 +00:00
|
|
|
#include "ucol_imp.h"
|
|
|
|
#include "uresimp.h"
|
|
|
|
#include "uhash.h"
|
2001-03-15 22:56:02 +00:00
|
|
|
#include "cmemory.h"
|
2003-04-28 21:13:14 +00:00
|
|
|
#include "cstring.h"
|
2004-10-18 02:43:33 +00:00
|
|
|
#include "putilimp.h"
|
2011-06-03 05:23:57 +00:00
|
|
|
#include "ustr_imp.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2001-02-15 19:30:25 +00:00
|
|
|
/* public RuleBasedCollator constructor ---------------------------------- */
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
|
2003-05-02 22:17:14 +00:00
|
|
|
* Copy constructor, aliasing, not write-through
|
1999-12-16 01:41:19 +00:00
|
|
|
*/
|
2003-08-29 18:29:26 +00:00
|
|
|
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
|
|
|
|
: Collator(that)
|
|
|
|
, dataIsOwned(FALSE)
|
|
|
|
, isWriteThroughAlias(FALSE)
|
2006-04-02 19:07:50 +00:00
|
|
|
, ucollator(NULL)
|
1999-12-16 01:41:19 +00:00
|
|
|
{
|
2006-04-02 19:07:50 +00:00
|
|
|
RuleBasedCollator::operator=(that);
|
1999-12-16 01:41:19 +00:00
|
|
|
}
|
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
 * Builds a collator from a rule string, using the default strength and the
 * default decomposition mode.
 *
 * @param rules  the collation rules, forwarded to construct()
 * @param status in/out error code, filled in by construct()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     UErrorCode& status)
    : dataIsOwned(FALSE)
{
    const UColAttributeValue strength = UCOL_DEFAULT_STRENGTH;
    const UColAttributeValue decomposition = UCOL_DEFAULT;

    // construct() assigns ucollator and both ownership flags.
    construct(rules, strength, decomposition, status);
}
|
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
 * Builds a collator from a rule string with a caller-chosen strength and the
 * default decomposition mode.
 *
 * @param rules             the collation rules, forwarded to construct()
 * @param collationStrength requested strength, converted to UColAttributeValue
 * @param status            in/out error code, filled in by construct()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     ECollationStrength collationStrength,
                                     UErrorCode& status)
    : dataIsOwned(FALSE)
{
    const UColAttributeValue strength =
        static_cast<UColAttributeValue>(collationStrength);
    const UColAttributeValue decomposition = UCOL_DEFAULT;

    // construct() assigns ucollator and both ownership flags.
    construct(rules, strength, decomposition, status);
}
|
|
|
|
|
2002-08-21 19:12:24 +00:00
|
|
|
/**
 * Builds a collator from a rule string with the default strength and a
 * caller-chosen decomposition mode.
 *
 * @param rules             the collation rules, forwarded to construct()
 * @param decompositionMode requested normalization/decomposition setting
 * @param status            in/out error code, filled in by construct()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     UColAttributeValue decompositionMode,
                                     UErrorCode& status)
    : dataIsOwned(FALSE)
{
    const UColAttributeValue strength = UCOL_DEFAULT_STRENGTH;

    // construct() assigns ucollator and both ownership flags.
    construct(rules, strength, decompositionMode, status);
}
|
|
|
|
|
2001-09-22 01:10:41 +00:00
|
|
|
/**
 * Builds a collator from a rule string with caller-chosen strength and
 * decomposition mode.
 *
 * @param rules             the collation rules, forwarded to construct()
 * @param collationStrength requested strength, converted to UColAttributeValue
 * @param decompositionMode requested normalization/decomposition setting
 * @param status            in/out error code, filled in by construct()
 */
RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
                                     ECollationStrength collationStrength,
                                     UColAttributeValue decompositionMode,
                                     UErrorCode& status)
    : dataIsOwned(FALSE)
{
    const UColAttributeValue strength =
        static_cast<UColAttributeValue>(collationStrength);

    // construct() assigns ucollator and both ownership flags.
    construct(rules, strength, decompositionMode, status);
}
|
2009-04-22 23:10:07 +00:00
|
|
|
/**
 * Opens a collator from a binary image via ucol_openBinary().
 *
 * @param bin    the binary collation image
 * @param length length of the image, passed straight to ucol_openBinary()
 * @param base   collator whose ucollator is handed to ucol_openBinary() as the
 *               base; must not be NULL
 * @param status in/out error code. Nothing is opened if it already indicates
 *               a failure; set to U_ILLEGAL_ARGUMENT_ERROR when base is NULL.
 *
 * On any failure path ucollator is left NULL instead of uninitialized.
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base,
                                     UErrorCode &status)
    : dataIsOwned(TRUE),
      isWriteThroughAlias(FALSE),
      ucollator(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }
    // The original dereferenced base unconditionally; reject NULL up front.
    if (base == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
}
|
2001-01-18 00:23:29 +00:00
|
|
|
|
2001-09-22 01:10:41 +00:00
|
|
|
void
|
2006-04-02 19:07:50 +00:00
|
|
|
RuleBasedCollator::setRuleStringFromCollator()
|
2001-09-22 01:10:41 +00:00
|
|
|
{
|
2006-04-02 19:07:50 +00:00
|
|
|
int32_t length;
|
|
|
|
const UChar *r = ucol_getRules(ucollator, &length);
|
2003-08-29 18:29:26 +00:00
|
|
|
|
2006-04-02 19:07:50 +00:00
|
|
|
if (r && length > 0) {
|
|
|
|
// alias the rules string
|
|
|
|
urulestring.setTo(TRUE, r, length);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
urulestring.truncate(0); // Clear string.
|
2002-06-29 09:31:05 +00:00
|
|
|
}
|
2000-01-13 21:36:39 +00:00
|
|
|
}
|
|
|
|
|
2003-05-02 22:17:14 +00:00
|
|
|
// not aliasing, not write-through
|
|
|
|
void
|
|
|
|
RuleBasedCollator::construct(const UnicodeString& rules,
|
|
|
|
UColAttributeValue collationStrength,
|
|
|
|
UColAttributeValue decompositionMode,
|
|
|
|
UErrorCode& status)
|
|
|
|
{
|
2003-08-29 18:29:26 +00:00
|
|
|
ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
|
|
|
|
decompositionMode, collationStrength,
|
|
|
|
NULL, &status);
|
2003-05-02 22:17:14 +00:00
|
|
|
|
2003-08-29 18:29:26 +00:00
|
|
|
dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
|
|
|
|
isWriteThroughAlias = FALSE;
|
2003-05-02 22:17:14 +00:00
|
|
|
|
2006-07-21 22:01:55 +00:00
|
|
|
if(ucollator == NULL) {
|
|
|
|
if(U_SUCCESS(status)) {
|
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
}
|
|
|
|
return; // Failure
|
|
|
|
}
|
|
|
|
|
2006-04-02 19:07:50 +00:00
|
|
|
setRuleStringFromCollator();
|
2003-05-02 22:17:14 +00:00
|
|
|
}
|
|
|
|
|
2001-02-15 19:30:25 +00:00
|
|
|
/* RuleBasedCollator public destructor ----------------------------------- */
|
2000-01-13 21:36:39 +00:00
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
 * Destructor. Closes the underlying collator only when this object owns it
 * (dataIsOwned); the pointer is reset to NULL in either case.
 */
RuleBasedCollator::~RuleBasedCollator()
{
    UCollator * const attached = ucollator;
    ucollator = NULL;

    if (dataIsOwned) {
        ucol_close(attached);
    }
}
|
|
|
|
|
2001-02-15 19:30:25 +00:00
|
|
|
/* RuleBasedCollator public methods -------------------------------------- */
|
1999-11-23 22:49:29 +00:00
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
 * Equality test.
 *
 * @param that the collator to compare with
 * @return TRUE when that is this very object, or when the base-class
 *         comparison passes and ucol_equals() reports the two underlying
 *         collators as equal; FALSE otherwise
 */
UBool RuleBasedCollator::operator==(const Collator& that) const
{
    if (this != &that) {
        if (!Collator::operator==(that)) {
            return FALSE; /* not the same class */
        }

        // The downcast relies on the base-class check above having passed.
        const RuleBasedCollator& other =
            static_cast<const RuleBasedCollator&>(that);
        return ucol_equals(ucollator, other.ucollator);
    }

    /* same address, trivially equal */
    return TRUE;
}
|
|
|
|
|
2003-05-02 22:17:14 +00:00
|
|
|
// aliasing, not write-through
|
2001-09-27 00:59:41 +00:00
|
|
|
RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
|
2001-01-18 00:23:29 +00:00
|
|
|
{
|
2012-08-08 21:03:52 +00:00
|
|
|
if (this == &that) { return *this; }
|
2001-03-22 00:09:10 +00:00
|
|
|
|
2012-08-08 21:03:52 +00:00
|
|
|
UErrorCode intStatus = U_ZERO_ERROR;
|
2013-09-17 19:48:50 +00:00
|
|
|
UCollator *ucol = ucol_safeClone(that.ucollator, NULL, NULL, &intStatus);
|
2012-08-08 21:03:52 +00:00
|
|
|
if (U_FAILURE(intStatus)) { return *this; }
|
2006-04-02 19:07:50 +00:00
|
|
|
|
2012-08-08 21:03:52 +00:00
|
|
|
if (dataIsOwned) {
|
|
|
|
ucol_close(ucollator);
|
2003-08-29 18:29:26 +00:00
|
|
|
}
|
2012-08-08 21:03:52 +00:00
|
|
|
ucollator = ucol;
|
|
|
|
dataIsOwned = TRUE;
|
|
|
|
isWriteThroughAlias = FALSE;
|
|
|
|
setRuleStringFromCollator();
|
2003-08-29 18:29:26 +00:00
|
|
|
return *this;
|
1999-11-23 22:49:29 +00:00
|
|
|
}
|
|
|
|
|
2003-05-02 22:17:14 +00:00
|
|
|
// aliasing, not write-through
|
2001-01-18 00:23:29 +00:00
|
|
|
Collator* RuleBasedCollator::clone() const
|
|
|
|
{
|
2012-08-08 21:03:52 +00:00
|
|
|
RuleBasedCollator* coll = new RuleBasedCollator(*this);
|
|
|
|
// There is a small chance that the internal ucol_safeClone() call fails.
|
|
|
|
if (coll != NULL && coll->ucollator == NULL) {
|
|
|
|
delete coll;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return coll;
|
2001-01-18 00:23:29 +00:00
|
|
|
}
|
1999-11-23 22:49:29 +00:00
|
|
|
|
2011-04-12 18:23:27 +00:00
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
 * Creates a CollationElementIterator over the given string, bound to this
 * collator.
 *
 * @param source the text to iterate over
 * @return the new iterator, or NULL if its construction reported a failure
 */
CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                           (const UnicodeString& source) const
{
    UErrorCode ec = U_ZERO_ERROR;
    CollationElementIterator *iter =
        new CollationElementIterator(source, this, ec);

    if (U_SUCCESS(ec)) {
        return iter;
    }

    delete iter;
    return NULL;
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2001-03-22 00:09:10 +00:00
|
|
|
/**
|
2003-05-02 22:17:14 +00:00
|
|
|
* Create a CollationElementIterator object that will iterate over the
|
2001-03-22 00:09:10 +00:00
|
|
|
* elements in a string, using the collation rules defined in this
|
2001-01-18 00:23:29 +00:00
|
|
|
* RuleBasedCollator
|
|
|
|
*/
|
|
|
|
CollationElementIterator* RuleBasedCollator::createCollationElementIterator
|
|
|
|
(const CharacterIterator& source) const
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2003-08-29 18:29:26 +00:00
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
CollationElementIterator *result = new CollationElementIterator(source, this,
|
|
|
|
status);
|
2001-03-22 00:09:10 +00:00
|
|
|
|
2003-08-29 18:29:26 +00:00
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
delete result;
|
|
|
|
return NULL;
|
|
|
|
}
|
2001-03-22 00:09:10 +00:00
|
|
|
|
2003-08-29 18:29:26 +00:00
|
|
|
return result;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
2001-03-22 00:09:10 +00:00
|
|
|
/**
|
|
|
|
* Return a string representation of this collator's rules. The string can
|
|
|
|
* later be passed to the constructor that takes a UnicodeString argument,
|
|
|
|
* which will construct a collator that's functionally identical to this one.
|
|
|
|
* You can also allow users to edit the string in order to change the collation
|
2001-01-18 00:23:29 +00:00
|
|
|
* data, or you can print it out for inspection, or whatever.
|
|
|
|
*/
|
|
|
|
const UnicodeString& RuleBasedCollator::getRules() const
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2006-04-02 19:07:50 +00:00
|
|
|
return urulestring;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
2002-02-13 02:57:21 +00:00
|
|
|
/**
 * Copies the rules selected by delta into buffer.
 *
 * @param delta  which rules to fetch, forwarded to ucol_getRulesEx()
 * @param buffer receives the rules; emptied when no rules are reported or
 *               the temporary storage cannot be allocated
 */
void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
{
    // First call only asks for the size (no destination storage supplied).
    const int32_t needed = ucol_getRulesEx(ucollator, delta, NULL, -1);

    UChar *scratch = NULL;
    if (needed > 0) {
        scratch = (UChar*) uprv_malloc( sizeof(UChar) * (needed) );
    }

    if (scratch == NULL) {
        // nothing to copy, or couldn't allocate
        buffer.remove();
        return;
    }

    ucol_getRulesEx(ucollator, delta, scratch, needed);
    buffer.setTo(scratch, needed);
    uprv_free(scratch);
}
|
|
|
|
|
2002-09-17 06:27:51 +00:00
|
|
|
/**
 * Returns the result of ucol_getTailoredSet() for this collator, viewed as a
 * UnicodeSet.
 *
 * @param status in/out error code; nothing is queried if it already
 *               indicates a failure
 * @return the set produced by ucol_getTailoredSet(), or NULL when status
 *         was already a failure on entry
 */
UnicodeSet *
RuleBasedCollator::getTailoredSet(UErrorCode &status) const
{
    if (U_SUCCESS(status)) {
        return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
    }
    return NULL;
}
|
|
|
|
|
|
|
|
|
2002-02-28 20:01:48 +00:00
|
|
|
/**
 * Fills versionInfo through ucol_getVersion().
 *
 * @param versionInfo destination; a NULL argument makes the call a no-op
 */
void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
{
    if (versionInfo == NULL) {
        return;
    }
    ucol_getVersion(ucollator, versionInfo);
}
|
|
|
|
|
2012-08-03 21:49:28 +00:00
|
|
|
/**
|
|
|
|
* Compare two strings using this collator
|
|
|
|
*/
|
2003-05-01 17:43:01 +00:00
|
|
|
UCollationResult RuleBasedCollator::compare(
|
|
|
|
const UnicodeString& source,
|
|
|
|
const UnicodeString& target,
|
2003-08-29 18:29:26 +00:00
|
|
|
int32_t length,
|
2003-05-01 17:43:01 +00:00
|
|
|
UErrorCode &status) const
|
|
|
|
{
|
2003-08-29 18:29:26 +00:00
|
|
|
return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
|
2001-01-18 00:23:29 +00:00
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2003-05-01 17:43:01 +00:00
|
|
|
/**
 * Compares two UChar buffers with ucol_strcoll().
 *
 * @param source       first text
 * @param sourceLength length of the first text, forwarded unchanged
 * @param target       second text
 * @param targetLength length of the second text, forwarded unchanged
 * @param status       in/out error code; only read here
 * @return the ucol_strcoll() result, or UCOL_EQUAL if status already
 *         indicates a failure
 */
UCollationResult RuleBasedCollator::compare(const UChar* source,
                                            int32_t sourceLength,
                                            const UChar* target,
                                            int32_t targetLength,
                                            UErrorCode &status) const
{
    if (U_FAILURE(status)) {
        return UCOL_EQUAL;
    }
    return ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
}
|
|
|
|
|
|
|
|
/**
 * Compares two complete strings with ucol_strcoll().
 *
 * @param source the first string
 * @param target the second string
 * @param status in/out error code; only read here
 * @return the ucol_strcoll() result, or UCOL_EQUAL if status already
 *         indicates a failure
 */
UCollationResult RuleBasedCollator::compare(
                                   const UnicodeString& source,
                                   const UnicodeString& target,
                                   UErrorCode &status) const
{
    if (U_FAILURE(status)) {
        return UCOL_EQUAL;
    }
    return ucol_strcoll(ucollator,
                        source.getBuffer(), source.length(),
                        target.getBuffer(), target.length());
}
|
|
|
|
|
2009-03-13 23:38:43 +00:00
|
|
|
/**
 * Compares the text behind two UCharIterators with ucol_strcollIter().
 *
 * @param sIter  iterator over the first text
 * @param tIter  iterator over the second text
 * @param status in/out error code, also passed on to ucol_strcollIter()
 * @return the ucol_strcollIter() result, or UCOL_EQUAL if status already
 *         indicates a failure
 */
UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
                                            UCharIterator &tIter,
                                            UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return UCOL_EQUAL;
    }
    return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
}
|
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
|
2001-03-22 00:09:10 +00:00
|
|
|
* Retrieve a collation key for the specified string. The key can be compared
|
|
|
|
* with other collation keys using a bitwise comparison (e.g. memcmp) to find
|
|
|
|
* the ordering of their respective source strings. This is handy when doing a
|
2001-01-18 00:23:29 +00:00
|
|
|
* sort, where each sort key must be compared many times.
|
|
|
|
*
|
|
|
|
* The basic algorithm here is to find all of the collation elements for each
|
2001-03-22 00:09:10 +00:00
|
|
|
* character in the source string, convert them to an ASCII representation, and
|
|
|
|
* put them into the collation key. But it's trickier than that. Each
|
|
|
|
* collation element in a string has three components: primary ('A' vs 'B'),
|
2004-11-15 23:03:53 +00:00
|
|
|
* secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
|
2001-03-22 00:09:10 +00:00
|
|
|
* at the end of a string takes precedence over a secondary or tertiary
|
2001-01-18 00:23:29 +00:00
|
|
|
* difference earlier in the string.
|
|
|
|
*
|
2001-03-22 00:09:10 +00:00
|
|
|
* To account for this, we put all of the primary orders at the beginning of
|
|
|
|
* the string, followed by the secondary and tertiary orders. Each set of
|
|
|
|
* orders is terminated by nulls so that a key for a string which is an initial
|
2001-01-18 00:23:29 +00:00
|
|
|
* substring of another key will compare less without any special case.
|
|
|
|
*
|
2001-03-22 00:09:10 +00:00
|
|
|
* Here's a hypothetical example, with the collation element represented as a
|
2001-01-18 00:23:29 +00:00
|
|
|
* three-digit number, one digit for primary, one for secondary, etc.
|
|
|
|
*
|
2004-11-15 23:03:53 +00:00
|
|
|
* String: A a B \u00C9
|
2001-01-18 00:23:29 +00:00
|
|
|
* Collation Elements: 101 100 201 511
|
|
|
|
* Collation Key: 1125<null>0001<null>1011<null>
|
|
|
|
*
|
2001-03-22 00:09:10 +00:00
|
|
|
* To make things even trickier, secondary differences (accent marks) are
|
|
|
|
* compared starting at the *end* of the string in languages with French
|
|
|
|
* secondary ordering. But when comparing the accent marks on a single base
|
|
|
|
* character, they are compared from the beginning. To handle this, we reverse
|
|
|
|
* all of the accents that belong to each base character, then we reverse the
|
2001-01-18 00:23:29 +00:00
|
|
|
* entire string of secondary orderings at the end.
|
|
|
|
*/
|
|
|
|
CollationKey& RuleBasedCollator::getCollationKey(
                                                  const UnicodeString& source,
                                                  CollationKey& sortkey,
                                                  UErrorCode& status) const
{
    // Thin adapter: unpack the string and let the UChar* overload do the
    // validation and the actual key generation.
    const UChar *text = source.getBuffer();
    const int32_t textLength = source.length();
    return getCollationKey(text, textLength, sortkey, status);
}
|
2000-12-14 01:11:11 +00:00
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
 * Computes the collation key for a UChar buffer.
 *
 * @param source    the text; may be NULL only when sourceLen is 0
 * @param sourceLen number of code units, or -1 to measure the text with
 *                  u_strlen(); values below -1 are rejected
 * @param sortkey   receives the key; reset for empty text, set to bogus on
 *                  any failure
 * @param status    in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for
 *                  invalid arguments
 * @return sortkey
 */
CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
                                                  int32_t sourceLen,
                                                  CollationKey& sortkey,
                                                  UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return sortkey.setToBogus();
    }
    if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return sortkey.setToBogus();
    }

    // A negative length (only -1 gets this far) means "measure the text".
    const int32_t textLength = (sourceLen < 0) ? u_strlen(source) : sourceLen;
    if (textLength == 0) {
        return sortkey.reset();
    }

    const int32_t keyLength =
        ucol_getCollationKey(ucollator, source, textLength, sortkey, status);

    if (U_FAILURE(status)) {
        sortkey.setToBogus();
        return sortkey;
    }
    sortkey.setLength(keyLength);
    return sortkey;
}
|
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
|
2001-03-22 00:09:10 +00:00
|
|
|
* Return the maximum length of any expansion sequences that end with the
|
2001-01-18 00:23:29 +00:00
|
|
|
* specified comparison order.
|
|
|
|
* @param order a collation order returned by previous or next.
|
2001-03-22 00:09:10 +00:00
|
|
|
* @return the maximum length of any expansion sequences ending with the
|
|
|
|
* specified order or 1 if collation order does not occur at the end of any
|
2001-03-03 04:06:43 +00:00
|
|
|
* expansion sequence.
|
2001-01-18 00:23:29 +00:00
|
|
|
* @see CollationElementIterator#getMaxExpansion
|
|
|
|
*/
|
|
|
|
int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
{
    // UCOL_GETMAXEXPANSION is a macro that writes its answer into the third
    // argument; the order is handed over as an unsigned 32-bit value.
    uint8_t expansionLength;
    UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, expansionLength);
    return expansionLength;
}
|
|
|
|
|
2001-03-22 00:09:10 +00:00
|
|
|
/**
 * Returns a freshly allocated binary image of this collator, produced by
 * cloneBinary().
 *
 * A first attempt uses a fixed-size buffer; if cloneBinary() reports
 * U_BUFFER_OVERFLOW_ERROR the buffer is reallocated to the reported length
 * and the call is repeated.
 *
 * @param length receives the image length reported by cloneBinary()
 * @param status in/out error code; U_MEMORY_ALLOCATION_ERROR when a buffer
 *               cannot be allocated
 * @return the image (released from the local holder to the caller), or NULL
 *         on failure
 */
uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
                                          UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    const int32_t kFirstGuess = 20000;  // capacity of the first attempt
    LocalMemory<uint8_t> image((uint8_t *)uprv_malloc(kFirstGuess));
    if (image.isNull()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    length = cloneBinary(image.getAlias(), kFirstGuess, status);

    if (status == U_BUFFER_OVERFLOW_ERROR) {
        // Too small: retry with exactly the size that was reported.
        if (image.allocateInsteadAndCopy(length, 0) == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        status = U_ZERO_ERROR;
        length = cloneBinary(image.getAlias(), length, status);
    }

    if (U_FAILURE(status)) {
        return NULL;
    }
    return image.orphan();
}
|
2000-12-14 01:11:11 +00:00
|
|
|
|
2005-06-16 20:08:56 +00:00
|
|
|
|
|
|
|
/**
 * Writes the binary image of this collator into buffer by delegating to
 * ucol_cloneBinary().
 *
 * @param buffer   destination storage
 * @param capacity size of the destination
 * @param status   in/out error code, passed on to ucol_cloneBinary()
 * @return the value returned by ucol_cloneBinary()
 */
int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
{
    const int32_t imageLength =
        ucol_cloneBinary(ucollator, buffer, capacity, &status);
    return imageLength;
}
|
|
|
|
|
2001-03-22 00:09:10 +00:00
|
|
|
/**
 * Sets a collation attribute through ucol_setAttribute().
 *
 * @param attr   the attribute to change
 * @param value  the new value
 * @param status in/out error code; nothing happens if it already indicates
 *               a failure
 */
void RuleBasedCollator::setAttribute(UColAttribute attr,
                                     UColAttributeValue value,
                                     UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        // checkOwned() runs before every mutation of ucollator.
        checkOwned();
        ucol_setAttribute(ucollator, attr, value, &status);
    }
}
|
|
|
|
|
2001-03-22 00:09:10 +00:00
|
|
|
/**
 * Reads a collation attribute through ucol_getAttribute().
 *
 * @param attr   the attribute to query
 * @param status in/out error code
 * @return the attribute value, or UCOL_DEFAULT if status already indicates
 *         a failure
 */
UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
                                                   UErrorCode &status) const
{
    if (U_SUCCESS(status)) {
        return ucol_getAttribute(ucollator, attr, &status);
    }
    return UCOL_DEFAULT;
}
|
|
|
|
|
2001-06-26 22:24:10 +00:00
|
|
|
/**
 * Sets the variable top from a UChar buffer through ucol_setVariableTop().
 *
 * @param varTop the text designating the variable top
 * @param len    length of varTop
 * @param status in/out error code; if it already indicates a failure the
 *               collator is not touched and 0 is returned
 * @return the value returned by ucol_setVariableTop(), or 0 when status was
 *         already a failure
 */
uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
    // Same guard as setAttribute(): do not run checkOwned() (which may clone
    // the collator) for a call that is going to be a no-op anyway.
    if (U_FAILURE(status)) {
        return 0;
    }
    checkOwned();
    return ucol_setVariableTop(ucollator, varTop, len, &status);
}
|
|
|
|
|
2012-08-03 21:49:28 +00:00
|
|
|
/**
 * Sets the variable top from a UnicodeString through ucol_setVariableTop().
 *
 * @param varTop the text designating the variable top
 * @param status in/out error code; if it already indicates a failure the
 *               collator is not touched and 0 is returned
 * @return the value returned by ucol_setVariableTop(), or 0 when status was
 *         already a failure
 */
uint32_t RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &status) {
    // Same guard as setAttribute(): do not run checkOwned() (which may clone
    // the collator) for a call that is going to be a no-op anyway.
    if (U_FAILURE(status)) {
        return 0;
    }
    checkOwned();
    return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
}
|
|
|
|
|
2012-08-03 21:49:28 +00:00
|
|
|
/**
 * Restores a previously obtained variable-top value through
 * ucol_restoreVariableTop().
 *
 * @param varTop the variable-top value to install
 * @param status in/out error code; if it already indicates a failure the
 *               collator is not touched
 */
void RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &status) {
    // Same guard as setAttribute(): do not run checkOwned() (which may clone
    // the collator) for a call that is going to be a no-op anyway.
    if (U_FAILURE(status)) {
        return;
    }
    checkOwned();
    ucol_restoreVariableTop(ucollator, varTop, &status);
}
|
|
|
|
|
|
|
|
/**
 * Returns the variable-top value reported by ucol_getVariableTop().
 *
 * @param status in/out error code, passed on to ucol_getVariableTop()
 */
uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
    const uint32_t variableTop = ucol_getVariableTop(ucollator, &status);
    return variableTop;
}
|
|
|
|
|
2001-03-22 00:09:10 +00:00
|
|
|
/**
 * Writes the sort key for a string into result via ucol_getSortKey().
 *
 * @param source       the text to build a key for
 * @param result       destination buffer
 * @param resultLength capacity of the destination buffer
 * @return the value returned by ucol_getSortKey()
 */
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
                                      uint8_t *result, int32_t resultLength)
                                      const
{
    const UChar *text = source.getBuffer();
    const int32_t textLength = source.length();
    return ucol_getSortKey(ucollator, text, textLength, result, resultLength);
}
|
|
|
|
|
2001-03-22 00:09:10 +00:00
|
|
|
/**
 * Writes the sort key for a UChar buffer into result via ucol_getSortKey().
 *
 * @param source       the text to build a key for
 * @param sourceLength length of the text, forwarded unchanged
 * @param result       destination buffer
 * @param resultLength capacity of the destination buffer
 * @return the value returned by ucol_getSortKey()
 */
int32_t RuleBasedCollator::getSortKey(const UChar *source,
                                      int32_t sourceLength, uint8_t *result,
                                      int32_t resultLength) const
{
    const int32_t keyLength =
        ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
    return keyLength;
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2010-11-04 20:12:39 +00:00
|
|
|
/**
 * Retrieves this collator's reorder codes via ucol_getReorderCodes().
 *
 * @param dest         destination array
 * @param destCapacity capacity of the destination array
 * @param status       in/out error code, passed on to ucol_getReorderCodes()
 * @return the value returned by ucol_getReorderCodes()
 */
int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
                                           int32_t destCapacity,
                                           UErrorCode& status) const
{
    const int32_t codeCount =
        ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
    return codeCount;
}
|
|
|
|
|
2010-11-03 02:41:22 +00:00
|
|
|
void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
|
2010-11-04 22:36:25 +00:00
|
|
|
int32_t reorderCodesLength,
|
2010-10-27 18:02:52 +00:00
|
|
|
UErrorCode& status)
|
|
|
|
{
|
2011-04-12 18:23:27 +00:00
|
|
|
checkOwned();
|
2010-11-03 02:41:22 +00:00
|
|
|
ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
|
2010-10-27 18:02:52 +00:00
|
|
|
}
|
|
|
|
|
2011-03-18 22:52:30 +00:00
|
|
|
/**
 * Retrieves the reorder codes equivalent to reorderCode via
 * ucol_getEquivalentReorderCodes(). No state of this object is consulted.
 *
 * @param reorderCode  the code to look up
 * @param dest         destination array
 * @param destCapacity capacity of the destination array
 * @param status       in/out error code, passed on to the C function
 * @return the value returned by ucol_getEquivalentReorderCodes()
 */
int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
                                                     int32_t* dest,
                                                     int32_t destCapacity,
                                                     UErrorCode& status)
{
    const int32_t codeCount =
        ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
    return codeCount;
}
|
2010-10-27 18:02:52 +00:00
|
|
|
|
2001-03-22 00:09:10 +00:00
|
|
|
/**
|
|
|
|
* Create a hash code for this collation. Just hash the main rule table -- that
|
2001-01-18 00:23:29 +00:00
|
|
|
* should be good enough for almost any use.
|
|
|
|
*/
|
|
|
|
int32_t RuleBasedCollator::hashCode() const
|
|
|
|
{
|
2003-08-29 18:29:26 +00:00
|
|
|
int32_t length;
|
|
|
|
const UChar *rules = ucol_getRules(ucollator, &length);
|
2011-06-03 05:23:57 +00:00
|
|
|
return ustr_hashUCharsN(rules, length);
|
2001-01-18 00:23:29 +00:00
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2002-02-28 07:20:52 +00:00
|
|
|
/**
|
|
|
|
* return the locale of this collator
|
|
|
|
*/
|
2012-08-03 21:49:28 +00:00
|
|
|
Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
|
2009-04-22 23:10:07 +00:00
|
|
|
const char *result = ucol_getLocaleByType(ucollator, type, &status);
|
2003-08-29 18:29:26 +00:00
|
|
|
if(result == NULL) {
|
|
|
|
Locale res("");
|
|
|
|
res.setToBogus();
|
|
|
|
return res;
|
|
|
|
} else {
|
|
|
|
return Locale(result);
|
|
|
|
}
|
2002-02-28 07:20:52 +00:00
|
|
|
}
|
|
|
|
|
2003-04-28 21:13:14 +00:00
|
|
|
void
|
2007-12-18 01:19:16 +00:00
|
|
|
RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
|
2003-08-29 18:29:26 +00:00
|
|
|
checkOwned();
|
2007-12-18 01:19:16 +00:00
|
|
|
char* rloc = uprv_strdup(requestedLocale.getName());
|
2003-04-28 21:13:14 +00:00
|
|
|
if (rloc) {
|
2007-12-18 01:19:16 +00:00
|
|
|
char* vloc = uprv_strdup(validLocale.getName());
|
2003-04-28 21:13:14 +00:00
|
|
|
if (vloc) {
|
2007-12-18 01:19:16 +00:00
|
|
|
char* aloc = uprv_strdup(actualLocale.getName());
|
|
|
|
if (aloc) {
|
|
|
|
ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
uprv_free(vloc);
|
2003-04-28 21:13:14 +00:00
|
|
|
}
|
|
|
|
uprv_free(rloc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
// RuleBasedCollator private constructors -----------------------------------
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2003-08-29 18:29:26 +00:00
|
|
|
/**
 * Default constructor: creates an object with no underlying collator
 * attached (ucollator is NULL and nothing is owned).
 */
RuleBasedCollator::RuleBasedCollator()
    : dataIsOwned(FALSE),
      isWriteThroughAlias(FALSE),
      ucollator(NULL)
{
}
|
|
|
|
|
2001-01-18 00:23:29 +00:00
|
|
|
/**
 * Builds the collator for a locale.
 *
 * @param desiredLocale the locale whose collation is wanted
 * @param status        in/out error code. If loading the desired locale
 *                      fails, the root locale is tried instead and a clean
 *                      result of that retry is reported as
 *                      U_USING_DEFAULT_WARNING.
 */
RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
                                     UErrorCode& status)
    : dataIsOwned(FALSE),
      isWriteThroughAlias(FALSE),
      ucollator(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }

    /*
    Try to load, in order:
     1. The desired locale's collation.
     2. A fallback of the desired locale.
     3. The default locale's collation.
     4. A fallback of the default locale.
     5. The default collation rules, which contains en_US collation rules.

     To reiterate, we try:
     Specific:
      language+country+variant
      language+country
      language
     Default:
      language+country+variant
      language+country
      language
     Root: (aka DEFAULTRULES)
     Steps 1-5 are handled by the resource bundle fallback mechanism.
     However, in the very unlikely situation that no resource bundle
     data exists, step 5 is repeated with hardcoded default rules.
    */

    setUCollator(desiredLocale, status);

    if (U_FAILURE(status)) {
        // Start over with a clean status and fall back to the root locale.
        status = U_ZERO_ERROR;
        setUCollator(kRootLocaleName, status);

        // Only an exactly clean retry is turned into the "default" warning.
        if (status == U_ZERO_ERROR) {
            status = U_USING_DEFAULT_WARNING;
        }
    }

    if (U_FAILURE(status)) {
        return;
    }
    setRuleStringFromCollator();
}
|
|
|
|
|
2009-04-22 23:10:07 +00:00
|
|
|
void
|
2003-05-08 22:21:05 +00:00
|
|
|
RuleBasedCollator::setUCollator(const char *locale,
|
|
|
|
UErrorCode &status)
|
|
|
|
{
|
2011-04-12 18:23:27 +00:00
|
|
|
if (U_FAILURE(status)) {
|
2003-08-29 18:29:26 +00:00
|
|
|
return;
|
2011-04-12 18:23:27 +00:00
|
|
|
}
|
2003-08-29 18:29:26 +00:00
|
|
|
if (ucollator && dataIsOwned)
|
|
|
|
ucol_close(ucollator);
|
|
|
|
ucollator = ucol_open_internal(locale, &status);
|
|
|
|
dataIsOwned = TRUE;
|
|
|
|
isWriteThroughAlias = FALSE;
|
2003-05-08 22:21:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-05-02 22:17:14 +00:00
|
|
|
void
|
|
|
|
RuleBasedCollator::checkOwned() {
|
2003-08-29 18:29:26 +00:00
|
|
|
if (!(dataIsOwned || isWriteThroughAlias)) {
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
|
2006-04-02 19:07:50 +00:00
|
|
|
setRuleStringFromCollator();
|
2003-08-29 18:29:26 +00:00
|
|
|
dataIsOwned = TRUE;
|
|
|
|
isWriteThroughAlias = FALSE;
|
|
|
|
}
|
2003-05-02 22:17:14 +00:00
|
|
|
}
|
|
|
|
|
2012-02-18 23:10:51 +00:00
|
|
|
|
2012-02-19 09:14:56 +00:00
|
|
|
/**
 * Writes the short definition string of this collator into buffer by
 * delegating to ucol_getShortDefinitionString().
 *
 * @param locale   locale name forwarded to the C function
 * @param buffer   destination buffer
 * @param capacity capacity of the destination buffer
 * @param status   in/out error code, passed on to the C function
 * @return the value returned by ucol_getShortDefinitionString()
 */
int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
                                                            char *buffer,
                                                            int32_t capacity,
                                                            UErrorCode &status) const {
    /* simply delegate */
    const int32_t definitionLength =
        ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status);
    return definitionLength;
}
|
|
|
|
|
|
|
|
|
2003-08-29 18:29:26 +00:00
|
|
|
// Expands the ICU RTTI boilerplate for RuleBasedCollator. Presumably this
// supplies the class-ID accessors (e.g. getDynamicClassID) -- see the macro
// definition to confirm.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
|
2001-10-08 23:26:58 +00:00
|
|
|
|
|
|
|
U_NAMESPACE_END
|
2002-09-20 01:54:48 +00:00
|
|
|
|
|
|
|
#endif /* #if !UCONFIG_NO_COLLATION */
|