ICU-96 collation key adopt, ucol_createSortKey, fix for collation key hash code, C++ incremental compare implementation
X-SVN-Rev: 3196
This commit is contained in:
parent
eb2fca812e
commit
9ecaf4fcae
@ -44,7 +44,6 @@ RSC=rc.exe
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /YX /FD /c
|
||||
# ADD CPP /nologo /MD /W3 /GX /Ob1 /I "..\..\source\common" /I "..\..\include" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /D "U_I18N_IMPLEMENTATION" /YX /FD /c
|
||||
# SUBTRACT CPP /O<none>
|
||||
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
|
||||
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
@ -1727,6 +1726,10 @@ InputPath=.\unicode\ucol.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucolimp.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\udat.h
|
||||
|
||||
!IF "$(CFG)" == "i18n - Win32 Release"
|
||||
|
@ -32,6 +32,8 @@
|
||||
#include "cmemory.h"
|
||||
#endif
|
||||
|
||||
#include "uhash.h"
|
||||
|
||||
// A hash code of kInvalidHashCode indicates that the hash code needs
|
||||
// to be computed. A hash code of kEmptyHashCode is used for empty keys
|
||||
// and for any key whose computed hash code is kInvalidHashCode.
|
||||
@ -44,12 +46,19 @@ CollationKey::CollationKey()
|
||||
{
|
||||
}
|
||||
|
||||
// Adopt bytes allocated with malloc
|
||||
CollationKey::CollationKey(int32_t count, uint8_t *values)
|
||||
: fBogus(FALSE), fBytes(values), fCount(count), fCapacity(count),
|
||||
fHashCode(kInvalidHashCode)
|
||||
{
|
||||
}
|
||||
|
||||
// Create a collation key from a bit array.
|
||||
CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
|
||||
: fBogus(FALSE), fCount(count), fCapacity(count),
|
||||
fHashCode(kInvalidHashCode)
|
||||
{
|
||||
fBytes = new uint8_t[count];
|
||||
fBytes = (uint8_t *)uprv_malloc(count);
|
||||
|
||||
if (fBytes == NULL)
|
||||
{
|
||||
@ -75,7 +84,7 @@ CollationKey::CollationKey(const CollationKey& other)
|
||||
return;
|
||||
}
|
||||
|
||||
fBytes = new uint8_t[fCapacity];
|
||||
fBytes = (uint8_t *)uprv_malloc(fCapacity);
|
||||
|
||||
if (fBytes == NULL)
|
||||
{
|
||||
@ -91,9 +100,19 @@ CollationKey::CollationKey(const CollationKey& other)
|
||||
|
||||
CollationKey::~CollationKey()
|
||||
{
|
||||
delete[] fBytes;
|
||||
uprv_free(fBytes);
|
||||
}
|
||||
|
||||
// Replaces this key's contents with an externally supplied buffer, taking
// the pointer over rather than copying the bytes.
//
// @param values  buffer to adopt. A later adopt() releases it with
//                uprv_free(), so it must come from the matching allocator.
// @param count   number of key bytes in `values`; also recorded as the
//                capacity.
void CollationKey::adopt(uint8_t *values, int32_t count) {
    // Release the previous buffer, but never the one being handed in:
    // re-adopting the buffer we already own would otherwise free it and
    // leave fBytes pointing at released memory.
    if(fBytes != NULL && fBytes != values) {
        uprv_free(fBytes);
    }
    fBogus = FALSE;
    fBytes = values;
    fCount = count;
    fCapacity = count;
    // The contents changed, so mark the cached hash as not yet computed.
    fHashCode = kInvalidHashCode;
}
|
||||
// set the key to an empty state
|
||||
CollationKey&
|
||||
CollationKey::reset()
|
||||
@ -205,9 +224,9 @@ CollationKey::ensureCapacity(int32_t newSize)
|
||||
{
|
||||
if (fCapacity < newSize)
|
||||
{
|
||||
delete[] fBytes;
|
||||
uprv_free(fBytes);
|
||||
|
||||
fBytes = new uint8_t[newSize];
|
||||
fBytes = (uint8_t *)uprv_malloc(newSize);
|
||||
|
||||
if (fBytes == NULL)
|
||||
{
|
||||
@ -338,6 +357,8 @@ CollationKey::hashCode() const
|
||||
|
||||
if (fHashCode == kInvalidHashCode)
|
||||
{
|
||||
((CollationKey *)this)->fHashCode = uhash_hashChars(fBytes);
|
||||
#if 0
|
||||
// We compute the hash by iterating sparsely over 64 (at most) characters
|
||||
// spaced evenly through the string. For each character, we multiply the
|
||||
// previous hash value by a prime number and add the new character in,
|
||||
@ -361,6 +382,7 @@ CollationKey::hashCode() const
|
||||
}
|
||||
|
||||
((CollationKey *)this)->fHashCode = hash; // cast away const
|
||||
#endif
|
||||
}
|
||||
|
||||
return fHashCode;
|
||||
|
@ -6,7 +6,7 @@
|
||||
*
|
||||
* File tblcoll.cpp
|
||||
*
|
||||
* Created by: Helena Shih
|
||||
* Created by: Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
@ -57,6 +57,8 @@
|
||||
|
||||
#include "unicode/tblcoll.h"
|
||||
|
||||
#include "ucolimp.h"
|
||||
|
||||
#include "unicode/coleitr.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/unicode.h"
|
||||
@ -152,6 +154,7 @@ const int32_t RuleBasedCollator::TERIGNORABLE = 0x02;
|
||||
const int16_t RuleBasedCollator::FILEID = 0x5443; // unique file id for parity check
|
||||
const char* RuleBasedCollator::kFilenameSuffix = ".col"; // binary collation file extension
|
||||
char RuleBasedCollator::fgClassID = 0; // Value is irrelevant // class id
|
||||
UChar RuleBasedCollator::cacheKey = 0;
|
||||
|
||||
UMTX RuleBasedCollator::collMutex = NULL;
|
||||
UBool RuleBasedCollator::isMutexInited = RuleBasedCollator::initMutex();
|
||||
@ -706,6 +709,7 @@ void RuleBasedCollator::constructFromRules(const UnicodeString& rules,
|
||||
data = 0;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
isOverIgnore = FALSE;
|
||||
setStrength(Collator::TERTIARY);
|
||||
|
||||
@ -724,6 +728,7 @@ void RuleBasedCollator::constructFromRules(const UnicodeString& rules,
|
||||
// Now that we've got all the buffers allocated, do the actual work
|
||||
mPattern = 0;
|
||||
build(rules, status);
|
||||
addToCache(UnicodeString(cacheKey++));
|
||||
}
|
||||
|
||||
void
|
||||
@ -1695,18 +1700,11 @@ RuleBasedCollator::getCollationKey( const UChar* source,
|
||||
return sortkey.reset();
|
||||
}
|
||||
|
||||
uint8_t result[tblcoll_StackBufferLen];
|
||||
uint8_t *allocRes = result;
|
||||
int32_t resLen = ucol_getSortKey((UCollator *)this, source, sourceLen, allocRes, tblcoll_StackBufferLen);
|
||||
// if the result is too big
|
||||
if(resLen > tblcoll_StackBufferLen) {
|
||||
allocRes = new uint8_t[resLen];
|
||||
resLen = ucol_getSortKey((UCollator *)this, source, sourceLen, allocRes, tblcoll_StackBufferLen);
|
||||
}
|
||||
sortkey = CollationKey(allocRes, resLen);
|
||||
if(allocRes != result) {
|
||||
delete[] allocRes;
|
||||
}
|
||||
uint8_t *result;
|
||||
int32_t resLen = 0;
|
||||
result = ucol_getSortKeyWithAllocation((UCollator *)this, source, sourceLen, &resLen);
|
||||
|
||||
sortkey.adopt(result, resLen);
|
||||
|
||||
return sortkey;
|
||||
}
|
||||
@ -3012,16 +3010,32 @@ UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCod
|
||||
}
|
||||
|
||||
Collator* RuleBasedCollator::safeClone(void) {
|
||||
umtx_lock(&collMutex);
|
||||
Collator *result = new RuleBasedCollator(*this);
|
||||
umtx_unlock(&collMutex);
|
||||
return result;
|
||||
return new RuleBasedCollator(*this);
|
||||
}
|
||||
|
||||
// Adapter that fetches the next code unit from a ForwardCharacterIterator
// passed in as an untyped pointer.
//
// @param iterator  pointer to a ForwardCharacterIterator
// @return 0xFFFF when nextPostInc() yields ForwardCharacterIterator::DONE,
//         otherwise the code unit nextPostInc() produced
UChar forwardCharIteratorGlue(void *iterator) {
    ForwardCharacterIterator *src = (ForwardCharacterIterator *)iterator;
    const UChar next = src->nextPostInc();
    return (next == ForwardCharacterIterator::DONE) ? (UChar)0xFFFF : next;
}
|
||||
|
||||
|
||||
Collator::EComparisonResult RuleBasedCollator::compare(ForwardCharacterIterator &source,
|
||||
ForwardCharacterIterator &target) {
|
||||
return EQUAL;
|
||||
|
||||
UCollationResult strcoll_result = ucol_strcollinc((UCollator *)this, forwardCharIteratorGlue, &source, forwardCharIteratorGlue, &target);
|
||||
|
||||
if(strcoll_result == UCOL_LESS) {
|
||||
return Collator::LESS;
|
||||
} else if(strcoll_result == UCOL_GREATER) {
|
||||
return Collator::GREATER;
|
||||
} else {
|
||||
return Collator::EQUAL;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
|
||||
|
@ -137,18 +137,28 @@ private:
|
||||
friend int32_t ucol_getNextCE(const UCollator *coll, collIterate *source, UErrorCode *status);
|
||||
friend int32_t ucol_getIncrementalCE(const UCollator *coll, incrementalContext *source, UErrorCode *status);
|
||||
friend int32_t getComplicatedCE(const UCollator *coll, collIterate *source, UErrorCode *status);
|
||||
/*
|
||||
friend int32_t ucol_getSortKey(const UCollator *coll, const UChar *source,
|
||||
int32_t sourceLength, uint8_t *result, int32_t resultLength);
|
||||
*/
|
||||
friend int32_t ucol_calcSortKey(const UCollator *coll,
|
||||
const UChar *source,
|
||||
int32_t sourceLength,
|
||||
uint8_t **result,
|
||||
int32_t resultLength,
|
||||
UBool allocatePrimary);
|
||||
friend UCollationResult ucol_strcoll( const UCollator *coll,
|
||||
const UChar *source,
|
||||
int32_t sourceLength,
|
||||
const UChar *target,
|
||||
int32_t targetLength);
|
||||
/*
|
||||
friend UCollationResult ucol_strcollEx( const UCollator *coll,
|
||||
const UChar *source,
|
||||
int32_t sourceLength,
|
||||
const UChar *target,
|
||||
int32_t targetLength);
|
||||
*/
|
||||
friend int32_t ucol_getSortKeySize(const UCollator *coll,
|
||||
collIterate *s,
|
||||
int32_t currentSize,
|
||||
|
@ -5,7 +5,7 @@
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/ucol.h"
|
||||
#include "ucolimp.h"
|
||||
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/coll.h"
|
||||
@ -1666,6 +1666,7 @@ ucol_strcoll( const UCollator *coll,
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* This is the original sort key function */
|
||||
U_CAPI int32_t
|
||||
ucol_getSortKeyEx(const UCollator *coll,
|
||||
@ -1759,13 +1760,13 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
|
||||
}
|
||||
|
||||
|
||||
|
||||
U_CAPI int32_t
|
||||
ucol_getSortKey(const UCollator *coll,
|
||||
int32_t
|
||||
ucol_calcSortKey(const UCollator *coll,
|
||||
const UChar *source,
|
||||
int32_t sourceLength,
|
||||
uint8_t *result,
|
||||
int32_t resultLength)
|
||||
uint8_t **result,
|
||||
int32_t resultLength,
|
||||
UBool allocatePrimary)
|
||||
{
|
||||
|
||||
uint32_t i = 0; // general purpose counter
|
||||
@ -1774,7 +1775,12 @@ ucol_getSortKey(const UCollator *coll,
|
||||
|
||||
uint8_t second[UCOL_MAX_BUFFER], tert[UCOL_MAX_BUFFER];
|
||||
|
||||
uint8_t *primaries = result, *secondaries = second, *tertiaries = tert;
|
||||
uint8_t *primaries = *result, *secondaries = second, *tertiaries = tert;
|
||||
|
||||
if(primaries == NULL && allocatePrimary == TRUE) {
|
||||
primaries = *result = (uint8_t *)uprv_malloc(UCOL_MAX_BUFFER);
|
||||
resultLength = UCOL_MAX_BUFFER;
|
||||
}
|
||||
|
||||
int32_t primSize = resultLength, secSize = UCOL_MAX_BUFFER, terSize = UCOL_MAX_BUFFER;
|
||||
|
||||
@ -1878,9 +1884,20 @@ ucol_getSortKey(const UCollator *coll,
|
||||
}
|
||||
}
|
||||
if(sortKeySize>resultLength) {
|
||||
resultOverflow = TRUE;
|
||||
sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);
|
||||
goto cleanup;
|
||||
if(allocatePrimary == FALSE) {
|
||||
resultOverflow = TRUE;
|
||||
sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);
|
||||
goto cleanup;
|
||||
} else {
|
||||
uint8_t *newStart;
|
||||
newStart = (uint8_t *)uprv_realloc(primStart, 2*resultLength);
|
||||
if(primStart == NULL) {
|
||||
/*freak out*/
|
||||
}
|
||||
primaries=newStart+(primaries-primStart);
|
||||
resultLength *= 2;
|
||||
primStart = *result = newStart;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(finished) {
|
||||
@ -2012,6 +2029,25 @@ cleanup:
|
||||
return sortKeySize;
|
||||
}
|
||||
|
||||
/* Computes the sort key for `source` without a caller-provided buffer.
 * ucol_calcSortKey is called with a NULL buffer pointer, a length of 0 and
 * allocatePrimary == TRUE.
 *
 * @param coll          collator to use
 * @param source        text to build the key for
 * @param sourceLength  length of `source`
 * @param resultLen     receives the value returned by ucol_calcSortKey
 * @return the buffer pointer left in place by ucol_calcSortKey
 */
U_CFUNC uint8_t *ucol_getSortKeyWithAllocation(const UCollator *coll,
        const UChar *source,
        int32_t sourceLength,
        int32_t *resultLen) {
    uint8_t *keyBuffer = NULL;
    const int32_t keyLength = ucol_calcSortKey(coll, source, sourceLength, &keyBuffer, 0, TRUE);
    *resultLen = keyLength;
    return keyBuffer;
}
|
||||
|
||||
/* Public sort key entry point: forwards to ucol_calcSortKey with the
 * caller's buffer and allocatePrimary == FALSE.
 *
 * @param coll          collator to use
 * @param source        text to build the key for
 * @param sourceLength  length of `source`
 * @param result        caller-supplied output buffer
 * @param resultLength  capacity of `result`
 * @return whatever ucol_calcSortKey returns for these arguments
 */
U_CAPI int32_t
ucol_getSortKey(const UCollator *coll,
        const UChar *source,
        int32_t sourceLength,
        uint8_t *result,
        int32_t resultLength)
{
    /* ucol_calcSortKey takes the buffer pointer by address. */
    uint8_t *callerBuffer = result;
    return ucol_calcSortKey(coll, source, sourceLength, &callerBuffer, resultLength, FALSE);
}
|
||||
|
||||
U_CAPI int32_t
|
||||
ucol_keyHashCode( const uint8_t* key,
|
||||
int32_t length)
|
||||
|
@ -93,7 +93,9 @@ public:
|
||||
* @deprecated
|
||||
*/
|
||||
CollationKey();
|
||||
/**
|
||||
|
||||
|
||||
/**
|
||||
* Creates a collation key based on the collation key values.
|
||||
* @param values the collation key values
|
||||
* @param count number of collation key values, including trailing nulls.
|
||||
@ -207,6 +209,13 @@ private:
|
||||
*/
|
||||
uint16_t* copyValues(int32_t &size) const;
|
||||
|
||||
void adopt(uint8_t *values, int32_t count);
|
||||
|
||||
/**
|
||||
* The secret adopting constructor!
|
||||
*/
|
||||
CollationKey(int32_t count, uint8_t *values);
|
||||
|
||||
/*
|
||||
* Creates a collation key with a string.
|
||||
*/
|
||||
@ -225,7 +234,7 @@ private:
|
||||
static const int32_t kInvalidHashCode;
|
||||
static const int32_t kEmptyHashCode;
|
||||
|
||||
UBool fBogus;
|
||||
UBool fBogus;
|
||||
int32_t fCount;
|
||||
int32_t fCapacity;
|
||||
int32_t fHashCode;
|
||||
|
@ -715,18 +715,30 @@ private:
|
||||
friend int32_t ucol_getNextCE(const UCollator *coll, collIterate *source, UErrorCode *status);
|
||||
friend int32_t ucol_getIncrementalCE(const UCollator *coll, incrementalContext *source, UErrorCode *status);
|
||||
friend int32_t getComplicatedCE(const UCollator *coll, collIterate *source, UErrorCode *status);
|
||||
/*
|
||||
friend int32_t ucol_getSortKey(const UCollator *coll, const UChar *source,
|
||||
int32_t sourceLength, uint8_t *result, int32_t resultLength);
|
||||
*/
|
||||
|
||||
friend int32_t ucol_calcSortKey(const UCollator *coll,
|
||||
const UChar *source,
|
||||
int32_t sourceLength,
|
||||
uint8_t **result,
|
||||
int32_t resultLength,
|
||||
UBool allocatePrimary);
|
||||
|
||||
friend UCollationResult ucol_strcoll( const UCollator *coll,
|
||||
const UChar *source,
|
||||
int32_t sourceLength,
|
||||
const UChar *target,
|
||||
int32_t targetLength);
|
||||
/*
|
||||
friend UCollationResult ucol_strcollEx( const UCollator *coll,
|
||||
const UChar *source,
|
||||
int32_t sourceLength,
|
||||
const UChar *target,
|
||||
int32_t targetLength);
|
||||
*/
|
||||
friend int32_t ucol_getSortKeySize(const UCollator *coll,
|
||||
collIterate *s,
|
||||
int32_t currentSize,
|
||||
@ -997,6 +1009,7 @@ private:
|
||||
|
||||
static UBool isMutexInited;
|
||||
static UMTX collMutex;
|
||||
static UChar cacheKey;
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Data Members
|
||||
|
Loading…
Reference in New Issue
Block a user