ICU-96 collation key adopt, ucol_createSortKey, fix for collation key hash code, C++ incremental compare implementation

X-SVN-Rev: 3196
This commit is contained in:
Vladimir Weinstein 2000-12-12 01:15:30 +00:00
parent eb2fca812e
commit 9ecaf4fcae
7 changed files with 143 additions and 36 deletions

View File

@ -44,7 +44,6 @@ RSC=rc.exe
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /YX /FD /c
# ADD CPP /nologo /MD /W3 /GX /Ob1 /I "..\..\source\common" /I "..\..\include" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /D "U_I18N_IMPLEMENTATION" /YX /FD /c
# SUBTRACT CPP /O<none>
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x409 /d "NDEBUG"
@ -1727,6 +1726,10 @@ InputPath=.\unicode\ucol.h
# End Source File
# Begin Source File
SOURCE=.\ucolimp.h
# End Source File
# Begin Source File
SOURCE=.\unicode\udat.h
!IF "$(CFG)" == "i18n - Win32 Release"

View File

@ -32,6 +32,8 @@
#include "cmemory.h"
#endif
#include "uhash.h"
// A hash code of kInvalidHashCode indicates that the hash code needs
// to be computed. A hash code of kEmptyHashCode is used for empty keys
// and for any key whose computed hash code is kInvalidHashCode.
@ -44,12 +46,19 @@ CollationKey::CollationKey()
{
}
// Adopt bytes allocated with malloc
//
// Adopting constructor: the `values` pointer is stored directly in fBytes,
// no copy of the bytes is made. `count` is recorded as both the length
// (fCount) and the capacity (fCapacity) of the adopted buffer.
// The key starts out non-bogus, with fHashCode set to kInvalidHashCode so
// that the hash code is computed on first request.
// Note the argument order (count, values): it is the reverse of the copying
// constructor CollationKey(const uint8_t*, int32_t), which is what keeps the
// two overloads distinct.
CollationKey::CollationKey(int32_t count, uint8_t *values)
: fBogus(FALSE), fBytes(values), fCount(count), fCapacity(count),
fHashCode(kInvalidHashCode)
{
}
// Create a collation key from a bit array.
CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
: fBogus(FALSE), fCount(count), fCapacity(count),
fHashCode(kInvalidHashCode)
{
fBytes = new uint8_t[count];
fBytes = (uint8_t *)uprv_malloc(count);
if (fBytes == NULL)
{
@ -75,7 +84,7 @@ CollationKey::CollationKey(const CollationKey& other)
return;
}
fBytes = new uint8_t[fCapacity];
fBytes = (uint8_t *)uprv_malloc(fCapacity);
if (fBytes == NULL)
{
@ -91,9 +100,19 @@ CollationKey::CollationKey(const CollationKey& other)
CollationKey::~CollationKey()
{
delete[] fBytes;
uprv_free(fBytes);
}
void CollationKey::adopt(uint8_t *values, int32_t count) {
if(fBytes != NULL) {
uprv_free(fBytes);
}
fBogus = FALSE;
fBytes = values;
fCount = count;
fCapacity = count;
fHashCode = kInvalidHashCode;
}
// set the key to an empty state
CollationKey&
CollationKey::reset()
@ -205,9 +224,9 @@ CollationKey::ensureCapacity(int32_t newSize)
{
if (fCapacity < newSize)
{
delete[] fBytes;
uprv_free(fBytes);
fBytes = new uint8_t[newSize];
fBytes = (uint8_t *)uprv_malloc(newSize);
if (fBytes == NULL)
{
@ -338,6 +357,8 @@ CollationKey::hashCode() const
if (fHashCode == kInvalidHashCode)
{
((CollationKey *)this)->fHashCode = uhash_hashChars(fBytes);
#if 0
// We compute the hash by iterating sparsely over 64 (at most) characters
// spaced evenly through the string. For each character, we multiply the
// previous hash value by a prime number and add the new character in,
@ -361,6 +382,7 @@ CollationKey::hashCode() const
}
((CollationKey *)this)->fHashCode = hash; // cast away const
#endif
}
return fHashCode;

View File

@ -6,7 +6,7 @@
*
* File tblcoll.cpp
*
* Created by: Helena Shih
* Created by: Helena Shih
*
* Modification History:
*
@ -57,6 +57,8 @@
#include "unicode/tblcoll.h"
#include "ucolimp.h"
#include "unicode/coleitr.h"
#include "unicode/locid.h"
#include "unicode/unicode.h"
@ -152,6 +154,7 @@ const int32_t RuleBasedCollator::TERIGNORABLE = 0x02;
const int16_t RuleBasedCollator::FILEID = 0x5443; // unique file id for parity check
const char* RuleBasedCollator::kFilenameSuffix = ".col"; // binary collation file extension
char RuleBasedCollator::fgClassID = 0; // Value is irrelevant // class id
UChar RuleBasedCollator::cacheKey = 0;
UMTX RuleBasedCollator::collMutex = NULL;
UBool RuleBasedCollator::isMutexInited = RuleBasedCollator::initMutex();
@ -706,6 +709,7 @@ void RuleBasedCollator::constructFromRules(const UnicodeString& rules,
data = 0;
}
status = U_ZERO_ERROR;
isOverIgnore = FALSE;
setStrength(Collator::TERTIARY);
@ -724,6 +728,7 @@ void RuleBasedCollator::constructFromRules(const UnicodeString& rules,
// Now that we've got all the buffers allocated, do the actual work
mPattern = 0;
build(rules, status);
addToCache(UnicodeString(cacheKey++));
}
void
@ -1695,18 +1700,11 @@ RuleBasedCollator::getCollationKey( const UChar* source,
return sortkey.reset();
}
uint8_t result[tblcoll_StackBufferLen];
uint8_t *allocRes = result;
int32_t resLen = ucol_getSortKey((UCollator *)this, source, sourceLen, allocRes, tblcoll_StackBufferLen);
// if the result is too big
if(resLen > tblcoll_StackBufferLen) {
allocRes = new uint8_t[resLen];
resLen = ucol_getSortKey((UCollator *)this, source, sourceLen, allocRes, tblcoll_StackBufferLen);
}
sortkey = CollationKey(allocRes, resLen);
if(allocRes != result) {
delete[] allocRes;
}
uint8_t *result;
int32_t resLen = 0;
result = ucol_getSortKeyWithAllocation((UCollator *)this, source, sourceLen, &resLen);
sortkey.adopt(result, resLen);
return sortkey;
}
@ -3012,16 +3010,32 @@ UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCod
}
// Returns a new heap-allocated RuleBasedCollator produced by the copy
// constructor. As written, the copy is constructed while collMutex is held
// and the mutex is released before returning.
// NOTE(review): the trailing `return new RuleBasedCollator(*this);` comes
// after `return result;` and is therefore unreachable as this block reads.
// It looks like an alternative (unlocked) version of the body shown next to
// the locked one -- confirm which of the two is the intended implementation.
Collator* RuleBasedCollator::safeClone(void) {
umtx_lock(&collMutex);
Collator *result = new RuleBasedCollator(*this);
umtx_unlock(&collMutex);
return result;
return new RuleBasedCollator(*this);
}
/**
 * Adapter with a plain `UChar (void *)` signature around a
 * ForwardCharacterIterator: each call consumes and returns the iterator's
 * next code unit.
 *
 * @param iterator a ForwardCharacterIterator passed as an untyped pointer.
 * @return the next code unit, or 0xFFFF once the iterator reports
 *         ForwardCharacterIterator::DONE.
 */
UChar forwardCharIteratorGlue(void *iterator) {
    ForwardCharacterIterator *const chars =
        static_cast<ForwardCharacterIterator *>(iterator);
    const UChar next = chars->nextPostInc();

    // Ordinary code units pass straight through.
    if(next != ForwardCharacterIterator::DONE) {
        return next;
    }
    // End of input: hand back the fixed 0xFFFF marker.
    return 0xFFFF;
}
// Compares two strings that are supplied through forward character iterators.
// NOTE(review): as this block reads, the first statement returns EQUAL
// unconditionally, so the ucol_strcollinc-based comparison below it is never
// executed. That line looks like the earlier stub shown next to its
// replacement -- confirm which of the two is live before relying on this
// function.
Collator::EComparisonResult RuleBasedCollator::compare(ForwardCharacterIterator &source,
ForwardCharacterIterator &target) {
return EQUAL;
// Both iterators are handed to ucol_strcollinc as opaque pointers and are
// read through forwardCharIteratorGlue.
UCollationResult strcoll_result = ucol_strcollinc((UCollator *)this, forwardCharIteratorGlue, &source, forwardCharIteratorGlue, &target);
// Translate the C result code into the C++ enum; any value that is neither
// UCOL_LESS nor UCOL_GREATER is reported as EQUAL.
if(strcoll_result == UCOL_LESS) {
return Collator::LESS;
} else if(strcoll_result == UCOL_GREATER) {
return Collator::GREATER;
} else {
return Collator::EQUAL;
}
}
int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,

View File

@ -137,18 +137,28 @@ private:
friend int32_t ucol_getNextCE(const UCollator *coll, collIterate *source, UErrorCode *status);
friend int32_t ucol_getIncrementalCE(const UCollator *coll, incrementalContext *source, UErrorCode *status);
friend int32_t getComplicatedCE(const UCollator *coll, collIterate *source, UErrorCode *status);
/*
friend int32_t ucol_getSortKey(const UCollator *coll, const UChar *source,
int32_t sourceLength, uint8_t *result, int32_t resultLength);
*/
friend int32_t ucol_calcSortKey(const UCollator *coll,
const UChar *source,
int32_t sourceLength,
uint8_t **result,
int32_t resultLength,
UBool allocatePrimary);
friend UCollationResult ucol_strcoll( const UCollator *coll,
const UChar *source,
int32_t sourceLength,
const UChar *target,
int32_t targetLength);
/*
friend UCollationResult ucol_strcollEx( const UCollator *coll,
const UChar *source,
int32_t sourceLength,
const UChar *target,
int32_t targetLength);
*/
friend int32_t ucol_getSortKeySize(const UCollator *coll,
collIterate *s,
int32_t currentSize,

View File

@ -5,7 +5,7 @@
*******************************************************************************
*/
#include "unicode/ucol.h"
#include "ucolimp.h"
#include "unicode/uloc.h"
#include "unicode/coll.h"
@ -1666,6 +1666,7 @@ ucol_strcoll( const UCollator *coll,
return result;
}
/* This is the original sort key function */
U_CAPI int32_t
ucol_getSortKeyEx(const UCollator *coll,
@ -1759,13 +1760,13 @@ int32_t ucol_getSortKeySize(const UCollator *coll, collIterate *s, int32_t curre
}
U_CAPI int32_t
ucol_getSortKey(const UCollator *coll,
int32_t
ucol_calcSortKey(const UCollator *coll,
const UChar *source,
int32_t sourceLength,
uint8_t *result,
int32_t resultLength)
uint8_t **result,
int32_t resultLength,
UBool allocatePrimary)
{
uint32_t i = 0; // general purpose counter
@ -1774,7 +1775,12 @@ ucol_getSortKey(const UCollator *coll,
uint8_t second[UCOL_MAX_BUFFER], tert[UCOL_MAX_BUFFER];
uint8_t *primaries = result, *secondaries = second, *tertiaries = tert;
uint8_t *primaries = *result, *secondaries = second, *tertiaries = tert;
if(primaries == NULL && allocatePrimary == TRUE) {
primaries = *result = (uint8_t *)uprv_malloc(UCOL_MAX_BUFFER);
resultLength = UCOL_MAX_BUFFER;
}
int32_t primSize = resultLength, secSize = UCOL_MAX_BUFFER, terSize = UCOL_MAX_BUFFER;
@ -1878,9 +1884,20 @@ ucol_getSortKey(const UCollator *coll,
}
}
if(sortKeySize>resultLength) {
resultOverflow = TRUE;
sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);
goto cleanup;
if(allocatePrimary == FALSE) {
resultOverflow = TRUE;
sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);
goto cleanup;
} else {
/* Allocating mode (allocatePrimary is not FALSE): instead of reporting an
   overflow, double the primary buffer and keep going. */
uint8_t *newStart;
newStart = (uint8_t *)uprv_realloc(primStart, 2*resultLength);
/* NOTE(review): this tests primStart, but the result of the reallocation was
   stored in newStart, and the branch body is empty. A failed reallocation is
   therefore neither detected nor handled, and newStart is used below
   regardless. TODO: test newStart and leave the function on failure. */
if(primStart == NULL) {
/*freak out*/
}
/* Re-base the write cursor: same offset from the start of the buffer, now
   relative to the reallocated block. Note the offset is computed from the
   old primStart value after the reallocation call has already been made. */
primaries=newStart+(primaries-primStart);
resultLength *= 2;
/* Publish the new block to the caller through *result as well. */
primStart = *result = newStart;
}
}
}
if(finished) {
@ -2012,6 +2029,25 @@ cleanup:
return sortKeySize;
}
/**
 * Computes a sort key into a buffer obtained by ucol_calcSortKey itself:
 * the helper is given a NULL buffer of length 0 with allocatePrimary set
 * to TRUE.
 *
 * @param coll         collator used to build the key.
 * @param source       text to build the key for.
 * @param sourceLength length of source, passed through unchanged.
 * @param resultLen    receives the value returned by ucol_calcSortKey.
 * @return the buffer pointer as left by ucol_calcSortKey.
 *         NOTE(review): the buffer appears to come from uprv_malloc /
 *         uprv_realloc, so the caller presumably releases it with
 *         uprv_free -- confirm.
 */
U_CFUNC uint8_t *ucol_getSortKeyWithAllocation(const UCollator *coll,
        const UChar *source,
        int32_t sourceLength,
        int32_t *resultLen) {
    uint8_t *allocated = NULL;
    const int32_t keyLength =
        ucol_calcSortKey(coll, source, sourceLength, &allocated, 0, TRUE);
    *resultLen = keyLength;
    return allocated;
}
/**
 * Public sort key entry point: writes the key for source into the
 * caller-provided buffer by delegating to ucol_calcSortKey with
 * allocatePrimary set to FALSE.
 *
 * @param coll         collator used to build the key.
 * @param source       text to build the key for.
 * @param sourceLength length of source, passed through unchanged.
 * @param result       caller-provided output buffer.
 * @param resultLength capacity of result in bytes.
 * @return the value returned by ucol_calcSortKey.
 */
U_CAPI int32_t
ucol_getSortKey(const UCollator *coll,
        const UChar *source,
        int32_t sourceLength,
        uint8_t *result,
        int32_t resultLength)
{
    /* The helper takes the buffer by address; a local copy of the pointer
       is passed, so the caller's own pointer is never modified. */
    uint8_t *callerBuffer = result;
    const int32_t keyLength = ucol_calcSortKey(coll, source, sourceLength,
                                               &callerBuffer, resultLength, FALSE);
    return keyLength;
}
U_CAPI int32_t
ucol_keyHashCode( const uint8_t* key,
int32_t length)

View File

@ -93,7 +93,9 @@ public:
* @deprecated
*/
CollationKey();
/**
/**
* Creates a collation key based on the collation key values.
* @param values the collation key values
* @param count number of collation key values, including trailing nulls.
@ -207,6 +209,13 @@ private:
*/
uint16_t* copyValues(int32_t &size) const;
void adopt(uint8_t *values, int32_t count);
/**
* The secret adopting constructor!
*/
CollationKey(int32_t count, uint8_t *values);
/*
* Creates a collation key with a string.
*/
@ -225,7 +234,7 @@ private:
static const int32_t kInvalidHashCode;
static const int32_t kEmptyHashCode;
UBool fBogus;
UBool fBogus;
int32_t fCount;
int32_t fCapacity;
int32_t fHashCode;

View File

@ -715,18 +715,30 @@ private:
friend int32_t ucol_getNextCE(const UCollator *coll, collIterate *source, UErrorCode *status);
friend int32_t ucol_getIncrementalCE(const UCollator *coll, incrementalContext *source, UErrorCode *status);
friend int32_t getComplicatedCE(const UCollator *coll, collIterate *source, UErrorCode *status);
/*
friend int32_t ucol_getSortKey(const UCollator *coll, const UChar *source,
int32_t sourceLength, uint8_t *result, int32_t resultLength);
*/
friend int32_t ucol_calcSortKey(const UCollator *coll,
const UChar *source,
int32_t sourceLength,
uint8_t **result,
int32_t resultLength,
UBool allocatePrimary);
friend UCollationResult ucol_strcoll( const UCollator *coll,
const UChar *source,
int32_t sourceLength,
const UChar *target,
int32_t targetLength);
/*
friend UCollationResult ucol_strcollEx( const UCollator *coll,
const UChar *source,
int32_t sourceLength,
const UChar *target,
int32_t targetLength);
*/
friend int32_t ucol_getSortKeySize(const UCollator *coll,
collIterate *s,
int32_t currentSize,
@ -997,6 +1009,7 @@ private:
static UBool isMutexInited;
static UMTX collMutex;
static UChar cacheKey;
//--------------------------------------------------------------------------
// Data Members