2001-01-15 19:02:30 +00:00
|
|
|
/*
|
2001-02-20 00:26:50 +00:00
|
|
|
******************************************************************************
|
2010-03-12 20:08:20 +00:00
|
|
|
* Copyright (C) 2001-2010, International Business Machines
|
2001-01-15 19:02:30 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
2001-02-20 00:26:50 +00:00
|
|
|
******************************************************************************
|
|
|
|
*
|
|
|
|
* File ucoleitr.cpp
|
|
|
|
*
|
|
|
|
* Modification History:
|
|
|
|
*
|
|
|
|
* Date Name Description
|
|
|
|
* 02/15/2001 synwee Modified all methods to process its own function
|
|
|
|
* instead of calling the equivalent c++ api (coleitr.h)
|
|
|
|
******************************************************************************/
|
2001-01-15 19:02:30 +00:00
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
|
|
|
#if !UCONFIG_NO_COLLATION
|
|
|
|
|
2001-01-15 19:02:30 +00:00
|
|
|
#include "unicode/ucoleitr.h"
|
|
|
|
#include "unicode/ustring.h"
|
2001-11-14 22:24:17 +00:00
|
|
|
#include "unicode/sortkey.h"
|
2008-05-23 04:22:28 +00:00
|
|
|
#include "unicode/uobject.h"
|
2002-07-12 21:42:24 +00:00
|
|
|
#include "ucol_imp.h"
|
2001-02-20 00:26:50 +00:00
|
|
|
#include "cmemory.h"
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_USE
|
|
|
|
|
2001-04-12 00:08:26 +00:00
|
|
|
#define BUFFER_LENGTH 100

/* Capacity of the in-object CE buffers, and the number of slots added each time one grows. */
#define DEFAULT_BUFFER_SIZE 16
#define BUFFER_GROW 8

/* Element count of a true array (not a pointer). The argument is parenthesized so that
 * any expression naming an array can be passed safely. */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

/* Copy `count` elements (sized after *src) from src to dst; the ranges must not overlap. */
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])

/* Allocate an uninitialized array of `count` objects of `type`.
 * The whole expansion is parenthesized so it behaves as a single expression. */
#define NEW_ARRAY(type, count) ((type *) uprv_malloc((count) * sizeof(type)))

/* Resize `array` to hold `newSize` elements; yields the uprv_realloc() result. */
#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])

/* Release an array obtained from NEW_ARRAY / GROW_ARRAY. */
#define DELETE_ARRAY(array) uprv_free((void *) (array))
|
|
|
|
|
2001-02-20 00:26:50 +00:00
|
|
|
/* Local alias for the collation iteration state structure. */
typedef struct collIterate collIterator;

/*
 * One raw (unprocessed) 32-bit collation element together with the
 * iterator offsets recorded around the call that produced it.
 */
struct RCEI
{
    uint32_t ce;    /* raw collation element                    */
    int32_t  low;   /* lower of the two offsets recorded for it */
    int32_t  high;  /* higher of the two offsets recorded for it */
};
|
|
|
|
|
2008-06-06 21:51:25 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2008-05-23 04:22:28 +00:00
|
|
|
struct RCEBuffer
|
|
|
|
{
|
|
|
|
RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
|
|
|
|
RCEI *buffer;
|
|
|
|
int32_t bufferIndex;
|
|
|
|
int32_t bufferSize;
|
|
|
|
|
|
|
|
RCEBuffer();
|
|
|
|
~RCEBuffer();
|
|
|
|
|
|
|
|
UBool empty() const;
|
|
|
|
void put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
|
|
|
|
const RCEI *get();
|
|
|
|
};
|
|
|
|
|
|
|
|
RCEBuffer::RCEBuffer()
|
|
|
|
{
|
|
|
|
buffer = defaultBuffer;
|
|
|
|
bufferIndex = 0;
|
|
|
|
bufferSize = DEFAULT_BUFFER_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
RCEBuffer::~RCEBuffer()
|
|
|
|
{
|
|
|
|
if (buffer != defaultBuffer) {
|
|
|
|
DELETE_ARRAY(buffer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
UBool RCEBuffer::empty() const
|
|
|
|
{
|
|
|
|
return bufferIndex <= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
|
|
|
|
{
|
|
|
|
if (bufferIndex >= bufferSize) {
|
|
|
|
RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
|
|
|
|
|
|
|
|
ARRAY_COPY(newBuffer, buffer, bufferSize);
|
|
|
|
|
|
|
|
if (buffer != defaultBuffer) {
|
|
|
|
DELETE_ARRAY(buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
buffer = newBuffer;
|
|
|
|
bufferSize += BUFFER_GROW;
|
|
|
|
}
|
|
|
|
|
|
|
|
buffer[bufferIndex].ce = ce;
|
|
|
|
buffer[bufferIndex].low = ixLow;
|
|
|
|
buffer[bufferIndex].high = ixHigh;
|
|
|
|
|
|
|
|
bufferIndex += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
const RCEI *RCEBuffer::get()
|
|
|
|
{
|
|
|
|
if (bufferIndex > 0) {
|
|
|
|
return &buffer[--bufferIndex];
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * One processed 64-bit collation element together with the iterator
 * offsets recorded for the raw CE it was derived from.
 */
struct PCEI
{
    uint64_t ce;    /* processed collation element */
    int32_t  low;   /* lower offset                */
    int32_t  high;  /* higher offset               */
};
|
|
|
|
|
|
|
|
struct PCEBuffer
|
|
|
|
{
|
|
|
|
PCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
|
|
|
|
PCEI *buffer;
|
|
|
|
int32_t bufferIndex;
|
|
|
|
int32_t bufferSize;
|
|
|
|
|
|
|
|
PCEBuffer();
|
|
|
|
~PCEBuffer();
|
|
|
|
|
|
|
|
void reset();
|
|
|
|
UBool empty() const;
|
|
|
|
void put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
|
|
|
|
const PCEI *get();
|
|
|
|
};
|
|
|
|
|
|
|
|
PCEBuffer::PCEBuffer()
|
|
|
|
{
|
|
|
|
buffer = defaultBuffer;
|
|
|
|
bufferIndex = 0;
|
|
|
|
bufferSize = DEFAULT_BUFFER_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
PCEBuffer::~PCEBuffer()
|
|
|
|
{
|
|
|
|
if (buffer != defaultBuffer) {
|
|
|
|
DELETE_ARRAY(buffer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void PCEBuffer::reset()
|
|
|
|
{
|
|
|
|
bufferIndex = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
UBool PCEBuffer::empty() const
|
|
|
|
{
|
|
|
|
return bufferIndex <= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
|
|
|
|
{
|
|
|
|
if (bufferIndex >= bufferSize) {
|
|
|
|
PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
|
|
|
|
|
|
|
|
ARRAY_COPY(newBuffer, buffer, bufferSize);
|
|
|
|
|
|
|
|
if (buffer != defaultBuffer) {
|
|
|
|
DELETE_ARRAY(buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
buffer = newBuffer;
|
|
|
|
bufferSize += BUFFER_GROW;
|
|
|
|
}
|
|
|
|
|
|
|
|
buffer[bufferIndex].ce = ce;
|
|
|
|
buffer[bufferIndex].low = ixLow;
|
|
|
|
buffer[bufferIndex].high = ixHigh;
|
|
|
|
|
|
|
|
bufferIndex += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
const PCEI *PCEBuffer::get()
|
|
|
|
{
|
|
|
|
if (bufferIndex > 0) {
|
|
|
|
return &buffer[--bufferIndex];
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This inherits from UObject so that
|
|
|
|
* it can be allocated by new and the
|
|
|
|
* constructor for PCEBuffer is called.
|
|
|
|
*/
|
|
|
|
struct UCollationPCE : public UObject
|
|
|
|
{
|
|
|
|
PCEBuffer pceBuffer;
|
|
|
|
UCollationStrength strength;
|
|
|
|
UBool toShift;
|
|
|
|
UBool isShifted;
|
|
|
|
uint32_t variableTop;
|
|
|
|
|
|
|
|
UCollationPCE(UCollationElements *elems);
|
|
|
|
~UCollationPCE();
|
|
|
|
|
|
|
|
void init(const UCollator *coll);
|
|
|
|
|
|
|
|
virtual UClassID getDynamicClassID() const;
|
|
|
|
static UClassID getStaticClassID();
|
|
|
|
};
|
|
|
|
|
|
|
|
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
|
|
|
|
|
|
|
|
UCollationPCE::UCollationPCE(UCollationElements *elems)
|
|
|
|
{
|
|
|
|
init(elems->iteratordata_.coll);
|
|
|
|
}
|
|
|
|
|
|
|
|
void UCollationPCE::init(const UCollator *coll)
|
|
|
|
{
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
|
|
|
|
strength = ucol_getStrength(coll);
|
|
|
|
toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
|
|
|
|
isShifted = FALSE;
|
|
|
|
variableTop = coll->variableTopValue << 16;
|
|
|
|
}
|
|
|
|
|
|
|
|
UCollationPCE::~UCollationPCE()
|
|
|
|
{
|
|
|
|
// nothing to do
|
|
|
|
}
|
|
|
|
|
2008-06-06 21:51:25 +00:00
|
|
|
|
|
|
|
U_NAMESPACE_END
|
|
|
|
|
|
|
|
|
2008-05-23 04:22:28 +00:00
|
|
|
/*
 * Convert one raw 32-bit CE into a 64-bit processed CE.
 *
 * The result is laid out as four 16-bit fields, from the top down:
 * primary, secondary, tertiary, quaternary. Which of the first three are
 * filled in depends on the strength stored in elems->pce; weights above
 * that strength stay zero.
 *
 * When alternate handling is "shifted" and the CE lies below variableTop
 * (or it has a zero primary and directly follows a shifted CE), the CE is
 * shifted: its primary moves to the quaternary field (only for strength
 * >= UCOL_QUATERNARY) and the other fields are cleared. A zero-primary CE
 * in that situation yields UCOL_IGNORABLE. Non-shifted CEs get a
 * quaternary of 0xFFFF at strength >= UCOL_QUATERNARY.
 *
 * elems->pce must be non-NULL; its isShifted flag is updated as a side
 * effect.
 */
inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
{
    uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;

    // This is clean, but somewhat slow...
    // We could apply the mask to ce and then
    // just get all three orders...
    switch(elems->pce->strength) {
    default:
        tertiary = ucol_tertiaryOrder(ce);
        /* note fall-through */

    case UCOL_SECONDARY:
        secondary = ucol_secondaryOrder(ce);
        /* note fall-through */

    case UCOL_PRIMARY:
        // ucol_primaryOrder() returns a signed int32_t. If it comes back
        // negative for a CE with the top bit set, widening it to 64 bits
        // would set every bit above the low 16, and copying that into
        // `quaternary` below would corrupt the other fields. Keep exactly
        // the 16 bits the layout reserves for a primary.
        primary = (uint64_t) ucol_primaryOrder(ce) & 0xFFFF;
    }

    // **** This should probably handle continuations too.  ****
    // **** That means that we need 24 bits for the primary ****
    // **** instead of the 16 that we're currently using.   ****
    // **** So we can lay out the 64 bits as: 24.12.12.16.  ****
    // **** Another complication with continuations is that ****
    // **** the *second* CE is marked as a continuation, so ****
    // **** we always have to peek ahead to know how long   ****
    // **** the primary is...                               ****
    if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0)
                || (elems->pce->isShifted && primary == 0)) {

        if (primary == 0) {
            // isShifted is deliberately left set here, as before.
            return UCOL_IGNORABLE;
        }

        if (elems->pce->strength >= UCOL_QUATERNARY) {
            quaternary = primary;
        }

        primary = secondary = tertiary = 0;
        elems->pce->isShifted = TRUE;
    } else {
        if (elems->pce->strength >= UCOL_QUATERNARY) {
            quaternary = 0xFFFF;
        }

        elems->pce->isShifted = FALSE;
    }

    return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
}
|
|
|
|
|
|
|
|
/*
 * Refresh the processed-CE state of `elems` from its collator.
 * Does nothing when no processed-CE state has been created yet.
 */
U_CAPI void U_EXPORT2
uprv_init_pce(const UCollationElements *elems)
{
    if (elems->pce == NULL) {
        return;
    }

    elems->pce->init(elems->iteratordata_.coll);
}
|
|
|
|
|
2008-06-06 21:51:25 +00:00
|
|
|
|
|
|
|
|
2001-02-20 00:26:50 +00:00
|
|
|
/* public methods ---------------------------------------------------- */
|
2001-01-15 19:02:30 +00:00
|
|
|
|
2001-11-21 01:08:55 +00:00
|
|
|
/*
 * Create a collation element iterator over `text` for collator `coll`.
 *
 * Returns NULL if *status already indicates failure, or if the iterator
 * cannot be allocated (in which case *status is set to
 * U_MEMORY_ALLOCATION_ERROR). A NULL `text` is treated as having length 0.
 * The new iterator starts in the "reset" state, does not own its text and
 * has no processed-CE state.
 */
U_CAPI UCollationElements* U_EXPORT2
ucol_openElements(const UCollator  *coll,
                  const UChar      *text,
                        int32_t    textLength,
                        UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    UCollationElements *iter = new UCollationElements;

    if (iter == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    iter->reset_     = TRUE;
    iter->isWritable = FALSE;
    iter->pce        = NULL;

    /* a missing string is iterated as an empty one */
    const int32_t length = (text == NULL) ? 0 : textLength;

    uprv_init_collIterate(coll, text, length, &iter->iteratordata_, status);

    return iter;
}
|
|
|
|
|
2009-01-22 00:24:48 +00:00
|
|
|
|
2001-11-21 01:08:55 +00:00
|
|
|
/*
 * Destroy a collation element iterator and everything it owns:
 * the extended CE buffer, the offset buffer, the text (only when the
 * iterator is marked writable), and the processed-CE state.
 * Passing NULL is a no-op.
 */
U_CAPI void U_EXPORT2
ucol_closeElements(UCollationElements *elems)
{
    if (elems == NULL) {
        return;
    }

    collIterate *state = &elems->iteratordata_;

    if (state->extendCEs) {
        uprv_free(state->extendCEs);
    }

    if (state->offsetBuffer) {
        uprv_free(state->offsetBuffer);
    }

    /* the text is freed only when the iterator is flagged as owning it */
    if (elems->isWritable && state->string != NULL) {
        uprv_free((UChar *)state->string);
    }

    if (elems->pce != NULL) {
        delete elems->pce;
    }

    delete elems;
}
|
|
|
|
|
2001-11-21 01:08:55 +00:00
|
|
|
/*
 * Rewind the iterator to the start of its text.
 *
 * The end pointer is recomputed with u_strlen() when the length is not
 * known. All flags are cleared except UCOL_FORCE_HAN_IMPLICIT;
 * UCOL_ITER_HASLEN is then set, and UCOL_ITER_NORM is set when the
 * collator has normalization switched on. Pending CEs, the writable
 * buffer, the FCD position and the offset-repeat state are cleared.
 */
U_CAPI void U_EXPORT2
ucol_reset(UCollationElements *elems)
{
    collIterate *state = &(elems->iteratordata_);

    elems->reset_ = TRUE;
    state->pos    = state->string;

    const UBool endKnown = (state->flags & UCOL_ITER_HASLEN) != 0 && state->endp != NULL;

    if (!endKnown) {
        state->endp = state->string + u_strlen(state->string);
    }

    /* drop any CEs that were fetched but not yet returned */
    state->toReturn = state->CEs;
    state->CEpos    = state->toReturn;

    state->flags = (state->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;

    if (state->coll->normalizationMode == UCOL_ON) {
        state->flags |= UCOL_ITER_NORM;
    }

    state->writableBuffer.remove();
    state->fcdPosition = NULL;

    /* offsetReturn / offsetStore are left alone here */
    state->offsetRepeatValue = 0;
    state->offsetRepeatCount = 0;
}
|
|
|
|
|
2009-01-22 00:24:48 +00:00
|
|
|
/*
 * Set the UCOL_FORCE_HAN_IMPLICIT flag on the iterator.
 *
 * Does nothing if *status already indicates failure. A NULL `elems`
 * sets *status to U_ILLEGAL_ARGUMENT_ERROR.
 */
U_CAPI void U_EXPORT2
ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    if (elems != NULL) {
        elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT;
        return;
    }

    *status = U_ILLEGAL_ARGUMENT_ERROR;
}
|
|
|
|
|
2001-11-21 01:08:55 +00:00
|
|
|
/*
 * Return the next raw collation element, or UCOL_NULLORDER when the
 * iterator is exhausted or *status already indicates failure.
 * Clears the iterator's "reset" state.
 */
U_CAPI int32_t U_EXPORT2
ucol_next(UCollationElements *elems,
          UErrorCode         *status)
{
    if (U_FAILURE(*status)) {
        return UCOL_NULLORDER;
    }

    collIterate *state = &elems->iteratordata_;

    elems->reset_ = FALSE;

    int32_t order = (int32_t)ucol_getNextCE(state->coll, state, status);

    /* translate the internal end marker into the public one */
    if (order == UCOL_NO_MORE_CES) {
        order = UCOL_NULLORDER;
    }

    return order;
}
|
|
|
|
|
2008-05-23 04:22:28 +00:00
|
|
|
/*
 * Return the next non-ignorable processed (64-bit) collation element.
 *
 * Raw CEs are fetched and run through processCE() until one produces a
 * value other than UCOL_IGNORABLE or the text is exhausted.
 *
 * @param elems   the iterator; its processed-CE state is created on first use
 * @param ixLow   if not NULL, receives the offset reported just before the
 *                last raw CE was fetched
 * @param ixHigh  if not NULL, receives the offset reported just after it
 * @param status  error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *                processed-CE state cannot be allocated
 * @return the processed CE, or UCOL_PROCESSED_NULLORDER at the end of the
 *         text or on error (the offsets are not written on error)
 */
U_CAPI int64_t U_EXPORT2
ucol_nextProcessed(UCollationElements *elems,
                   int32_t            *ixLow,
                   int32_t            *ixHigh,
                   UErrorCode         *status)
{
    const UCollator *coll = elems->iteratordata_.coll;
    int64_t result = UCOL_IGNORABLE;
    // offsets are int32_t everywhere else (ucol_getOffset, the out-params)
    int32_t low = 0, high = 0;

    if (U_FAILURE(*status)) {
        return UCOL_PROCESSED_NULLORDER;
    }

    if (elems->pce == NULL) {
        elems->pce = new UCollationPCE(elems);

        // processCE() dereferences elems->pce, so a failed allocation
        // has to be reported here rather than crash below.
        if (elems->pce == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return UCOL_PROCESSED_NULLORDER;
        }
    } else {
        elems->pce->pceBuffer.reset();
    }

    elems->reset_ = FALSE;

    do {
        low = ucol_getOffset(elems);
        uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status);
        high = ucol_getOffset(elems);

        if (ce == UCOL_NO_MORE_CES) {
             result = UCOL_PROCESSED_NULLORDER;
             break;
        }

        result = processCE(elems, ce);
    } while (result == UCOL_IGNORABLE);

    if (ixLow != NULL) {
        *ixLow = low;
    }

    if (ixHigh != NULL) {
        *ixHigh = high;
    }

    return result;
}
|
|
|
|
|
2001-11-21 01:08:55 +00:00
|
|
|
/*
 * Return the previous raw collation element, or UCOL_NULLORDER when there
 * is none or *status already indicates failure.
 *
 * If the iterator is still in its "reset" state and positioned at the
 * start of the text, it is first moved to the end of the text (computing
 * the end with u_strlen() when it is not yet known) so that backward
 * iteration starts from the end.
 */
U_CAPI int32_t U_EXPORT2
ucol_previous(UCollationElements *elems,
              UErrorCode         *status)
{
    if (U_FAILURE(*status)) {
        return UCOL_NULLORDER;
    }

    const UBool atFreshStart =
        elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string);

    if (atFreshStart) {
        if (elems->iteratordata_.endp == NULL) {
            elems->iteratordata_.endp   = elems->iteratordata_.string +
                                          u_strlen(elems->iteratordata_.string);
            elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
        }

        elems->iteratordata_.pos         = elems->iteratordata_.endp;
        elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
    }

    elems->reset_ = FALSE;

    int32_t order = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
                                            &(elems->iteratordata_),
                                            status);

    /* translate the internal end marker into the public one */
    if (order == UCOL_NO_MORE_CES) {
        order = UCOL_NULLORDER;
    }

    return order;
}
|
|
|
|
|
2008-05-23 04:22:28 +00:00
|
|
|
/*
 * Return the previous non-ignorable processed (64-bit) collation element.
 *
 * Raw CEs are read backwards in runs: each run collects CEs until one with
 * a non-zero primary is found (or the text is exhausted), then the run is
 * processed in the opposite order from which it was read, and every
 * non-ignorable result is pushed onto the processed-CE buffer. One entry
 * is popped from that buffer per call; entries left over from an earlier
 * call are returned before any more text is read.
 *
 * @param elems   the iterator; its processed-CE state is created on first use
 * @param ixLow   if not NULL, receives the lower offset of the returned CE,
 *                or -1 when nothing is left
 * @param ixHigh  if not NULL, receives the higher offset of the returned CE,
 *                or -1 when nothing is left
 * @param status  error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *                processed-CE state cannot be allocated
 * @return the processed CE, or UCOL_PROCESSED_NULLORDER when nothing is
 *         left or on error (the offsets are not written on error)
 */
U_CAPI int64_t U_EXPORT2
ucol_previousProcessed(UCollationElements *elems,
                   int32_t            *ixLow,
                   int32_t            *ixHigh,
                   UErrorCode         *status)
{
    const UCollator *coll = elems->iteratordata_.coll;
    int64_t result = UCOL_IGNORABLE;
    int32_t low = 0, high = 0;

    if (U_FAILURE(*status)) {
        return UCOL_PROCESSED_NULLORDER;
    }

    // A freshly reset iterator sitting at the start of the text is moved
    // to the end so that backward iteration begins there.
    if (elems->reset_ &&
        (elems->iteratordata_.pos == elems->iteratordata_.string)) {
        if (elems->iteratordata_.endp == NULL) {
            elems->iteratordata_.endp = elems->iteratordata_.string +
                                        u_strlen(elems->iteratordata_.string);
            elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
        }

        elems->iteratordata_.pos = elems->iteratordata_.endp;
        elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
    }

    if (elems->pce == NULL) {
        elems->pce = new UCollationPCE(elems);

        // Everything below dereferences elems->pce, so a failed
        // allocation has to be reported here rather than crash.
        if (elems->pce == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return UCOL_PROCESSED_NULLORDER;
        }
    }
    // An existing buffer is deliberately NOT reset: it may still hold
    // processed CEs from the run read by an earlier call.

    elems->reset_ = FALSE;

    while (elems->pce->pceBuffer.empty()) {
        // buffer raw CEs up to non-ignorable primary
        RCEBuffer rceb;
        uint32_t ce;

        // **** do we need to reset rceb, or will it always be empty at this point ****
        do {
            high = ucol_getOffset(elems);
            ce   = ucol_getPrevCE(coll, &elems->iteratordata_, status);
            low  = ucol_getOffset(elems);

            if (ce == UCOL_NO_MORE_CES) {
                if (! rceb.empty()) {
                    // process what was collected before the text ran out
                    break;
                }

                goto finish;
            }

            rceb.put(ce, low, high);
        } while ((ce & UCOL_PRIMARYMASK) == 0);

        // process the raw CEs
        while (! rceb.empty()) {
            const RCEI *rcei = rceb.get();

            result = processCE(elems, rcei->ce);

            if (result != UCOL_IGNORABLE) {
                elems->pce->pceBuffer.put(result, rcei->low, rcei->high);
            }
        }
    }

finish:
    if (elems->pce->pceBuffer.empty()) {
        // **** Is -1 the right value for ixLow, ixHigh? ****
        if (ixLow != NULL) {
            *ixLow = -1;
        }

        if (ixHigh != NULL) {
            *ixHigh = -1;
        }

        return UCOL_PROCESSED_NULLORDER;
    }

    const PCEI *pcei = elems->pce->pceBuffer.get();

    if (ixLow != NULL) {
        *ixLow = pcei->low;
    }

    if (ixHigh != NULL) {
        *ixHigh = pcei->high;
    }

    return pcei->ce;
}
|
|
|
|
|
2001-11-21 01:08:55 +00:00
|
|
|
/*
 * Look up the expansion size recorded for a collation element that ends
 * an expansion.
 *
 * `order` is masked down to the weights relevant at the collator's
 * strength and then searched for, with the same mask applied, in the
 * collator's endExpansionCE .. lastEndExpansionCE table by binary search.
 * On a hit the matching entry of expansionCESize is returned. Otherwise
 * the result is 2 when the low 16 bits of the masked order equal 0x00C0,
 * and 1 in every other case.
 */
U_CAPI int32_t U_EXPORT2
ucol_getMaxExpansion(const UCollationElements *elems,
                           int32_t            order)
{
    const UCollator *coll = elems->iteratordata_.coll;
    uint32_t strengthMask = 0;

    /* accumulate the mask from the strongest applicable level downwards */
    switch (coll->strength)
    {
    default:
        strengthMask |= UCOL_TERTIARYORDERMASK;
        /* fall through */

    case UCOL_SECONDARY:
        strengthMask |= UCOL_SECONDARYORDERMASK;
        /* fall through */

    case UCOL_PRIMARY:
        strengthMask |= UCOL_PRIMARYORDERMASK;
    }

    const uint32_t  key   = (uint32_t) order & strengthMask;
    const uint32_t *lower = coll->endExpansionCE;
    const uint32_t *upper = coll->lastEndExpansionCE;

    /* narrow [lower, upper] until the two ends are adjacent */
    while (lower < upper - 1) {
        const uint32_t *middle = lower + ((upper - lower) >> 1);

        if (key <= (*middle & strengthMask)) {
            upper = middle;
        } else {
            lower = middle;
        }
    }

    // FIXME: with a masked search, there might be more than one hit,
    // so we need to look forward and backward from the match to find all
    // of the hits...
    uint8_t size;

    if ((*lower & strengthMask) == key) {
        size = coll->expansionCESize[lower - coll->endExpansionCE];
    } else if ((*upper & strengthMask) == key) {
        size = coll->expansionCESize[upper - coll->endExpansionCE];
    } else if ((key & 0xFFFF) == 0x00C0) {
        /* NOTE(review): 0x00C0 looks like a continuation marker - confirm */
        size = 2;
    } else {
        size = 1;
    }

    return size;
}
|
2001-03-03 04:06:43 +00:00
|
|
|
|
2001-11-21 01:08:55 +00:00
|
|
|
/*
 * Point the iterator at a new text and put it back in the "reset" state.
 *
 * Does nothing if *status already indicates failure. Text owned by the
 * iterator (isWritable) is freed first, and the iterator is marked as not
 * owning the new text. A NULL `text` is treated as having length 0.
 */
U_CAPI void U_EXPORT2
ucol_setText(      UCollationElements *elems,
             const UChar              *text,
                   int32_t            textLength,
                   UErrorCode         *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    collIterate *state = &(elems->iteratordata_);

    if (elems->isWritable && state->string != NULL) {
        uprv_free((UChar *)state->string);
    }

    elems->isWritable = FALSE;

    /* free offset buffer to avoid memory leak before initializing. */
    ucol_freeOffsetBuffer(state);

    /* Ensure that previously allocated extendCEs is freed before setting to NULL. */
    if (state->extendCEs != NULL) {
        uprv_free(state->extendCEs);
    }

    uprv_init_collIterate(state->coll, text, (text == NULL) ? 0 : textLength,
                          state, status);

    elems->reset_ = TRUE;
}
|
|
|
|
|
2002-03-12 01:32:42 +00:00
|
|
|
/*
 * Report the iterator's current offset. The first applicable rule wins:
 *   1. a pending repeated offset (repeat count > 0 and value != 0);
 *   2. the value at offsetReturn, when that pointer is set;
 *   3. while reading from the normalization buffer: the FCD position
 *      relative to the start of the text, or 0 if there is none;
 *   4. otherwise the current position relative to the start of the text.
 */
U_CAPI int32_t U_EXPORT2
ucol_getOffset(const UCollationElements *elems)
{
    const collIterate *state = &(elems->iteratordata_);

    if (state->offsetRepeatCount > 0 && state->offsetRepeatValue != 0) {
        return state->offsetRepeatValue;
    }

    if (state->offsetReturn != NULL) {
        return *state->offsetReturn;
    }

    if ((state->flags & UCOL_ITER_INNORMBUF) == 0) {
        return (int32_t)(state->pos - state->string);
    }

    // while processing characters in normalization buffer getOffset will
    // return the next non-normalized character.
    // should be inline with the old implementation since the old codes uses
    // nextDecomp in normalizer which also decomposes the string till the
    // first base character is found.
    if (state->fcdPosition == NULL) {
        return 0;
    }

    return (int32_t)(state->fcdPosition - state->string);
}
|
|
|
|
|
2001-11-21 01:08:55 +00:00
|
|
|
/*
 * Move the iterator to `offset` code units from the start of its text.
 *
 * Does nothing if *status already indicates failure. Pending CEs are
 * discarded; if the iterator was reading from the normalization buffer
 * its original flags are restored; the end pointer is computed with
 * u_strlen() when the length is not yet known. The FCD position, the
 * "reset" state and the stored offset bookkeeping are all cleared.
 * `offset` is not range-checked here.
 */
U_CAPI void U_EXPORT2
ucol_setOffset(UCollationElements    *elems,
               int32_t           offset,
               UErrorCode            *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    // this methods will clean up any use of the writable buffer and points to
    // the original string
    collIterate *state = &(elems->iteratordata_);

    state->pos      = state->string + offset;
    state->toReturn = state->CEs;
    state->CEpos    = state->toReturn;

    if ((state->flags & UCOL_ITER_INNORMBUF) != 0) {
        state->flags = state->origFlags;
    }

    if ((state->flags & UCOL_ITER_HASLEN) == 0) {
        state->endp   = state->string + u_strlen(state->string);
        state->flags |= UCOL_ITER_HASLEN;
    }

    state->fcdPosition = NULL;
    elems->reset_      = FALSE;

    /* forget any offsets recorded for previously returned CEs */
    state->offsetReturn      = NULL;
    state->offsetStore       = state->offsetBuffer;
    state->offsetRepeatValue = 0;
    state->offsetRepeatCount = 0;
}
|
|
|
|
|
2003-04-23 04:47:00 +00:00
|
|
|
/*
 * Extract the primary weight of a collation element: the bits selected
 * by UCOL_PRIMARYMASK, moved down by UCOL_PRIMARYORDERSHIFT.
 *
 * The masking and shifting are done on an unsigned value. Shifting the
 * signed `order` directly is implementation-defined when its top bit is
 * set and usually sign-extends, which made the result negative instead of
 * the plain weight.
 */
U_CAPI int32_t U_EXPORT2
ucol_primaryOrder (int32_t order)
{
    const uint32_t bits = (uint32_t) order & UCOL_PRIMARYMASK;

    return (int32_t) (bits >> UCOL_PRIMARYORDERSHIFT);
}
|
|
|
|
|
|
|
|
/*
 * Extract the secondary weight of a collation element: the bits selected
 * by UCOL_SECONDARYMASK, moved down by UCOL_SECONDARYORDERSHIFT.
 */
U_CAPI int32_t U_EXPORT2
ucol_secondaryOrder (int32_t order)
{
    order &= UCOL_SECONDARYMASK;
    order = (order >> UCOL_SECONDARYORDERSHIFT);

    return order;
}
|
|
|
|
|
|
|
|
/*
 * Extract the tertiary weight of a collation element: the bits selected
 * by UCOL_TERTIARYMASK. No shift is applied.
 */
U_CAPI int32_t U_EXPORT2
ucol_tertiaryOrder (int32_t order)
{
    order &= UCOL_TERTIARYMASK;

    return order;
}
|
|
|
|
|
2009-03-27 00:37:55 +00:00
|
|
|
|
|
|
|
/*
 * Free the offset buffer of `s`, if there is one, and record that it is
 * gone (pointer NULL, size 0). A NULL `s` or an absent buffer leaves
 * everything untouched.
 */
void ucol_freeOffsetBuffer(collIterate *s) {
    if (s == NULL || s->offsetBuffer == NULL) {
        return;
    }

    uprv_free(s->offsetBuffer);

    s->offsetBufferSize = 0;
    s->offsetBuffer     = NULL;
}
|
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_COLLATION */
|