scuffed-code/icu4c/source/i18n/coleitr.cpp

410 lines
13 KiB
C++
Raw Normal View History

1999-08-16 21:50:52 +00:00
/*
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
1999-08-16 21:50:52 +00:00
*/
/*
* File coleitr.cpp
*
*
*
* Created by: Helena Shih
*
* Modification History:
*
* Date Name Description
*
* 6/23/97 helena Adding comments to make code more readable.
* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
* 12/10/99 aliu Ported Thai collation support from Java.
* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
* 02/19/01 swquek Removed CollationElementsIterator() since it is
* private constructor and no calls are made to it
*/
#include "unicode/coleitr.h"
#include "ucol_imp.h"
#include "cmemory.h"
#include "unicode/ustring.h"
1999-08-16 21:50:52 +00:00
/* Constants --------------------------------------------------------------- */
1999-08-16 21:50:52 +00:00
/* synwee : public can't remove */
1999-08-16 21:50:52 +00:00
/* Sentinel order value. The documentation of next() and previous() in this
   file names it as the value returned on error or when the end/start of the
   string has been reached. */
int32_t const CollationElementIterator::NULLORDER = 0xffffffff;
/* CollationElementIterator public constructor/destructor ------------------ */
1999-08-16 21:50:52 +00:00
/**
 * Copy constructor. Opens a new, empty UCollationElements on the collator used
 * by other and then copies other's state into it through operator=.
 * There is no status parameter to report through, so if the underlying
 * iterator cannot be opened the new object holds no data (m_data_ is NULL)
 * and does not claim ownership.
 * @param other the iterator to duplicate
 */
CollationElementIterator::CollationElementIterator(
                                         const CollationElementIterator& other)
    : isDataOwned_(TRUE)
{
    UErrorCode status = U_ZERO_ERROR;
    m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
                                &status);
    if (U_FAILURE(status) || m_data_ == NULL) {
        // Only keep ownership if a handle was actually returned, so that the
        // destructor never closes a NULL handle. operator= must not run
        // because it dereferences m_data_.
        isDataOwned_ = (UBool)(m_data_ != NULL);
        return;
    }
    *this = other;
}
1999-08-16 21:50:52 +00:00
/**
 * Destructor. Closes the underlying UCollationElements, but only when this
 * object is flagged as its owner.
 */
CollationElementIterator::~CollationElementIterator()
{
    if (!isDataOwned_) {
        return;
    }
    ucol_closeElements(m_data_);
}
/* CollationElementIterator public methods --------------------------------- */
1999-08-16 21:50:52 +00:00
/**
 * Returns the offset that ucol_getOffset reports for the underlying iterator.
 */
UTextOffset CollationElementIterator::getOffset() const
{
    const UTextOffset currentOffset = ucol_getOffset(m_data_);
    return currentOffset;
}
/**
 * Gets the ordering priority of the next character in the string.
 * @param status the error code status, handed on to ucol_next.
 * @return the next character's ordering. Returns NULLORDER if an error has
 *         occurred or if the end of string has been reached.
 */
int32_t CollationElementIterator::next(UErrorCode& status)
{
    const int32_t nextOrder = ucol_next(m_data_, &status);
    return nextOrder;
}
/**
 * Inequality test; the exact negation of operator==.
 * @param other the iterator to compare with
 * @return TRUE when operator== reports the two iterators as different
 */
UBool CollationElementIterator::operator!=(
                                  const CollationElementIterator& other) const
{
    const UBool same = (*this == other);
    return !same;
}
UBool CollationElementIterator::operator==(
const CollationElementIterator& that) const
1999-08-16 21:50:52 +00:00
{
UBool result = TRUE;
if (this == &that) {
return TRUE;
}
if (m_data_ == that.m_data_) {
return TRUE;
}
// option comparison
result = this->m_data_->reset_ == that.m_data_->reset_ &&
this->m_data_->iteratordata_.coll ==
that.m_data_->iteratordata_.coll;
int thislength = 0;
if (this->m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) {
thislength = this->m_data_->iteratordata_.endp -
this->m_data_->iteratordata_.string;
}
else {
thislength = u_strlen(this->m_data_->iteratordata_.string);
}
int thatlength = 0;
if (that.m_data_->iteratordata_.endp != NULL) {
thatlength = that.m_data_->iteratordata_.endp -
that.m_data_->iteratordata_.string;
}
else {
thatlength = u_strlen(that.m_data_->iteratordata_.string);
}
if (thislength != thatlength) {
return FALSE;
}
result = result && (uprv_memcmp(this->m_data_->iteratordata_.string,
that.m_data_->iteratordata_.string,
thislength * sizeof(UChar)) == 0);
result = result && (this->getOffset() == that.getOffset());
return result;
1999-08-16 21:50:52 +00:00
}
/**
 * Gets the ordering priority of the previous collation element in the string.
 * @param status the error code status, handed on to ucol_previous.
 * @return the previous element's ordering. Returns NULLORDER if an error has
 *         occurred or if the start of string has been reached.
 */
int32_t CollationElementIterator::previous(UErrorCode& status)
{
    const int32_t previousOrder = ucol_previous(m_data_, &status);
    return previousOrder;
}
/**
* Resets the cursor to the beginning of the string.
*/
void CollationElementIterator::reset()
1999-08-16 21:50:52 +00:00
{
ucol_reset(m_data_);
1999-08-16 21:50:52 +00:00
}
/**
 * Positions the iterator at the given offset by delegating to ucol_setOffset.
 * @param newOffset the offset to move to
 * @param status the error code status, handed on to ucol_setOffset
 */
void CollationElementIterator::setOffset(UTextOffset newOffset,
                                         UErrorCode& status)
{
    UCollationElements *elements = m_data_;
    ucol_setOffset(elements, newOffset, &status);
}
/**
* Sets the source to the new source string.
*/
void CollationElementIterator::setText(const UnicodeString& source,
UErrorCode& status)
1999-08-16 21:50:52 +00:00
{
if (U_FAILURE(status)) {
return;
}
int32_t length = source.length();
UChar *string = NULL;
if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
uprv_free(m_data_->iteratordata_.string);
}
m_data_->isWritable = TRUE;
if (length > 0) {
string = (UChar *)uprv_malloc(sizeof(UChar) * length);
source.extract(0, length, string);
}
else {
string = (UChar *)uprv_malloc(sizeof(UChar));
*string = 0;
}
init_collIterate(m_data_->iteratordata_.coll, string, length,
&m_data_->iteratordata_);
m_data_->reset_ = TRUE;
1999-08-16 21:50:52 +00:00
}
/**
 * Sets the source to the text of the new character iterator. The characters
 * are copied into a buffer that the iterator marks as writable, i.e. as its
 * own.
 * @param source the character iterator supplying the new source text
 * @param status the error code status. Nothing is done if it already
 *        indicates a failure; set to U_MEMORY_ALLOCATION_ERROR if the copy
 *        of the text cannot be allocated, in which case the iterator keeps
 *        its previous text.
 */
void CollationElementIterator::setText(CharacterIterator& source,
                                       UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return;
    }

    int32_t length = source.getLength();

    // Empty text still gets one UChar holding a terminating zero.
    UChar *buffer = (UChar *)uprv_malloc(sizeof(UChar) *
                                         (length > 0 ? length : 1));
    if (buffer == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (length > 0) {
        // Fetch the text into a temporary string, then copy it out into the
        // buffer that outlives the temporary.
        UnicodeString string;
        source.getText(string);
        string.extract(0, length, buffer);
    }
    else {
        *buffer = 0;
    }

    // Release the previous text only if this iterator owns it.
    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
        uprv_free(m_data_->iteratordata_.string);
    }
    m_data_->isWritable = TRUE;

    init_collIterate(m_data_->iteratordata_.coll, buffer, length,
                     &m_data_->iteratordata_);
    m_data_->reset_ = TRUE;
}
1999-08-16 21:50:52 +00:00
/**
 * Masks a collation order down to the differences that matter at the
 * strength of this iterator's collator.
 * @param order the collation order to mask
 * @return order masked with PRIMARYDIFFERENCEONLY for primary strength, with
 *         SECONDARYDIFFERENCEONLY for secondary strength, and unchanged for
 *         any other strength
 */
int32_t CollationElementIterator::strengthOrder(int32_t order) const
{
    switch (ucol_getStrength(m_data_->iteratordata_.coll)) {
    case UCOL_PRIMARY:
        return order & RuleBasedCollator::PRIMARYDIFFERENCEONLY;
    case UCOL_SECONDARY:
        return order & RuleBasedCollator::SECONDARYDIFFERENCEONLY;
    default:
        // Nothing is masked off for any other strength.
        return order;
    }
}
1999-08-16 21:50:52 +00:00
/* CollationElementIterator private constructors/destructors --------------- */
1999-08-16 21:50:52 +00:00
/**
 * This is the "real" constructor for this class; it constructs an iterator
 * over the source text using the specified collator. The text is copied into
 * a buffer that the underlying iterator is told it may free (isWritable).
 * @param sourceText the text to iterate over
 * @param order the collator whose ucollator drives the iteration
 * @param status the error code status. If it already indicates a failure, or
 *        if allocation or opening the underlying iterator fails, the object
 *        is left without data (m_data_ is NULL) and does not claim ownership,
 *        so that destroying it is safe.
 */
CollationElementIterator::CollationElementIterator(
                                               const UnicodeString& sourceText,
                                               const RuleBasedCollator* order,
                                               UErrorCode& status)
    : isDataOwned_(FALSE)
{
    // Start in a defined "no data, not owned" state; the destructor only
    // closes m_data_ when isDataOwned_ is TRUE.
    m_data_ = NULL;

    if (U_FAILURE(status)) {
        return;
    }

    int32_t length = sourceText.length();

    // Empty text still gets one UChar holding a terminating zero.
    UChar *string = (UChar *)uprv_malloc(sizeof(UChar) *
                                         (length > 0 ? length : 1));
    if (string == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (length > 0) {
        sourceText.extract(0, length, string);
    }
    else {
        *string = 0;
    }

    m_data_ = ucol_openElements(order->ucollator, string, length, &status);
    if (m_data_ == NULL) {
        // Nobody took over the copy of the text; release it here.
        uprv_free(string);
        return;
    }

    m_data_->isWritable = TRUE;
    isDataOwned_ = TRUE;
}
/**
 * This is the "real" constructor for this class; it constructs an iterator over
 * the text of a character iterator using the specified collator. The text is
 * copied into a buffer that the underlying iterator is told it may free
 * (isWritable).
 * @param sourceText the character iterator supplying the text
 * @param order the collator whose ucollator drives the iteration
 * @param status the error code status. If it already indicates a failure, or
 *        if allocation or opening the underlying iterator fails, the object
 *        is left without data (m_data_ is NULL) and does not claim ownership,
 *        so that destroying it is safe.
 */
CollationElementIterator::CollationElementIterator(
                                           const CharacterIterator& sourceText,
                                           const RuleBasedCollator* order,
                                           UErrorCode& status)
    : isDataOwned_(FALSE)
{
    // Start in a defined "no data, not owned" state; the destructor only
    // closes m_data_ when isDataOwned_ is TRUE.
    m_data_ = NULL;

    if (U_FAILURE(status)) {
        return;
    }

    // **** should I just drop this test? ****
    /*
    if ( sourceText.endIndex() != 0 )
    {
        // A CollationElementIterator is really a two-layered beast.
        // Internally it uses a Normalizer to munge the source text into a form
        // where all "composed" Unicode characters (for example an accented
        // letter) are split into a normal character and a combining accent
        // character.
        // Afterward, CollationElementIterator does its own processing to handle
        // expanding and contracting collation sequences, ignorables, and so on.

        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
                                   ? Normalizer::NO_OP : order->getDecomposition();

        text = new Normalizer(sourceText, decomp);
        if (text == NULL)
            status = U_MEMORY_ALLOCATION_ERROR;
    }
    */

    int32_t length = sourceText.getLength();

    // Empty text still gets one UChar holding a terminating zero.
    UChar *buffer = (UChar *)uprv_malloc(sizeof(UChar) *
                                         (length > 0 ? length : 1));
    if (buffer == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (length > 0) {
        /*
        Using this constructor will prevent buffer from being removed when
        string gets removed
        */
        UnicodeString string(buffer, length, length);
        // getText() is called through a non-const reference, as before.
        ((CharacterIterator &)sourceText).getText(string);
        string.extract(0, length, buffer);
    }
    else {
        *buffer = 0;
    }

    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
    if (m_data_ == NULL) {
        // Nobody took over the copy of the text; release it here.
        uprv_free(buffer);
        return;
    }

    m_data_->isWritable = TRUE;
    isDataOwned_ = TRUE;
}
/* CollationElementIterator protected methods ----------------------------- */
/**
 * Assignment. Makes this iterator a deep copy of other: the source text is
 * duplicated, the normalization buffer is copied when other is currently
 * reading from it, and every position pointer is re-based onto this
 * iterator's own buffers.
 * There is no status parameter; if a required allocation fails the
 * assignment is abandoned.
 * @param other the iterator to copy from
 * @return a reference to this iterator
 */
const CollationElementIterator& CollationElementIterator::operator=(
                                         const CollationElementIterator& other)
{
    if (this != &other)
    {
        UCollationElements *ucolelem      = this->m_data_;
        UCollationElements *otherucolelem = other.m_data_;
        collIterate        *coliter       = &(ucolelem->iteratordata_);
        collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
        int                length         = 0;

        // checking only UCOL_ITER_HASLEN is not enough here as we may be in
        // the normalization buffer
        if (othercoliter->endp != NULL) {
            length = othercoliter->endp - othercoliter->string;
        }
        else {
            if (othercoliter->string == NULL) {
                length = 0;
            }
            else {
                length = u_strlen(othercoliter->string);
            }
        }

        /* create a duplicate of string; done before this iterator's text is
           touched so that a failed allocation leaves the old text in place */
        UChar *newString = NULL;
        if (length > 0) {
            newString = (UChar *)uprv_malloc(length * sizeof(UChar));
            if (newString == NULL) {
                return *this;
            }
            uprv_memcpy(newString, othercoliter->string,
                        length * sizeof(UChar));
        }

        /* handle writable buffer here */
        if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
            uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
            if (wlength < coliter->writableBufSize) {
                // NOTE(review): this copies other's stack buffer using
                // other's buffer size, unchanged from the original code;
                // confirm this is right when either side uses a heap buffer.
                uprv_memcpy(coliter->stackWritableBuffer,
                            othercoliter->stackWritableBuffer,
                            othercoliter->writableBufSize * sizeof(UChar));
            }
            else {
                UChar *newBuffer = (UChar *)uprv_malloc(
                                                     wlength * sizeof(UChar));
                if (newBuffer == NULL) {
                    if (newString != NULL) {
                        uprv_free(newString);
                    }
                    return *this;
                }
                uprv_memcpy(newBuffer, othercoliter->writableBuffer,
                            wlength * sizeof(UChar));
                if (coliter->writableBuffer != coliter->stackWritableBuffer) {
                    // Buffers in this function come from uprv_malloc, so they
                    // are released with uprv_free rather than delete.
                    uprv_free(coliter->writableBuffer);
                }
                coliter->writableBuffer  = newBuffer;
                coliter->writableBufSize = wlength;
            }
        }

        // Release the text this iterator owned so far (same ownership test
        // as setText) instead of leaking it.
        if (ucolelem->isWritable && coliter->string != NULL) {
            uprv_free(coliter->string);
        }

        ucolelem->reset_     = otherucolelem->reset_;
        ucolelem->isWritable = TRUE;

        /* start and end of string */
        coliter->string = newString;
        coliter->endp   = coliter->string + length;

        /* current position */
        if (othercoliter->pos >= othercoliter->string &&
            othercoliter->pos <= othercoliter->endp) {
            coliter->pos = coliter->string +
                           (othercoliter->pos - othercoliter->string);
        }
        else {
            coliter->pos = coliter->writableBuffer +
                           (othercoliter->pos - othercoliter->writableBuffer);
        }

        /* CE buffer */
        uprv_memcpy(coliter->CEs, othercoliter->CEs,
                    UCOL_EXPAND_CE_BUFFER_SIZE * sizeof(uint32_t));
        coliter->toReturn = coliter->CEs +
                            (othercoliter->toReturn - othercoliter->CEs);
        // Re-base onto this iterator's own CE array; the previous code used
        // other's array as the base and so pointed into other's storage.
        coliter->CEpos    = coliter->CEs +
                            (othercoliter->CEpos - othercoliter->CEs);

        // Only re-base a position that is actually set.
        if (othercoliter->fcdPosition != NULL) {
            coliter->fcdPosition = coliter->string +
                          (othercoliter->fcdPosition - othercoliter->string);
        }
        else {
            coliter->fcdPosition = NULL;
        }

        coliter->flags     = othercoliter->flags | UCOL_ITER_HASLEN;
        coliter->origFlags = othercoliter->origFlags;
        coliter->coll      = othercoliter->coll;
        this->isDataOwned_ = TRUE;
    }

    return *this;
}