8e6698de87
X-SVN-Rev: 4237
305 lines
10 KiB
C++
305 lines
10 KiB
C++
/*
|
||
*******************************************************************************
|
||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||
* others. All Rights Reserved. *
|
||
*******************************************************************************
|
||
*/
|
||
|
||
/*
|
||
* File coleitr.cpp
|
||
*
|
||
*
|
||
*
|
||
* Created by: Helena Shih
|
||
*
|
||
* Modification History:
|
||
*
|
||
* Date Name Description
|
||
*
|
||
* 6/23/97 helena Adding comments to make code more readable.
|
||
* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
|
||
* 12/10/99 aliu Ported Thai collation support from Java.
|
||
* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
|
||
* 02/19/01 swquek Removed CollationElementsIterator() since it is
|
||
* private constructor and no calls are made to it
|
||
*/
|
||
|
||
#include "unicode/coleitr.h"
|
||
#include "ucol_imp.h"
|
||
#include "cmemory.h"
|
||
|
||
|
||
/* Constants --------------------------------------------------------------- */
|
||
|
||
/* synwee : public can't remove */
|
||
int32_t const CollationElementIterator::NULLORDER = 0xffffffff;
|
||
|
||
/* CollationElementIterator public constructor/destructor ------------------ */
|
||
|
||
CollationElementIterator::CollationElementIterator(
|
||
const CollationElementIterator& other)
|
||
: isDataOwned_(TRUE)
|
||
{
|
||
UErrorCode status = U_ZERO_ERROR;
|
||
m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, &status);
|
||
*this = other;
|
||
}
|
||
|
||
CollationElementIterator::~CollationElementIterator()
|
||
{
|
||
if (isDataOwned_) {
|
||
ucol_closeElements(m_data_);
|
||
}
|
||
}
|
||
|
||
/* CollationElementIterator public methods --------------------------------- */
|
||
|
||
UTextOffset CollationElementIterator::getOffset() const
|
||
{
|
||
return ucol_getOffset(m_data_);
|
||
}
|
||
|
||
/**
|
||
* Get the ordering priority of the next character in the string.
|
||
* @return the next character's ordering. Returns NULLORDER if an error has
|
||
* occured or if the end of string has been reached
|
||
*/
|
||
int32_t CollationElementIterator::next(UErrorCode& status)
|
||
{
|
||
return ucol_next(m_data_, &status);
|
||
}
|
||
|
||
UBool CollationElementIterator::operator!=(
|
||
const CollationElementIterator& other) const
|
||
{
|
||
return !(*this == other);
|
||
}
|
||
|
||
UBool CollationElementIterator::operator==(
|
||
const CollationElementIterator& that) const
|
||
{
|
||
if (this == &that)
|
||
return TRUE;
|
||
|
||
if (m_data_ == that.m_data_)
|
||
return TRUE;
|
||
|
||
return (this->m_data_->normalization_ == that.m_data_->normalization_ &&
|
||
this->m_data_->length_ == that.m_data_->length_ &&
|
||
this->m_data_->reset_ == that.m_data_->reset_ &&
|
||
uprv_memcmp(this->m_data_->iteratordata_.string,
|
||
that.m_data_->iteratordata_.string,
|
||
this->m_data_->length_) == 0 &&
|
||
this->getOffset() == that.getOffset() &&
|
||
this->m_data_->iteratordata_.isThai == that.m_data_->iteratordata_.isThai &&
|
||
this->m_data_->iteratordata_.coll == that.m_data_->iteratordata_.coll);
|
||
}
|
||
|
||
/**
|
||
* Get the ordering priority of the previous collation element in the string.
|
||
* @param status the error code status.
|
||
* @return the previous element's ordering. Returns NULLORDER if an error has
|
||
* occured or if the start of string has been reached.
|
||
*/
|
||
int32_t CollationElementIterator::previous(UErrorCode& status)
|
||
{
|
||
return ucol_previous(m_data_, &status);
|
||
}
|
||
|
||
/**
|
||
* Resets the cursor to the beginning of the string.
|
||
*/
|
||
void CollationElementIterator::reset()
|
||
{
|
||
ucol_reset(m_data_);
|
||
}
|
||
|
||
void CollationElementIterator::setOffset(UTextOffset newOffset,
|
||
UErrorCode& status)
|
||
{
|
||
ucol_setOffset(m_data_, newOffset, &status);
|
||
}
|
||
|
||
/**
|
||
* Sets the source to the new source string.
|
||
*/
|
||
void CollationElementIterator::setText(const UnicodeString& source,
|
||
UErrorCode& status)
|
||
{
|
||
if (U_FAILURE(status))
|
||
return;
|
||
int32_t length = source.length();
|
||
UChar *string = new UChar[length];
|
||
source.extract(0, length, string);
|
||
|
||
m_data_->length_ = length;
|
||
|
||
if (m_data_->iteratordata_.isWritable &&
|
||
m_data_->iteratordata_.string != NULL)
|
||
uprv_free(m_data_->iteratordata_.string);
|
||
init_collIterate(m_data_->iteratordata_.coll, string, length, &m_data_->iteratordata_, TRUE);
|
||
}
|
||
|
||
// Sets the source to the new character iterator.
|
||
void CollationElementIterator::setText(CharacterIterator& source,
|
||
UErrorCode& status)
|
||
{
|
||
if (U_FAILURE(status))
|
||
return;
|
||
|
||
int32_t length = source.getLength();
|
||
UChar *buffer = new UChar[length];
|
||
/*
|
||
Using this constructor will prevent buffer from being removed when
|
||
string gets removed
|
||
*/
|
||
UnicodeString string;
|
||
source.getText(string);
|
||
string.extract(0, length, buffer);
|
||
m_data_->length_ = length;
|
||
|
||
if (m_data_->iteratordata_.isWritable &&
|
||
m_data_->iteratordata_.string != NULL)
|
||
uprv_free(m_data_->iteratordata_.string);
|
||
init_collIterate(m_data_->iteratordata_.coll, buffer, length, &m_data_->iteratordata_, TRUE);
|
||
}
|
||
|
||
int32_t CollationElementIterator::strengthOrder(int32_t order) const
|
||
{
|
||
UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
|
||
// Mask off the unwanted differences.
|
||
if (s == UCOL_PRIMARY)
|
||
order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
|
||
else
|
||
if (s == UCOL_SECONDARY)
|
||
order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
|
||
|
||
return order;
|
||
}
|
||
|
||
/* CollationElementIterator private constructors/destructors --------------- */
|
||
|
||
/*
|
||
This private method will never be called, but it makes the linker happy
|
||
CollationElementIterator::CollationElementIterator() : m_data_(0)
|
||
{
|
||
}
|
||
*/
|
||
|
||
CollationElementIterator::CollationElementIterator(
|
||
const RuleBasedCollator* order)
|
||
: isDataOwned_(TRUE)
|
||
{
|
||
UErrorCode status = U_ZERO_ERROR;
|
||
m_data_ = ucol_openElements(order->ucollator, NULL, 0, &status);
|
||
}
|
||
|
||
/**
|
||
* This is the "real" constructor for this class; it constructs an iterator
|
||
* over the source text using the specified collator
|
||
*/
|
||
CollationElementIterator::CollationElementIterator(
|
||
const UnicodeString& sourceText,
|
||
const RuleBasedCollator* order,
|
||
UErrorCode& status)
|
||
: isDataOwned_(TRUE)
|
||
{
|
||
if (U_FAILURE(status))
|
||
return;
|
||
|
||
int32_t length = sourceText.length();
|
||
UChar *string = new UChar[length];
|
||
/*
|
||
Using this constructor will prevent buffer from being removed when
|
||
string gets removed
|
||
*/
|
||
sourceText.extract(0, length, string);
|
||
|
||
m_data_ = ucol_openElements(order->ucollator, string, length, &status);
|
||
m_data_->iteratordata_.isWritable = TRUE;
|
||
}
|
||
|
||
/**
|
||
* This is the "real" constructor for this class; it constructs an iterator over
|
||
* the source text using the specified collator
|
||
*/
|
||
CollationElementIterator::CollationElementIterator(
|
||
const CharacterIterator& sourceText,
|
||
const RuleBasedCollator* order,
|
||
UErrorCode& status)
|
||
: isDataOwned_(TRUE)
|
||
{
|
||
if (U_FAILURE(status))
|
||
return;
|
||
|
||
// **** should I just drop this test? ****
|
||
/*
|
||
if ( sourceText.endIndex() != 0 )
|
||
{
|
||
// A CollationElementIterator is really a two-layered beast.
|
||
// Internally it uses a Normalizer to munge the source text into a form
|
||
// where all "composed" Unicode characters (such as <20>) are split into a
|
||
// normal character and a combining accent character.
|
||
// Afterward, CollationElementIterator does its own processing to handle
|
||
// expanding and contracting collation sequences, ignorables, and so on.
|
||
|
||
Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
|
||
? Normalizer::NO_OP : order->getDecomposition();
|
||
|
||
text = new Normalizer(sourceText, decomp);
|
||
if (text == NULL)
|
||
status = U_MEMORY_ALLOCATION_ERROR;
|
||
}
|
||
*/
|
||
int32_t length = sourceText.getLength();
|
||
UChar *buffer = new UChar[length];
|
||
/*
|
||
Using this constructor will prevent buffer from being removed when
|
||
string gets removed
|
||
*/
|
||
UnicodeString string(buffer, length, length);
|
||
((CharacterIterator &)sourceText).getText(string);
|
||
string.extract(0, length, buffer);
|
||
|
||
m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
|
||
m_data_->iteratordata_.isWritable = TRUE;
|
||
}
|
||
|
||
/* CollationElementIterator private methods -------------------------------- */
|
||
|
||
const CollationElementIterator& CollationElementIterator::operator=(
|
||
const CollationElementIterator& other)
|
||
{
|
||
if (this != &other)
|
||
{
|
||
this->m_data_->normalization_ = other.m_data_->normalization_;
|
||
this->m_data_->length_ = other.m_data_->length_;
|
||
this->m_data_->reset_ = other.m_data_->reset_;
|
||
|
||
|
||
this->m_data_->iteratordata_.string = other.m_data_->iteratordata_.string;
|
||
this->m_data_->iteratordata_.start = other.m_data_->iteratordata_.start;
|
||
this->m_data_->iteratordata_.len = other.m_data_->iteratordata_.len;
|
||
this->m_data_->iteratordata_.pos = other.m_data_->iteratordata_.pos;
|
||
this->m_data_->iteratordata_.toReturn = other.m_data_->iteratordata_.CEs +
|
||
(other.m_data_->iteratordata_.toReturn - other.m_data_->iteratordata_.CEs);
|
||
this->m_data_->iteratordata_.CEpos = other.m_data_->iteratordata_.CEs +
|
||
(other.m_data_->iteratordata_.CEpos - other.m_data_->iteratordata_.CEs);
|
||
uprv_memcpy(this->m_data_->iteratordata_.CEs, other.m_data_->iteratordata_.CEs,
|
||
UCOL_EXPAND_CE_BUFFER_SIZE * sizeof(uint32_t));
|
||
this->m_data_->iteratordata_.isThai = other.m_data_->iteratordata_.isThai;
|
||
this->m_data_->iteratordata_.isWritable = other.m_data_->iteratordata_.isWritable;
|
||
|
||
uprv_memcpy(this->m_data_->iteratordata_.stackWritableBuffer,
|
||
other.m_data_->iteratordata_.stackWritableBuffer,
|
||
UCOL_WRITABLE_BUFFER_SIZE * sizeof(UChar));
|
||
/* writablebuffer is not used at the moment, not used */
|
||
this->m_data_->iteratordata_.coll = other.m_data_->iteratordata_.coll;
|
||
this->isDataOwned_ = FALSE;
|
||
}
|
||
|
||
return *this;
|
||
}
|
||
|
||
|