ICU-8167 make trie iterators nested classes inside their tries
X-SVN-Rev: 29272
This commit is contained in:
parent
3518ad81e3
commit
397d6f7372
@ -85,7 +85,7 @@ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
||||
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
|
||||
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
|
||||
ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
|
||||
bytestream.o stringpiece.o bytestrie.o \
|
||||
bytestream.o stringpiece.o bytestrie.o bytestrieiterator.o \
|
||||
ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
|
||||
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
|
||||
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
|
||||
|
@ -21,6 +21,7 @@
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uassert.h"
|
||||
#include "ustringtrie.h"
|
||||
@ -29,7 +30,8 @@ U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteSink;
|
||||
class BytesTrieBuilder;
|
||||
class BytesTrieIterator;
|
||||
class CharString;
|
||||
class UVector32;
|
||||
|
||||
/**
|
||||
* Light-weight, non-const reader class for a BytesTrie.
|
||||
@ -166,9 +168,95 @@ public:
|
||||
*/
|
||||
int32_t getNextBytes(ByteSink &out) const;
|
||||
|
||||
/**
|
||||
* Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
|
||||
*/
|
||||
class Iterator : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Iterates from the root of a byte-serialized BytesTrie.
|
||||
* @param trieBytes The trie bytes.
|
||||
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Iterates from the current state of the specified BytesTrie.
|
||||
* @param trie The trie whose state will be copied for iteration.
|
||||
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
~Iterator();
|
||||
|
||||
/**
|
||||
* Resets this iterator to its initial state.
|
||||
*/
|
||||
Iterator &reset();
|
||||
|
||||
/**
|
||||
* @return TRUE if there are more elements.
|
||||
*/
|
||||
UBool hasNext() const;
|
||||
|
||||
/**
|
||||
* Finds the next (byte sequence, value) pair if there is one.
|
||||
*
|
||||
* If the byte sequence is truncated to the maximum length and does not
|
||||
* have a real value, then the value is set to -1.
|
||||
* In this case, this "not a real value" is indistinguishable from
|
||||
* a real value of -1.
|
||||
* @return TRUE if there is another element.
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return The NUL-terminated byte sequence for the last successful next().
|
||||
*/
|
||||
const StringPiece &getString() const { return sp_; }
|
||||
/**
|
||||
* @return The value for the last successful next().
|
||||
*/
|
||||
int32_t getValue() const { return value_; }
|
||||
|
||||
private:
|
||||
UBool truncateAndStop();
|
||||
|
||||
const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode);
|
||||
|
||||
const uint8_t *bytes_;
|
||||
const uint8_t *pos_;
|
||||
const uint8_t *initialPos_;
|
||||
int32_t remainingMatchLength_;
|
||||
int32_t initialRemainingMatchLength_;
|
||||
|
||||
CharString *str_;
|
||||
StringPiece sp_;
|
||||
int32_t maxLength_;
|
||||
int32_t value_;
|
||||
|
||||
// The stack stores pairs of integers for backtracking to another
|
||||
// outbound edge of a branch node.
|
||||
// The first integer is an offset from BytesTrie.bytes.
|
||||
// The second integer has the str_->length() from before the node in bits 15..0,
|
||||
// and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
|
||||
// (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
|
||||
// but the code looks more confusing that way.)
|
||||
UVector32 *stack_;
|
||||
};
|
||||
|
||||
private:
|
||||
friend class BytesTrieBuilder;
|
||||
friend class BytesTrieIterator;
|
||||
|
||||
inline void stop() {
|
||||
pos_=NULL;
|
||||
|
@ -15,25 +15,51 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytestrie.h"
|
||||
#include "bytestrieiterator.h"
|
||||
#include "charstr.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
BytesTrieIterator::BytesTrieIterator(const void *trieBytes, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
: bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
|
||||
pos_(bytes_), initialPos_(bytes_),
|
||||
remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
|
||||
maxLength_(maxStringLength), value_(0), stack_(errorCode) {}
|
||||
str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
// str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
|
||||
// a public API header for which we would want it to depend only on
|
||||
// other public headers.
|
||||
// Unlike BytesTrie itself, its Iterator performs memory allocations anyway
|
||||
// via the CharString and UVector32 implementations, so this additional
|
||||
// cost is minimal.
|
||||
str_=new CharString();
|
||||
stack_=new UVector32(errorCode);
|
||||
if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
BytesTrieIterator::BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
: bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
|
||||
remainingMatchLength_(trie.remainingMatchLength_),
|
||||
initialRemainingMatchLength_(trie.remainingMatchLength_),
|
||||
maxLength_(maxStringLength), value_(0), stack_(errorCode) {
|
||||
str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
str_=new CharString();
|
||||
stack_=new UVector32(errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if(str_==NULL || stack_==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
|
||||
if(length>=0) {
|
||||
// Pending linear-match node, append remaining bytes to str.
|
||||
@ -41,43 +67,52 @@ BytesTrieIterator::BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLen
|
||||
if(maxLength_>0 && length>maxLength_) {
|
||||
length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
|
||||
}
|
||||
str_.append(reinterpret_cast<const char *>(pos_), length, errorCode);
|
||||
str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
|
||||
pos_+=length;
|
||||
remainingMatchLength_-=length;
|
||||
}
|
||||
}
|
||||
|
||||
BytesTrieIterator &BytesTrieIterator::reset() {
|
||||
// Destructor: frees the CharString and UVector32 that the constructors
// allocate with new. Both pointers are initialized to NULL before any
// early return in the constructors, so deleting them here is always safe.
BytesTrie::Iterator::~Iterator() {
|
||||
delete str_;
|
||||
delete stack_;
|
||||
}
|
||||
|
||||
BytesTrie::Iterator &
|
||||
BytesTrie::Iterator::reset() {
|
||||
pos_=initialPos_;
|
||||
remainingMatchLength_=initialRemainingMatchLength_;
|
||||
int32_t length=remainingMatchLength_+1; // Remaining match length.
|
||||
if(maxLength_>0 && length>maxLength_) {
|
||||
length=maxLength_;
|
||||
}
|
||||
str_.truncate(length);
|
||||
str_->truncate(length);
|
||||
pos_+=length;
|
||||
remainingMatchLength_-=length;
|
||||
stack_.setSize(0);
|
||||
stack_->setSize(0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
UBool
|
||||
BytesTrieIterator::next(UErrorCode &errorCode) {
|
||||
BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
|
||||
|
||||
UBool
|
||||
BytesTrie::Iterator::next(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
const uint8_t *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
if(stack_.isEmpty()) {
|
||||
if(stack_->isEmpty()) {
|
||||
return FALSE;
|
||||
}
|
||||
// Pop the state off the stack and continue with the next outbound edge of
|
||||
// the branch node.
|
||||
int32_t stackSize=stack_.size();
|
||||
int32_t length=stack_.elementAti(stackSize-1);
|
||||
pos=bytes_+stack_.elementAti(stackSize-2);
|
||||
stack_.setSize(stackSize-2);
|
||||
str_.truncate(length&0xffff);
|
||||
int32_t stackSize=stack_->size();
|
||||
int32_t length=stack_->elementAti(stackSize-1);
|
||||
pos=bytes_+stack_->elementAti(stackSize-2);
|
||||
stack_->setSize(stackSize-2);
|
||||
str_->truncate(length&0xffff);
|
||||
length=(int32_t)((uint32_t)length>>16);
|
||||
if(length>1) {
|
||||
pos=branchNext(pos, length, errorCode);
|
||||
@ -85,7 +120,7 @@ BytesTrieIterator::next(UErrorCode &errorCode) {
|
||||
return TRUE; // Reached a final value.
|
||||
}
|
||||
} else {
|
||||
str_.append((char)*pos++, errorCode);
|
||||
str_->append((char)*pos++, errorCode);
|
||||
}
|
||||
}
|
||||
if(remainingMatchLength_>=0) {
|
||||
@ -99,15 +134,15 @@ BytesTrieIterator::next(UErrorCode &errorCode) {
|
||||
// Deliver value for the byte sequence so far.
|
||||
UBool isFinal=(UBool)(node&BytesTrie::kValueIsFinal);
|
||||
value_=BytesTrie::readValue(pos, node>>1);
|
||||
if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
|
||||
if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
|
||||
pos_=NULL;
|
||||
} else {
|
||||
pos_=BytesTrie::skipValue(pos, node);
|
||||
}
|
||||
sp_.set(str_.data(), str_.length());
|
||||
sp_.set(str_->data(), str_->length());
|
||||
return TRUE;
|
||||
}
|
||||
if(maxLength_>0 && str_.length()==maxLength_) {
|
||||
if(maxLength_>0 && str_->length()==maxLength_) {
|
||||
return truncateAndStop();
|
||||
}
|
||||
if(node<BytesTrie::kMinLinearMatch) {
|
||||
@ -121,25 +156,33 @@ BytesTrieIterator::next(UErrorCode &errorCode) {
|
||||
} else {
|
||||
// Linear-match node, append length bytes to str_.
|
||||
int32_t length=node-BytesTrie::kMinLinearMatch+1;
|
||||
if(maxLength_>0 && str_.length()+length>maxLength_) {
|
||||
str_.append(reinterpret_cast<const char *>(pos),
|
||||
maxLength_-str_.length(), errorCode);
|
||||
if(maxLength_>0 && str_->length()+length>maxLength_) {
|
||||
str_->append(reinterpret_cast<const char *>(pos),
|
||||
maxLength_-str_->length(), errorCode);
|
||||
return truncateAndStop();
|
||||
}
|
||||
str_.append(reinterpret_cast<const char *>(pos), length, errorCode);
|
||||
str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
|
||||
pos+=length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Called by next() when the byte sequence has reached maxLength_ without
// a real value: ends traversal of the current path (pos_=NULL, so the
// following next() resumes from the backtracking stack), reports -1 as the
// value, and exposes the truncated bytes through sp_.
// Always returns TRUE so that next() can return the result directly.
UBool
|
||||
BytesTrie::Iterator::truncateAndStop() {
|
||||
pos_=NULL;
|
||||
value_=-1; // no real value for str
|
||||
sp_.set(str_->data(), str_->length());
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
// Branch node, needs to take the first outbound edge and push state for the rest.
|
||||
const uint8_t *
|
||||
BytesTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
|
||||
BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
|
||||
while(length>BytesTrie::kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
// Push state for the greater-or-equal edge.
|
||||
stack_.addElement((int32_t)(BytesTrie::skipDelta(pos)-bytes_), errorCode);
|
||||
stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
|
||||
stack_->addElement((int32_t)(BytesTrie::skipDelta(pos)-bytes_), errorCode);
|
||||
stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
|
||||
// Follow the less-than edge.
|
||||
length>>=1;
|
||||
pos=BytesTrie::jumpByDelta(pos);
|
||||
@ -151,12 +194,12 @@ BytesTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &er
|
||||
UBool isFinal=(UBool)(node&BytesTrie::kValueIsFinal);
|
||||
int32_t value=BytesTrie::readValue(pos, node>>1);
|
||||
pos=BytesTrie::skipValue(pos, node);
|
||||
stack_.addElement((int32_t)(pos-bytes_), errorCode);
|
||||
stack_.addElement(((length-1)<<16)|str_.length(), errorCode);
|
||||
str_.append((char)trieByte, errorCode);
|
||||
stack_->addElement((int32_t)(pos-bytes_), errorCode);
|
||||
stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
|
||||
str_->append((char)trieByte, errorCode);
|
||||
if(isFinal) {
|
||||
pos_=NULL;
|
||||
sp_.set(str_.data(), str_.length());
|
||||
sp_.set(str_->data(), str_->length());
|
||||
value_=value;
|
||||
return NULL;
|
||||
} else {
|
@ -401,6 +401,7 @@
|
||||
<ClCompile Include="usprep.cpp" />
|
||||
<ClCompile Include="bytestream.cpp" />
|
||||
<ClCompile Include="bytestrie.cpp" />
|
||||
<ClCompile Include="bytestrieiterator.cpp" />
|
||||
<ClCompile Include="chariter.cpp" />
|
||||
<ClCompile Include="charstr.cpp" />
|
||||
<ClCompile Include="cstring.c" />
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytestrie.h"
|
||||
#include "bytestriebuilder.h"
|
||||
#include "bytestrieiterator.h"
|
||||
#include "intltest.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
@ -63,7 +62,7 @@ public:
|
||||
void checkNextWithState(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(BytesTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
};
|
||||
|
||||
extern IntlTest *createBytesTrieTest() {
|
||||
@ -392,8 +391,8 @@ void BytesTrieTest::TestIteratorFromBranch() {
|
||||
trie.next('a');
|
||||
trie.next('n');
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
|
||||
BytesTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
BytesTrie::Iterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
||||
@ -445,8 +444,8 @@ void BytesTrieTest::TestIteratorFromLinearMatch() {
|
||||
trie.next('u');
|
||||
trie.next('a');
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
|
||||
BytesTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
BytesTrie::Iterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
||||
@ -468,8 +467,8 @@ void BytesTrieTest::TestTruncatingIteratorFromRoot() {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
|
||||
BytesTrieIterator iter(sp.data(), 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
BytesTrie::Iterator iter(sp.data(), 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
|
||||
@ -525,8 +524,8 @@ void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
||||
trie.next('b');
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
|
||||
// Truncate within the linear-match node.
|
||||
BytesTrieIterator iter(trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
BytesTrie::Iterator iter(trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
static const StringAndValue expected[]={
|
||||
@ -556,8 +555,8 @@ void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
||||
trie.next('c');
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
|
||||
// Truncate after the linear-match node.
|
||||
BytesTrieIterator iter(trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trie) constructor")) {
|
||||
BytesTrie::Iterator iter(trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
static const StringAndValue expected[]={
|
||||
@ -798,14 +797,14 @@ void BytesTrieTest::checkNextString(const StringPiece &trieBytes,
|
||||
void BytesTrieTest::checkIterator(const StringPiece &trieBytes,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
BytesTrieIterator iter(trieBytes.data(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrieIterator(trieBytes) constructor")) {
|
||||
BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trieBytes) constructor")) {
|
||||
return;
|
||||
}
|
||||
checkIterator(iter, data, dataLength);
|
||||
}
|
||||
|
||||
void BytesTrieTest::checkIterator(BytesTrieIterator &iter,
|
||||
void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "unicode/uniset.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "ucharstriebuilder.h"
|
||||
#include "ucharstrieiterator.h"
|
||||
#include "intltest.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
@ -69,7 +68,7 @@ public:
|
||||
void checkNextWithState(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkNextString(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(const UnicodeString &trieUChars, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(UCharsTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
|
||||
};
|
||||
|
||||
extern IntlTest *createUCharsTrieTest() {
|
||||
@ -560,8 +559,8 @@ void UCharsTrieTest::TestIteratorFromBranch() {
|
||||
trie.next(u_a);
|
||||
trie.next(u_n);
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
|
||||
UCharsTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
UCharsTrie::Iterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
||||
@ -613,8 +612,8 @@ void UCharsTrieTest::TestIteratorFromLinearMatch() {
|
||||
trie.next(u_u);
|
||||
trie.next(u_a);
|
||||
IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
|
||||
UCharsTrieIterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
UCharsTrie::Iterator iter(trie, 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the suffixes
|
||||
@ -636,8 +635,8 @@ void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
|
||||
return; // buildTrie() reported an error
|
||||
}
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
|
||||
UCharsTrieIterator iter(trieUChars.getBuffer(), 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
UCharsTrie::Iterator iter(trieUChars.getBuffer(), 4, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
// Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
|
||||
@ -693,8 +692,8 @@ void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
|
||||
trie.next(u_b);
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
|
||||
// Truncate within the linear-match node.
|
||||
UCharsTrieIterator iter(trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
UCharsTrie::Iterator iter(trie, 2, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
static const StringAndValue expected[]={
|
||||
@ -724,8 +723,8 @@ void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
|
||||
trie.next(u_c);
|
||||
IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
|
||||
// Truncate after the linear-match node.
|
||||
UCharsTrieIterator iter(trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trie) constructor")) {
|
||||
UCharsTrie::Iterator iter(trie, 3, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
|
||||
return;
|
||||
}
|
||||
static const StringAndValue expected[]={
|
||||
@ -989,14 +988,14 @@ void UCharsTrieTest::checkNextString(const UnicodeString &trieUChars,
|
||||
void UCharsTrieTest::checkIterator(const UnicodeString &trieUChars,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
UCharsTrieIterator iter(trieUChars.getBuffer(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrieIterator(trieUChars) constructor")) {
|
||||
UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
|
||||
if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
|
||||
return;
|
||||
}
|
||||
checkIterator(iter, data, dataLength);
|
||||
}
|
||||
|
||||
void UCharsTrieTest::checkIterator(UCharsTrieIterator &iter,
|
||||
void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
|
||||
const StringAndValue data[], int32_t dataLength) {
|
||||
IcuTestErrorCode errorCode(*this, "checkIterator()");
|
||||
for(int32_t i=0; i<dataLength; ++i) {
|
||||
|
@ -52,7 +52,7 @@ LDFLAGS += $(LDFLAGSICUTOOLUTIL)
|
||||
LIBS = $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS)
|
||||
|
||||
OBJECTS = filestrm.o package.o pkgitems.o swapimpl.o toolutil.o unewdata.o \
|
||||
stringtriebuilder.o bytestriebuilder.o bytestrieiterator.o \
|
||||
stringtriebuilder.o bytestriebuilder.o \
|
||||
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
|
||||
denseranges.o \
|
||||
ucm.o ucmstate.o uoptions.o uparse.o \
|
||||
|
@ -1,126 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: bytestrieiterator.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010nov03
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __BYTESTRIEITERATOR_H__
|
||||
#define __BYTESTRIEITERATOR_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: BytesTrie iterator for all of its (byte sequence, value) pairs.
|
||||
*/
|
||||
|
||||
// Needed if and when we change the .dat package index to a BytesTrie,
|
||||
// so that icupkg can work with an input package.
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "bytestrie.h"
|
||||
#include "charstr.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
|
||||
*/
|
||||
class U_TOOLUTIL_API BytesTrieIterator : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Iterates from the root of a byte-serialized BytesTrie.
|
||||
* @param trieBytes The trie bytes.
|
||||
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
BytesTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Iterates from the current state of the specified BytesTrie.
|
||||
* @param trie The trie whose state will be copied for iteration.
|
||||
* @param maxStringLength If 0, the iterator returns full strings/byte sequences.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
BytesTrieIterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Resets this iterator to its initial state.
|
||||
*/
|
||||
BytesTrieIterator &reset();
|
||||
|
||||
/**
|
||||
* Finds the next (byte sequence, value) pair if there is one.
|
||||
*
|
||||
* If the byte sequence is truncated to the maximum length and does not
|
||||
* have a real value, then the value is set to -1.
|
||||
* In this case, this "not a real value" is indistinguishable from
|
||||
* a real value of -1.
|
||||
* @return TRUE if there is another element.
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return TRUE if there are more elements.
|
||||
*/
|
||||
UBool hasNext() const { return pos_!=NULL || !stack_.isEmpty(); }
|
||||
|
||||
/**
|
||||
* @return the NUL-terminated byte sequence for the last successful next()
|
||||
*/
|
||||
const StringPiece &getString() const { return sp_; }
|
||||
/**
|
||||
* @return the value for the last successful next()
|
||||
*/
|
||||
int32_t getValue() const { return value_; }
|
||||
|
||||
private:
|
||||
UBool truncateAndStop() {
|
||||
pos_=NULL;
|
||||
value_=-1; // no real value for str
|
||||
sp_.set(str_.data(), str_.length());
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode);
|
||||
|
||||
const uint8_t *bytes_;
|
||||
const uint8_t *pos_;
|
||||
const uint8_t *initialPos_;
|
||||
int32_t remainingMatchLength_;
|
||||
int32_t initialRemainingMatchLength_;
|
||||
|
||||
CharString str_;
|
||||
StringPiece sp_;
|
||||
int32_t maxLength_;
|
||||
int32_t value_;
|
||||
|
||||
// The stack stores pairs of integers for backtracking to another
|
||||
// outbound edge of a branch node.
|
||||
// The first integer is an offset from BytesTrie.bytes.
|
||||
// The second integer has the str.length() from before the node in bits 15..0,
|
||||
// and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
|
||||
// (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
|
||||
// but the code looks more confusing that way.)
|
||||
UVector32 stack_;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __BYTESTRIEITERATOR_H__
|
@ -247,7 +247,6 @@
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="bytestriebuilder.cpp" />
|
||||
<ClCompile Include="bytestrieiterator.cpp" />
|
||||
<ClCompile Include="denseranges.cpp" />
|
||||
<ClCompile Include="filestrm.c" />
|
||||
<ClCompile Include="filetools.cpp" />
|
||||
@ -297,7 +296,6 @@
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="bytestriebuilder.h" />
|
||||
<ClInclude Include="bytestrieiterator.h" />
|
||||
<ClInclude Include="denseranges.h" />
|
||||
<ClInclude Include="filestrm.h" />
|
||||
<ClInclude Include="filetools.h" />
|
||||
@ -314,7 +312,6 @@
|
||||
<ClInclude Include="ucbuf.h" />
|
||||
<ClInclude Include="ucharstrie.h" />
|
||||
<ClInclude Include="ucharstriebuilder.h" />
|
||||
<ClInclude Include="ucharstrieiterator.h" />
|
||||
<ClInclude Include="ucm.h" />
|
||||
<ClInclude Include="unewdata.h" />
|
||||
<ClInclude Include="uoptions.h" />
|
||||
|
@ -22,6 +22,7 @@
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uassert.h"
|
||||
#include "ustringtrie.h"
|
||||
@ -29,7 +30,7 @@
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UCharsTrieBuilder;
|
||||
class UCharsTrieIterator;
|
||||
class UVector32;
|
||||
|
||||
/**
|
||||
* Base class for objects to which Unicode characters and strings can be appended.
|
||||
@ -229,9 +230,99 @@ public:
|
||||
*/
|
||||
int32_t getNextUChars(Appendable &out) const;
|
||||
|
||||
/**
|
||||
* Iterator for all of the (string, value) pairs in a UCharsTrie.
|
||||
*/
|
||||
class Iterator : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Iterates from the root of a UChar-serialized UCharsTrie.
|
||||
* @param trieUChars The trie UChars.
|
||||
* @param maxStringLength If 0, the iterator returns full strings.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Iterates from the current state of the specified UCharsTrie.
|
||||
* @param trie The trie whose state will be copied for iteration.
|
||||
* @param maxStringLength If 0, the iterator returns full strings.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
~Iterator();
|
||||
|
||||
/**
|
||||
* Resets this iterator to its initial state.
|
||||
*/
|
||||
Iterator &reset();
|
||||
|
||||
/**
|
||||
* @return TRUE if there are more elements.
|
||||
*/
|
||||
UBool hasNext() const;
|
||||
|
||||
/**
|
||||
* Finds the next (string, value) pair if there is one.
|
||||
*
|
||||
* If the string is truncated to the maximum length and does not
|
||||
* have a real value, then the value is set to -1.
|
||||
* In this case, this "not a real value" is indistinguishable from
|
||||
* a real value of -1.
|
||||
* @return TRUE if there is another element.
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return The string for the last successful next().
|
||||
*/
|
||||
const UnicodeString &getString() const { return str_; }
|
||||
/**
|
||||
* @return The value for the last successful next().
|
||||
*/
|
||||
int32_t getValue() const { return value_; }
|
||||
|
||||
private:
|
||||
UBool truncateAndStop() {
|
||||
pos_=NULL;
|
||||
value_=-1; // no real value for str
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode);
|
||||
|
||||
const UChar *uchars_;
|
||||
const UChar *pos_;
|
||||
const UChar *initialPos_;
|
||||
int32_t remainingMatchLength_;
|
||||
int32_t initialRemainingMatchLength_;
|
||||
UBool skipValue_; // Skip intermediate value which was already delivered.
|
||||
|
||||
UnicodeString str_;
|
||||
int32_t maxLength_;
|
||||
int32_t value_;
|
||||
|
||||
// The stack stores pairs of integers for backtracking to another
|
||||
// outbound edge of a branch node.
|
||||
// The first integer is an offset from uchars_.
|
||||
// The second integer has the str_.length() from before the node in bits 15..0,
|
||||
// and the remaining branch length in bits 31..16.
|
||||
// (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit,
|
||||
// but the code looks more confusing that way.)
|
||||
UVector32 *stack_;
|
||||
};
|
||||
|
||||
private:
|
||||
friend class UCharsTrieBuilder;
|
||||
friend class UCharsTrieIterator;
|
||||
|
||||
inline void stop() {
|
||||
pos_=NULL;
|
||||
|
@ -15,26 +15,50 @@
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "ucharstrieiterator.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UCharsTrieIterator::UCharsTrieIterator(const UChar *trieUChars, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
: uchars_(trieUChars),
|
||||
pos_(uchars_), initialPos_(uchars_),
|
||||
remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
|
||||
skipValue_(FALSE),
|
||||
maxLength_(maxStringLength), value_(0), stack_(errorCode) {}
|
||||
maxLength_(maxStringLength), value_(0), stack_(NULL) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
// stack_ is a pointer so that it's easy to turn ucharstrie.h into
|
||||
// a public API header for which we would want it to depend only on
|
||||
// other public headers.
|
||||
// Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
|
||||
// via the UnicodeString and UVector32 implementations, so this additional
|
||||
// cost is minimal.
|
||||
stack_=new UVector32(errorCode);
|
||||
if(stack_==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
UCharsTrieIterator::UCharsTrieIterator(const UCharsTrie &trie, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
|
||||
UErrorCode &errorCode)
|
||||
: uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
|
||||
remainingMatchLength_(trie.remainingMatchLength_),
|
||||
initialRemainingMatchLength_(trie.remainingMatchLength_),
|
||||
skipValue_(FALSE),
|
||||
maxLength_(maxStringLength), value_(0), stack_(errorCode) {
|
||||
maxLength_(maxStringLength), value_(0), stack_(NULL) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
stack_=new UVector32(errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if(stack_==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
|
||||
if(length>=0) {
|
||||
// Pending linear-match node, append remaining UChars to str.
|
||||
@ -48,7 +72,12 @@ UCharsTrieIterator::UCharsTrieIterator(const UCharsTrie &trie, int32_t maxString
|
||||
}
|
||||
}
|
||||
|
||||
UCharsTrieIterator &UCharsTrieIterator::reset() {
|
||||
UCharsTrie::Iterator::~Iterator() {
|
||||
delete stack_;
|
||||
}
|
||||
|
||||
UCharsTrie::Iterator &
|
||||
UCharsTrie::Iterator::reset() {
|
||||
pos_=initialPos_;
|
||||
remainingMatchLength_=initialRemainingMatchLength_;
|
||||
skipValue_=FALSE;
|
||||
@ -59,26 +88,29 @@ UCharsTrieIterator &UCharsTrieIterator::reset() {
|
||||
str_.truncate(length);
|
||||
pos_+=length;
|
||||
remainingMatchLength_-=length;
|
||||
stack_.setSize(0);
|
||||
stack_->setSize(0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
UBool
|
||||
UCharsTrieIterator::next(UErrorCode &errorCode) {
|
||||
UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
|
||||
|
||||
UBool
|
||||
UCharsTrie::Iterator::next(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
const UChar *pos=pos_;
|
||||
if(pos==NULL) {
|
||||
if(stack_.isEmpty()) {
|
||||
if(stack_->isEmpty()) {
|
||||
return FALSE;
|
||||
}
|
||||
// Pop the state off the stack and continue with the next outbound edge of
|
||||
// the branch node.
|
||||
int32_t stackSize=stack_.size();
|
||||
int32_t length=stack_.elementAti(stackSize-1);
|
||||
pos=uchars_+stack_.elementAti(stackSize-2);
|
||||
stack_.setSize(stackSize-2);
|
||||
int32_t stackSize=stack_->size();
|
||||
int32_t length=stack_->elementAti(stackSize-1);
|
||||
pos=uchars_+stack_->elementAti(stackSize-2);
|
||||
stack_->setSize(stackSize-2);
|
||||
str_.truncate(length&0xffff);
|
||||
length=(int32_t)((uint32_t)length>>16);
|
||||
if(length>1) {
|
||||
@ -149,12 +181,12 @@ UCharsTrieIterator::next(UErrorCode &errorCode) {
|
||||
|
||||
// Branch node, needs to take the first outbound edge and push state for the rest.
|
||||
const UChar *
|
||||
UCharsTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
|
||||
UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
|
||||
while(length>UCharsTrie::kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison unit
|
||||
// Push state for the greater-or-equal edge.
|
||||
stack_.addElement((int32_t)(UCharsTrie::skipDelta(pos)-uchars_), errorCode);
|
||||
stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
|
||||
stack_->addElement((int32_t)(UCharsTrie::skipDelta(pos)-uchars_), errorCode);
|
||||
stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
|
||||
// Follow the less-than edge.
|
||||
length>>=1;
|
||||
pos=UCharsTrie::jumpByDelta(pos);
|
||||
@ -166,8 +198,8 @@ UCharsTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &err
|
||||
UBool isFinal=(UBool)(node>>15);
|
||||
int32_t value=UCharsTrie::readValue(pos, node&=0x7fff);
|
||||
pos=UCharsTrie::skipValue(pos, node);
|
||||
stack_.addElement((int32_t)(pos-uchars_), errorCode);
|
||||
stack_.addElement(((length-1)<<16)|str_.length(), errorCode);
|
||||
stack_->addElement((int32_t)(pos-uchars_), errorCode);
|
||||
stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
|
||||
str_.append(trieUnit);
|
||||
if(isFinal) {
|
||||
pos_=NULL;
|
||||
|
@ -1,121 +0,0 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: ucharstrieiterator.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2010nov15
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UCHARSTRIEITERATOR_H__
|
||||
#define __UCHARSTRIEITERATOR_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: UCharsTrie iterator for all of its (string, value) pairs.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "ucharstrie.h"
|
||||
#include "uvectr32.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Iterator for all of the (string, value) pairs in a UCharsTrie.
|
||||
*/
|
||||
class U_TOOLUTIL_API UCharsTrieIterator : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Iterates from the root of a UChar-serialized UCharsTrie.
|
||||
* @param trieUChars The trie UChars.
|
||||
* @param maxStringLength If 0, the iterator returns full strings.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
UCharsTrieIterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Iterates from the current state of the specified UCharsTrie.
|
||||
* @param trie The trie whose state will be copied for iteration.
|
||||
* @param maxStringLength If 0, the iterator returns full strings.
|
||||
* Otherwise, the iterator returns strings with this maximum length.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
*/
|
||||
UCharsTrieIterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Resets this iterator to its initial state.
|
||||
*/
|
||||
UCharsTrieIterator &reset();
|
||||
|
||||
/**
|
||||
* Finds the next (string, value) pair if there is one.
|
||||
*
|
||||
* If the string is truncated to the maximum length and does not
|
||||
* have a real value, then the value is set to -1.
|
||||
* In this case, this "not a real value" is indistinguishable from
|
||||
* a real value of -1.
|
||||
* @return TRUE if there is another element.
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return TRUE if there are more elements.
|
||||
*/
|
||||
UBool hasNext() const { return pos_!=NULL || !stack_.isEmpty(); }
|
||||
|
||||
/**
|
||||
* @return the string for the last successful next()
|
||||
*/
|
||||
const UnicodeString &getString() const { return str_; }
|
||||
/**
|
||||
* @return the value for the last successful next()
|
||||
*/
|
||||
int32_t getValue() const { return value_; }
|
||||
|
||||
private:
|
||||
UBool truncateAndStop() {
|
||||
pos_=NULL;
|
||||
value_=-1; // no real value for str
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode);
|
||||
|
||||
const UChar *uchars_;
|
||||
const UChar *pos_;
|
||||
const UChar *initialPos_;
|
||||
int32_t remainingMatchLength_;
|
||||
int32_t initialRemainingMatchLength_;
|
||||
UBool skipValue_; // Skip intermediate value which was already delivered.
|
||||
|
||||
UnicodeString str_;
|
||||
int32_t maxLength_;
|
||||
int32_t value_;
|
||||
|
||||
// The stack stores pairs of integers for backtracking to another
|
||||
// outbound edge of a branch node.
|
||||
// The first integer is an offset from uchars_.
|
||||
// The second integer has the str.length() from before the node in bits 15..0,
|
||||
// and the remaining branch length in bits 31..16.
|
||||
// (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit,
|
||||
// but the code looks more confusing that way.)
|
||||
UVector32 stack_;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __UCHARSTRIEITERATOR_H__
|
Loading…
Reference in New Issue
Block a user