/* ******************************************************************************* * Copyright (C) 2010, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: bytetrieiterator.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2010nov03 * created by: Markus W. Scherer */ #ifndef __BYTETRIEITERATOR_H__ #define __BYTETRIEITERATOR_H__ /** * \file * \brief C++ API: ByteTrie iterator for all of its (byte sequence, value) pairs. */ // Needed if and when we change the .dat package index to a ByteTrie, // so that icupkg can work with an input package. #include "unicode/utypes.h" #include "unicode/stringpiece.h" #include "bytetrie.h" #include "charstr.h" #include "uvectr32.h" U_NAMESPACE_BEGIN /** * Iterator for all of the (byte sequence, value) pairs in a ByteTrie. */ class U_TOOLUTIL_API ByteTrieIterator : public UMemory { public: /** * Iterates from the root of a byte-serialized ByteTrie. * @param trieBytes The trie bytes. * @param maxStringLength If 0, the iterator returns full strings/byte sequences. * Otherwise, the iterator returns strings with this maximum length. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) */ ByteTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode); /** * Iterates from the current state of the specified ByteTrie. * @param trie The trie whose state will be copied for iteration. * @param maxStringLength If 0, the iterator returns full strings/byte sequences. * Otherwise, the iterator returns strings with this maximum length. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) */ ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); /** * Resets this iterator to its initial state. */ ByteTrieIterator &reset(); /** * Finds the next (byte sequence, value) pair if there is one. * * If the byte sequence is truncated to the maximum length and does not * have a real value, then the value is set to -1. * In this case, this "not a real value" is indistinguishable from * a real value of -1. * @return TRUE if there is another element. */ UBool next(UErrorCode &errorCode); /** * @return TRUE if there are more elements. */ UBool hasNext() const { return pos_!=NULL || !stack_.isEmpty(); } /** * @return the NUL-terminated byte sequence for the last successful next() */ const StringPiece &getString() const { return sp_; } /** * @return the value for the last successful next() */ int32_t getValue() const { return value_; } private: UBool truncateAndStop() { pos_=NULL; value_=-1; // no real value for str sp_.set(str_.data(), str_.length()); return TRUE; } const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode); const uint8_t *bytes_; const uint8_t *pos_; const uint8_t *initialPos_; int32_t remainingMatchLength_; int32_t initialRemainingMatchLength_; CharString str_; StringPiece sp_; int32_t maxLength_; int32_t value_; // The stack stores pairs of integers for backtracking to another // outbound edge of a branch node. // The first integer is an offset from ByteTrie.bytes. // The second integer has the str.length() from before the node in bits 15..0, // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.) // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24, // but the code looks more confusing that way.) UVector32 stack_; }; U_NAMESPACE_END #endif // __BYTETRIEITERATOR_H__