/* ********************************************************************** * Copyright (C) 1999 IBM Corp. All rights reserved. ********************************************************************** * Date Name Description * 11/11/99 rgillam Complete port from Java. ********************************************************************** */ #ifndef RBBI_TBL_H #define RBBI_TBL_H #include "unicode/utypes.h" #include "unicode/unistr.h" #include "unicode/brkiter.h" #include "unicode/udata.h" #include "filestrm.h" U_CDECL_BEGIN #ifndef UCMP8_H typedef struct _CompactByteArray CompactByteArray; #endif U_CDECL_END U_NAMESPACE_BEGIN /* forward declarations */ class RuleBasedBreakIterator; class DictionaryBasedBreakIterator; /** * This class contains the internal static tables that are used by the * RuleBasedBreakIterator. Once created, these tables are immutable, * so they can be shared among all break iterators using a particular * set of rules. This class uses a reference-counting scheme to * manage the sharing. * * @author Richard Gillam */ class RuleBasedBreakIteratorTables { private: /** * The number of RuleBasedBreakIterators using this object. */ int16_t refCount; protected: /** * Whether or not we own the storage for the tables (the tables may be * stored in a memory-mapped file) */ UBool ownTables; private: /** * The textual description that was used to create these tables */ UnicodeString description; /** * A table that indexes from character values to character category numbers */ CompactByteArray* charCategoryTable; /** * The table of state transitions used for forward iteration */ int16_t* stateTable; /** * The table of state transitions used to sync up the iterator with the * text in backwards and random-access iteration */ int16_t* backwardsStateTable; /** * A list of flags indicating which states in the state table are accepting * ("end") states */ int8_t* endStates; /** * A list of flags indicating which states in the state table are * lookahead states (states which turn lookahead on and off) */ int8_t* lookaheadStates; /** * The number of character categories (and, thus, the number of columns in * the state tables) */ int32_t numCategories; //======================================================================= // constructor //======================================================================= /** * Creates a tables object, adopting all of the tables that are passed in. */ protected: RuleBasedBreakIteratorTables(); RuleBasedBreakIteratorTables(UDataMemory* memory); UDataMemory *fMemory; private: /** * The copy constructor is declared private and is a no-op. * THIS CLASS MAY NOT BE COPIED. */ RuleBasedBreakIteratorTables(const RuleBasedBreakIteratorTables& that); //======================================================================= // boilerplate //======================================================================= protected: /** * Destructor */ virtual ~RuleBasedBreakIteratorTables(); private: /** * The assignment operator is declared private and is a no-op. * THIS CLASS MAY NOT BE COPIED. */ RuleBasedBreakIteratorTables& operator=(const RuleBasedBreakIteratorTables& that); /** * Equality operator. Returns TRUE if both tables objects are of the * same class, have the same behavior, and iterate over the same text. */ virtual UBool operator==(const RuleBasedBreakIteratorTables& that) const; /** * Not-equal operator. If operator== returns TRUE, this returns FALSE, * and vice versa. */ UBool operator!=(const RuleBasedBreakIteratorTables& that) const; /** * Compute a hash code for these tables * @return A hash code */ virtual int32_t hashCode(void) const; /** * Returns the description used to create these tables */ const UnicodeString& getRules(void) const; //======================================================================= // reference counting //======================================================================= /** * increments the reference count. */ void addReference(void); /** * decrements the reference count and deletes the object if it reaches zero */ void removeReference(void); protected: //======================================================================= // implementation //======================================================================= /** * Looks up a character's category (i.e., its category for breaking purposes, * not its Unicode category) */ virtual int32_t lookupCategory(UChar c, BreakIterator* bi) const; /** * Given a current state and a character category, looks up the * next state to transition to in the state table. */ virtual int32_t lookupState(int32_t state, int32_t category) const; /** * Given a current state and a character category, looks up the * next state to transition to in the backwards state table. */ virtual int32_t lookupBackwardState(int32_t state, int32_t category) const; /** * Returns true if the specified state is an accepting state. */ virtual UBool isEndState(int32_t state) const; /** * Returns true if the specified state is a lookahead state. */ virtual UBool isLookaheadState(int32_t state) const; #ifdef RBBI_DEBUG // // Print out state table and character classes. // For debugging only. // void debugDumpTables() const; #endif friend class RuleBasedBreakIterator; friend class DictionaryBasedBreakIterator; }; inline UBool RuleBasedBreakIteratorTables::operator!=(const RuleBasedBreakIteratorTables& that) const { return !operator==(that); } inline const UnicodeString& RuleBasedBreakIteratorTables::getRules(void) const { return description; } inline void RuleBasedBreakIteratorTables::addReference(void) { ++refCount; } inline void RuleBasedBreakIteratorTables::removeReference(void) { if (--refCount <= 0) delete this; } U_NAMESPACE_END #endif