//  file:  rbbidata.h
//
//**********************************************************************
//   Copyright (C) 1999 IBM Corp. All rights reserved.
//**********************************************************************
//
//   RBBI data formats.  Includes:
//
//      Structs that describe the format of the binary RBBI data,
//      as it is stored in ICU's data file.
//
//      RBBIDataWrapper  -  Instances of this class sit between the
//                          raw data structs and the RulesBasedBreakIterator objects
//                          that are created by applications.  The wrapper class
//                          provides reference counting for the underlying data,
//                          and direct pointers to data that would not otherwise
//                          be accessible without ugly pointer arithmetic.  The
//                          wrapper does not attempt to provide any higher level
//                          abstractions for the data itself.
//
//                          There will be only one instance of RBBIDataWrapper for any
//                          set of RBBI run time data being shared by instances
//                          (clones) of RulesBasedBreakIterator.
//

// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation; the guard name is kept as-is for compatibility.
#ifndef __RBBIDATA_H__
#define __RBBIDATA_H__

#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/udata.h"
#include "utrie.h"

U_NAMESPACE_BEGIN

//
//  The following structs map exactly onto the raw data from the ICU common
//  data file.  Do not reorder, resize or insert fields: the in-memory layout
//  of these structs IS the file format.
//

//
//  RBBIDataHeader - located at the very start of a block of RBBI data.
//                   Identifies the data and gives the location of each of
//                   its subsections.
//
struct RBBIDataHeader {
    uint32_t         fMagic;         //  Identifies the data as RBBI data.
                                     //    Presumably == 0xb1a0 - TODO confirm against
                                     //    the code that validates the header.
    uint32_t         fVersion;       //  == 1
    uint32_t         fLength;        //  Total length in bytes of this RBBI Data,
                                     //    including all sections, not just the header.
    uint32_t         fCatCount;      //  Number of character categories.

    //
    //  Offsets and sizes of each of the subsections within the RBBI data.
    //  All offsets are bytes from the start of the RBBIDataHeader.
    //  All sizes are in bytes.
    //
    uint32_t         fFTable;        //  Offset to the forward state transition table.
    uint32_t         fFTableLen;     //  Size of the forward state transition table.
    uint32_t         fRTable;        //  Offset to the reverse state transition table.
    uint32_t         fRTableLen;     //  Size of the reverse state transition table.
    uint32_t         fTrie;          //  Offset to Trie data for character categories.
    uint32_t         fTrieLen;       //  Size of the Trie data.
    uint32_t         fRuleSource;    //  Offset to the source text of the break rules.
                                     //    Stored as UChar (RBBIDataWrapper exposes it
                                     //    as a const UChar *).
    uint32_t         fRuleSourceLen; //  Size of the rule source text.

    uint32_t         fReserved[8];   //  Reserved for expansion.
};


//
//  RBBIStateTableRow - one row (one state) of a state transition table.
//                      Rows are variable length; see fNextState.
//
struct RBBIStateTableRow {
    int16_t          fAccepting;     //  Non-zero if this row is for an accepting state.
                                     //    Value is the {nnn} value to return to calling
                                     //    application.
    int16_t          fLookAhead;     //  Non-zero if this row is for a state that
                                     //    corresponds to a '/' in the rule source.
                                     //    Value is the same as the fAccepting
                                     //    value for the rule (which will appear
                                     //    in a different state).
    int16_t          fTag;           //  Non-zero if this row covers a {tagged} position
                                     //    from a rule.  Value is the tag number.
    int16_t          fReserved;      //  Not described in this header.
    uint16_t         fNextState[2];  //  Next State, indexed by char category.
                                     //    The declared size of 2 is only a placeholder;
                                     //    the real array size is fNumCols from the
                                     //    state table header.
                                     //    NOTE(review): RBBIStateTable as declared in this
                                     //    file has no fNumCols field, only fRowLen -
                                     //    confirm where the column count comes from.
                                     //    CAUTION:  see RBBITableBuilder::getTableSize()
                                     //              before changing anything here.
};


//
//  RBBIStateTable - header of a state transition table, immediately followed
//                   by the rows of the table.
//
struct RBBIStateTable {
    uint32_t         fNumStates;     //  Number of states.
    uint32_t         fRowLen;        //  Length of a state table row, in bytes.
    char             fTableData[4];  //  First RBBIStateTableRow begins here.
                                     //    (making it char[] simplifies ugly address
                                     //     arithmetic for indexing variable length rows.)
};


//
//  The reference counting wrapper class
//
//  The behavior notes on the member functions below are inferred from the
//  declarations and the file header comment only; the implementations are
//  not part of this header.
//
class RBBIDataWrapper : public UObject {
public:
    //  Construct a wrapper from a pointer to the raw RBBI data header.
    //  status receives the outcome (non-const UErrorCode reference).
    RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);

    //  Construct a wrapper from a UDataMemory.
    //  NOTE(review): presumably udm is retained in fUDataMem - confirm ownership
    //  in the implementation.
    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);

    //  Copy constructor.
    //  NOTE(review): no assignment operator is declared alongside it - confirm
    //  that copying a reference counted wrapper is actually intended.
    RBBIDataWrapper(const RBBIDataWrapper &other);

    ~RBBIDataWrapper();

    //  Set up this wrapper for the given raw data.
    //  NOTE(review): presumably shared by the constructors - confirm.
    void                  init(const RBBIDataHeader *data, UErrorCode &status);

    //  Reference counting for the shared data (see fRefCount).
    //  NOTE(review): addReference() presumably returns this - confirm.
    RBBIDataWrapper      *addReference();
    void                  removeReference();

    UBool                 operator ==(const RBBIDataWrapper &other) const;

    //  NOTE(review): hashCode() and getRuleSourceString() are not declared
    //  const, so they cannot be called through a const RBBIDataWrapper.
    int32_t               hashCode();
    const UnicodeString  &getRuleSourceString();

    //  Debugging aid, judging by the name - output destination not visible here.
    void                  printData();

    //
    //  Pointers to items within the data
    //
    const RBBIDataHeader     *fHeader;        //  Start of the RBBI data.
    const RBBIStateTable     *fForwardTable;  //  Forward state transition table.
    const RBBIStateTable     *fReverseTable;  //  Reverse state transition table.
    const UChar              *fRuleSource;    //  Source text of the break rules.

    UTrie               fTrie;                //  Trie for character categories.

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @draft ICU 2.2
     */
    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @draft ICU 2.2
     */
    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }

private:
    int32_t             fRefCount;    //  Reference count for the shared data.
    UDataMemory        *fUDataMem;    //  NOTE(review): presumably the UDataMemory given
                                      //    to the constructor, if any - confirm.
    UnicodeString       fRuleString;  //  NOTE(review): presumably backs
                                      //    getRuleSourceString() - confirm.

    /**
     * The address of this static class variable serves as this class's ID
     * for ICU "poor man's RTTI".
     */
    static const char fgClassID;
};

U_NAMESPACE_END

#endif