2002-06-25 17:23:07 +00:00
|
|
|
//  file:  rbbidata.h
//
//**********************************************************************
//   Copyright (C) 1999 IBM Corp. All rights reserved.
//**********************************************************************
//
//   RBBI data formats  Includes
//
//                          Structs that describe the format of the Binary RBBI data,
//                          as it is stored in ICU's data file.
//
//      RBBIDataWrapper  -  Instances of this class sit between the
//                          raw data structs and the RulesBasedBreakIterator objects
//                          that are created by applications.  The wrapper class
//                          provides reference counting for the underlying data,
//                          and direct pointers to data that would not otherwise
//                          be accessible without ugly pointer arithmetic.  The
//                          wrapper does not attempt to provide any higher level
//                          abstractions for the data itself.
//
//                          There will be only one instance of RBBIDataWrapper for any
//                          set of RBBI run time data being shared by instances
//                          (clones) of RulesBasedBreakIterator.
//
|
|
|
|
|
|
|
|
#ifndef __RBBIDATA_H__
|
|
|
|
#define __RBBIDATA_H__
|
|
|
|
|
2002-06-27 01:19:20 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/uobject.h"
|
2002-06-25 17:23:07 +00:00
|
|
|
#include "unicode/unistr.h"
|
|
|
|
#include "unicode/udata.h"
|
|
|
|
#include "utrie.h"
|
|
|
|
|
|
|
|
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
//
//  The following structs map exactly onto the raw data from the ICU common data file.
//
//  RBBIDataHeader - header found at the start of a block of binary RBBI data.
//                   Every member is a uint32_t (24 in total), so the member
//                   order and count below define the stored layout.
//
struct RBBIDataHeader {
    uint32_t         fMagic;           //  Format signature.
                                       //    NOTE(review): the comment here originally read
                                       //    "== 0xbla0", which is not a valid hex literal;
                                       //    presumably 0xb1a0 - confirm against the code
                                       //    that validates this header.
    uint32_t         fVersion;         //  == 1
    uint32_t         fLength;          //  Total length in bytes of this RBBI Data,
                                       //    including all sections, not just the header.
    uint32_t         fCatCount;        //  Number of character categories.

    //
    //  Offsets and sizes of each of the subsections within the RBBI data.
    //  All offsets are bytes from the start of the RBBIDataHeader.
    //  All sizes are in bytes.
    //
    uint32_t         fFTable;          //  Offset to the forward state transition table.
    uint32_t         fFTableLen;       //  Size of the forward state transition table.
    uint32_t         fRTable;          //  Offset to the reverse state transition table.
    uint32_t         fRTableLen;       //  Size of the reverse state transition table.
    uint32_t         fSFTable;         //  Offset to the safe point forward transition table.
    uint32_t         fSFTableLen;      //  Size of the safe point forward transition table.
    uint32_t         fSRTable;         //  Offset to the safe point reverse transition table.
    uint32_t         fSRTableLen;      //  Size of the safe point reverse transition table.
    uint32_t         fTrie;            //  Offset to Trie data for character categories.
    uint32_t         fTrieLen;         //  Size of the Trie data.
    uint32_t         fRuleSource;      //  Offset to the source for the break rules.
                                       //    Stored as UChar *.
    uint32_t         fRuleSourceLen;   //  Size of the break rule source.

    uint32_t         fReserved[8];     //  Reserved for expansion.
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
//  RBBIStateTableRow - one row of a state transition table: four 16-bit
//                      per-state values followed by the next-state entries.
//
struct RBBIStateTableRow {
    int16_t          fAccepting;    //  Non-zero if this row is for an accepting state.
                                    //    Value is the {nnn} value to return to calling
                                    //    application.
    int16_t          fLookAhead;    //  Non-zero if this row is for a state that
                                    //    corresponds to a '/' in the rule source.
                                    //    Value is the same as the fAccepting
                                    //    value for the rule (which will appear
                                    //    in a different state).
    int16_t          fTag;          //  Non-zero if this row covers a {tagged} position
                                    //    from a rule.  Value is the tag number.
    int16_t          fReserved;
    uint16_t         fNextState[2]; //  Next State, indexed by char category.
                                    //    The declared length of 2 is only a placeholder;
                                    //    the original note says the array size is
                                    //    "fNumCols from the state table header".
                                    //    NOTE(review): no fNumCols member is visible in
                                    //    RBBIStateTable in this file - confirm where the
                                    //    real column count comes from.
                                    //    CAUTION:  see RBBITableBuilder::getTableSize()
                                    //              before changing anything here.
};
|
|
|
|
|
|
|
|
|
|
|
|
//
//  RBBIStateTable - header of a state transition table, followed directly by
//                   the row data.
//
struct RBBIStateTable {
    uint32_t         fNumStates;    //  Number of states.
    uint32_t         fRowLen;       //  Length of a state table row, in bytes.
    char             fTableData[4]; //  First RBBIStateTableRow begins here.
                                    //    (making it char[] simplifies ugly address
                                    //     arithmetic for indexing variable length rows.)
};
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// The reference counting wrapper class
|
|
|
|
//
|
2002-10-04 01:23:34 +00:00
|
|
|
class RBBIDataWrapper : public UMemory {
|
2002-06-25 17:23:07 +00:00
|
|
|
public:
|
|
|
|
RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
|
|
|
|
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
|
|
|
|
~RBBIDataWrapper();
|
|
|
|
|
|
|
|
void init(const RBBIDataHeader *data, UErrorCode &status);
|
|
|
|
RBBIDataWrapper *addReference();
|
|
|
|
void removeReference();
|
|
|
|
UBool operator ==(const RBBIDataWrapper &other) const;
|
|
|
|
int32_t hashCode();
|
|
|
|
const UnicodeString &getRuleSourceString();
|
|
|
|
void printData();
|
2003-11-05 02:03:44 +00:00
|
|
|
void printTable(const char *heading, const RBBIStateTable *table);
|
2002-06-25 17:23:07 +00:00
|
|
|
|
|
|
|
//
|
|
|
|
// Pointers to items within the data
|
|
|
|
//
|
|
|
|
const RBBIDataHeader *fHeader;
|
|
|
|
const RBBIStateTable *fForwardTable;
|
|
|
|
const RBBIStateTable *fReverseTable;
|
2003-11-05 02:03:44 +00:00
|
|
|
const RBBIStateTable *fSafeFwdTable;
|
|
|
|
const RBBIStateTable *fSafeRevTable;
|
2002-06-25 17:23:07 +00:00
|
|
|
const UChar *fRuleSource;
|
|
|
|
|
|
|
|
UTrie fTrie;
|
2003-11-07 22:49:38 +00:00
|
|
|
// if fLookAheadHardBreak is true, we will break at the first lookahead match
|
|
|
|
// the search does not go on further to look for a longer match
|
|
|
|
// this also allows breaks at both ends of the string
|
|
|
|
// e.g. rule "ABC / D; ABCDE" and
|
|
|
|
// text "ABCD ABCDE ABC" will give breaks at
|
|
|
|
// 01234567890123
|
|
|
|
// {0, 3, 4, 5, 8, 9, 10, 11, 14}
|
|
|
|
UBool fLookAheadHardBreak;
|
2002-06-25 17:23:07 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
int32_t fRefCount;
|
|
|
|
UDataMemory *fUDataMem;
|
|
|
|
UnicodeString fRuleString;
|
|
|
|
|
2002-10-04 01:23:34 +00:00
|
|
|
RBBIDataWrapper(const RBBIDataWrapper &other); // forbid copying of this class
|
|
|
|
RBBIDataWrapper &operator=(const RBBIDataWrapper &other); // forbid copying of this class
|
2002-06-25 17:23:07 +00:00
|
|
|
};
|
|
|
|
|
2003-09-29 17:24:15 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Swap RBBI data. See udataswp.h.
|
|
|
|
* @internal
|
|
|
|
*/
|
|
|
|
U_CAPI int32_t U_EXPORT2
|
|
|
|
ubrk_swap(const UDataSwapper *ds,
|
|
|
|
const void *inData, int32_t length, void *outData,
|
|
|
|
UErrorCode *pErrorCode);
|
|
|
|
|
|
|
|
|
2002-06-25 17:23:07 +00:00
|
|
|
U_NAMESPACE_END
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|