2000-01-08 02:05:05 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
|
|
|
* Copyright (C) 1999 IBM Corp. All rights reserved.
|
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 11/11/99 rgillam Complete port from Java.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef RBBI_TBL_H
|
|
|
|
#define RBBI_TBL_H
|
|
|
|
|
2000-01-11 00:46:58 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/unistr.h"
|
2000-01-08 02:05:05 +00:00
|
|
|
#include "unicode/brkiter.h"
|
2000-07-12 05:01:53 +00:00
|
|
|
#include "unicode/udata.h"
|
2000-01-08 02:05:05 +00:00
|
|
|
#include "filestrm.h"
|
2000-01-18 19:57:46 +00:00
|
|
|
|
2000-04-19 22:13:47 +00:00
|
|
|
U_CDECL_BEGIN
|
|
|
|
#ifndef UCMP8_H
|
|
|
|
typedef struct _CompactByteArray CompactByteArray;
|
|
|
|
#endif
|
|
|
|
U_CDECL_END
|
|
|
|
|
2000-01-18 19:57:46 +00:00
|
|
|
/* forward declarations */
|
|
|
|
class RuleBasedBreakIterator;
|
|
|
|
class DictionaryBasedBreakIterator;
|
|
|
|
|
2000-01-08 02:05:05 +00:00
|
|
|
/**
|
|
|
|
* This class contains the internal static tables that are used by the
|
|
|
|
* RuleBasedBreakIterator. Once created, these tables are immutable,
|
|
|
|
* so they can be shared among all break iterators using a particular
|
|
|
|
* set of rules. This class uses a reference-counting scheme to
|
|
|
|
* manage the sharing.
|
|
|
|
*
|
|
|
|
* @author Richard Gillam
|
|
|
|
*/
|
|
|
|
class RuleBasedBreakIteratorTables {
|
|
|
|
|
|
|
|
private:
|
|
|
|
/**
|
|
|
|
* The number of RuleBasedBreakIterators using this object.
|
|
|
|
*/
|
|
|
|
int16_t refCount;
|
|
|
|
|
|
|
|
protected:
|
|
|
|
/**
|
|
|
|
* Whether or not we own the storage for the tables (the tables may be
|
|
|
|
* stored in a memory-mapped file)
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool ownTables;
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
/**
|
|
|
|
* The textual description that was used to create these tables
|
|
|
|
*/
|
|
|
|
UnicodeString description;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A table that indexes from character values to character category numbers
|
|
|
|
*/
|
|
|
|
CompactByteArray* charCategoryTable;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The table of state transitions used for forward iteration
|
|
|
|
*/
|
|
|
|
int16_t* stateTable;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The table of state transitions used to sync up the iterator with the
|
|
|
|
* text in backwards and random-access iteration
|
|
|
|
*/
|
|
|
|
int16_t* backwardsStateTable;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A list of flags indicating which states in the state table are accepting
|
|
|
|
* ("end") states
|
|
|
|
*/
|
|
|
|
int8_t* endStates;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A list of flags indicating which states in the state table are
|
|
|
|
* lookahead states (states which turn lookahead on and off)
|
|
|
|
*/
|
|
|
|
int8_t* lookaheadStates;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The number of character categories (and, thus, the number of columns in
|
|
|
|
* the state tables)
|
|
|
|
*/
|
|
|
|
int32_t numCategories;
|
|
|
|
|
|
|
|
//=======================================================================
|
|
|
|
// constructor
|
|
|
|
//=======================================================================
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Creates a tables object, adopting all of the tables that are passed in.
|
|
|
|
*/
|
|
|
|
protected:
|
|
|
|
RuleBasedBreakIteratorTables();
|
|
|
|
|
2000-07-12 05:01:53 +00:00
|
|
|
RuleBasedBreakIteratorTables(UDataMemory* memory);
|
|
|
|
UDataMemory *fMemory;
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
/**
|
|
|
|
* The copy constructor is declared private and is a no-op.
|
|
|
|
* THIS CLASS MAY NOT BE COPIED.
|
|
|
|
*/
|
|
|
|
RuleBasedBreakIteratorTables(const RuleBasedBreakIteratorTables& that);
|
|
|
|
|
|
|
|
//=======================================================================
|
|
|
|
// boilerplate
|
|
|
|
//=======================================================================
|
|
|
|
|
|
|
|
protected:
|
|
|
|
/**
|
|
|
|
* Destructor
|
|
|
|
*/
|
|
|
|
virtual ~RuleBasedBreakIteratorTables();
|
|
|
|
|
|
|
|
private:
|
|
|
|
/**
|
|
|
|
* The assignment operator is declared private and is a no-op.
|
|
|
|
* THIS CLASS MAY NOT BE COPIED.
|
|
|
|
*/
|
|
|
|
RuleBasedBreakIteratorTables& operator=(const RuleBasedBreakIteratorTables& that);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Equality operator. Returns TRUE if both tables objects are of the
|
|
|
|
* same class, have the same behavior, and iterate over the same text.
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
virtual UBool operator==(const RuleBasedBreakIteratorTables& that) const;
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Not-equal operator. If operator== returns TRUE, this returns FALSE,
|
|
|
|
* and vice versa.
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool operator!=(const RuleBasedBreakIteratorTables& that) const;
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute a hash code for these tables
|
|
|
|
* @return A hash code
|
|
|
|
*/
|
2000-01-14 00:13:59 +00:00
|
|
|
virtual int32_t hashCode(void) const;
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the description used to create these tables
|
|
|
|
*/
|
2000-01-14 00:13:59 +00:00
|
|
|
const UnicodeString& getRules(void) const;
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
//=======================================================================
|
|
|
|
// reference counting
|
|
|
|
//=======================================================================
|
|
|
|
|
|
|
|
/**
|
|
|
|
* increments the reference count.
|
|
|
|
*/
|
2000-01-14 00:13:59 +00:00
|
|
|
void addReference(void);
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* decrements the reference count and deletes the object if it reaches zero
|
|
|
|
*/
|
2000-01-14 00:13:59 +00:00
|
|
|
void removeReference(void);
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
protected:
|
|
|
|
//=======================================================================
|
|
|
|
// implementation
|
|
|
|
//=======================================================================
|
|
|
|
/**
|
|
|
|
* Looks up a character's category (i.e., its category for breaking purposes,
|
|
|
|
* not its Unicode category)
|
|
|
|
*/
|
|
|
|
virtual int32_t lookupCategory(UChar c, BreakIterator* bi) const;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Given a current state and a character category, looks up the
|
|
|
|
* next state to transition to in the state table.
|
|
|
|
*/
|
|
|
|
virtual int32_t lookupState(int32_t state, int32_t category) const;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Given a current state and a character category, looks up the
|
|
|
|
* next state to transition to in the backwards state table.
|
|
|
|
*/
|
|
|
|
virtual int32_t lookupBackwardState(int32_t state, int32_t category) const;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true if the specified state is an accepting state.
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
virtual UBool isEndState(int32_t state) const;
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true if the specified state is a lookahead state.
|
|
|
|
*/
|
2000-05-18 22:08:39 +00:00
|
|
|
virtual UBool isLookaheadState(int32_t state) const;
|
2000-01-08 02:05:05 +00:00
|
|
|
|
|
|
|
friend class RuleBasedBreakIterator;
|
|
|
|
friend class DictionaryBasedBreakIterator;
|
|
|
|
};
|
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
inline UBool
|
2000-01-08 02:05:05 +00:00
|
|
|
RuleBasedBreakIteratorTables::operator!=(const RuleBasedBreakIteratorTables& that) const {
|
|
|
|
return !operator==(that);
|
|
|
|
}
|
|
|
|
|
|
|
|
inline const UnicodeString&
|
2000-01-14 00:13:59 +00:00
|
|
|
RuleBasedBreakIteratorTables::getRules(void) const {
|
2000-01-08 02:05:05 +00:00
|
|
|
return description;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline void
|
2000-01-14 00:13:59 +00:00
|
|
|
RuleBasedBreakIteratorTables::addReference(void) {
|
2000-01-08 02:05:05 +00:00
|
|
|
++refCount;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline void
|
2000-01-14 00:13:59 +00:00
|
|
|
RuleBasedBreakIteratorTables::removeReference(void) {
|
2000-01-08 02:05:05 +00:00
|
|
|
if (--refCount <= 0)
|
|
|
|
delete this;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|