scuffed-code/icu4c/source/common/rbbisetb.h
2002-06-29 00:04:16 +00:00

129 lines
4.7 KiB
C++

//
// rbbisetb.h
/*
**********************************************************************
* Copyright (c) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#ifndef RBBISETB_H
#define RBBISETB_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "rbbirb.h"
#include "uvector.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
//
// RBBISetBuilder Derives the character categories used by the runtime RBBI engine
// from the Unicode Sets appearing in the source RBBI rules, and
// creates the TRIE table used to map from Unicode to the
// character categories.
//
//
// RangeDescriptor
//
// Each of the non-overlapping character ranges gets one of these descriptors.
// All of them are strung together in a linked list, which is kept in order
// (by character)
//
struct RangeDescriptor {
UChar32 fStartChar; // Start of range, unicode 32 bit value.
UChar32 fEndChar; // End of range, unicode 32 bit value.
int32_t fNum; // runtime-mapped input value for this range.
UVector *fIncludesSets; // vector of the the original
// Unicode sets that include this range.
// (Contains ptrs to uset nodes)
RangeDescriptor *fNext; // Next RangeDescriptor in the linked list.
RangeDescriptor(UErrorCode &status);
RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
~RangeDescriptor();
void split(UChar32 where, UErrorCode &status); // Spit this range in two at "where", with
// where appearing in the second (higher) part.
void setDictionaryFlag(); // Check whether this range appears as part of
// the Unicode set named "dictionary"
};
//
// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules.
//
// Starting with the rules parse tree from the scanner,
//
// - Enumerate the set of UnicodeSets that are referenced
// by the RBBI rules.
// - compute a derived set of non-overlapping UnicodeSets
// that will correspond to columns in the state table for
// the RBBI execution engine.
// - construct the trie table that maps input characters
// to set numbers in the non-overlapping set of sets.
//
class RBBISetBuilder : public UObject {
public:
RBBISetBuilder(RBBIRuleBuilder *rb);
~RBBISetBuilder();
void build(); // TODO: needs an out parameter for the TRIE.
void addValToSets(UVector *sets, uint32_t val);
int32_t getNumCharCategories(); // CharCategories are the same as input symbol set to the
// runtime state machine, which are the same as
// columns in the DFA state table
int32_t getTrieSize(); // Size in bytes of the serialized Trie.
void serializeTrie(uint8_t *where); // write out the serialized Trie.
void printSets();
void printRanges();
void printRangeGroups();
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*
* @draft ICU 2.2
*/
virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*
* @draft ICU 2.2
*/
static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
private:
void numberSets();
RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us.
UErrorCode *fStatus;
RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
UNewTrie *fTrie; // The mapping TRIE that is the end result of processin
uint32_t fTrieSize; // the Unicode Sets.
// Groups correspond to character categories -
// groups of ranges that are in the same original UnicodeSets.
// fGroupCount is the index of the last used group.
// The value is also the number of columns in the RBBI state table being compiled.
// Index 0 is not used. Funny counting.
int32_t fGroupCount;
/**
* The address of this static class variable serves as this class's ID
* for ICU "poor man's RTTI".
*/
static const char fgClassID;
};
U_NAMESPACE_END
#endif