ICU-2924 rbbi builder, better handling of !!lookAheadHardBreak option

X-SVN-Rev: 13994
This commit is contained in:
Andy Heninger 2003-12-04 02:12:42 +00:00
parent 257637d600
commit 840affc200
8 changed files with 61 additions and 53 deletions

View File

@ -998,10 +998,11 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
UChar32 c = fText->previous32();
// previous character
int32_t result = fText->getIndex();
int32_t lookaheadStatus = 0;//[] = {0, 0, 0, 0, 0};
int32_t lookaheadResult = 0;//[] = {0, 0, 0, 0, 0};
int32_t lookaheadTag = 0;//[] = {0, 0, 0, 0, 0};
/*int32_t lookaheadCount = 0;*/
int32_t lookaheadStatus = 0;
int32_t lookaheadResult = 0;
int32_t lookaheadTag = 0;
UBool lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0;
RBBIStateTableRow *row;
row = (RBBIStateTableRow *)
@ -1081,7 +1082,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
/// syn wee todo hard coded for line breaks stuff
/// needs to provide a tag in rules to ensure a stop.
if (fData->fLookAheadHardBreak == TRUE) {
if (lookAheadHardBreak) {
fText->setIndex(result);
return result;
}

View File

@ -107,15 +107,6 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
fRefCount = 1;
/// todo: maybe add this formally to the builder
UnicodeString hardbreak = UNICODE_STRING_SIMPLE("!!lookAheadHardBreak");
if (fRuleString.indexOf(hardbreak) >= 0) {
fLookAheadHardBreak = TRUE;
}
else {
fLookAheadHardBreak = FALSE;
}
#ifdef RBBI_DEBUG
char *debugEnv = getenv("U_RBBIDEBUG");
if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
@ -356,37 +347,44 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
uprv_memset(outBytes, 0, length);
}
// Forward state table. Two int32_t vars at the start, then all int16_ts.
//
// Each state table begins with several 32 bit fields. Calculate the size
// in bytes of these.
//
RBBIStateTable *stp = NULL;
int32_t topSize = (char *)stp->fTableData - (char *)stp;
// Forward state table.
tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
tableLength = ds->readUInt32(rbbiDH->fFTableLen);
ds->swapArray32(ds, inBytes+tableStartOffset, 8,
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
outBytes+tableStartOffset, status);
ds->swapArray16(ds, inBytes+tableStartOffset+8, tableLength-8,
outBytes+tableStartOffset+8, status);
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
outBytes+tableStartOffset+topSize, status);
// Reverse state table. Same layout as forward table, above.
tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
tableLength = ds->readUInt32(rbbiDH->fRTableLen);
ds->swapArray32(ds, inBytes+tableStartOffset, 8,
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
outBytes+tableStartOffset, status);
ds->swapArray16(ds, inBytes+tableStartOffset+8, tableLength-8,
outBytes+tableStartOffset+8, status);
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
outBytes+tableStartOffset+topSize, status);
// Safe Forward state table. Same layout as forward table, above.
tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
tableLength = ds->readUInt32(rbbiDH->fSFTableLen);
ds->swapArray32(ds, inBytes+tableStartOffset, 8,
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
outBytes+tableStartOffset, status);
ds->swapArray16(ds, inBytes+tableStartOffset+8, tableLength-8,
outBytes+tableStartOffset+8, status);
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
outBytes+tableStartOffset+topSize, status);
// Safe Reverse state table. Same layout as forward table, above.
tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
tableLength = ds->readUInt32(rbbiDH->fSRTableLen);
ds->swapArray32(ds, inBytes+tableStartOffset, 8,
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
outBytes+tableStartOffset, status);
ds->swapArray16(ds, inBytes+tableStartOffset+8, tableLength-8,
outBytes+tableStartOffset+8, status);
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
outBytes+tableStartOffset+topSize, status);
// Trie table for character categories
utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),

View File

@ -53,9 +53,9 @@ ubrk_swap(const UDataSwapper *ds,
U_NAMESPACE_BEGIN
/* */
/* The following structs map exactly onto the raw data from ICU common data file. */
/* */
/*
* The following structs map exactly onto the raw data from ICU common data file.
*/
struct RBBIDataHeader {
uint32_t fMagic; /* == 0xbla0 */
uint32_t fVersion; /* == 1 */
@ -108,13 +108,19 @@ struct RBBIStateTableRow {
struct RBBIStateTable {
uint32_t fNumStates; /* Number of states. */
uint32_t fRowLen; /* Length of a state table row, in bytes. */
char fTableData[4]; /* First RBBIStateTableRow begins here. */
/* (making it char[] simplifies ugly address */
uint32_t fNumStates; /* Number of states. */
uint32_t fRowLen; /* Length of a state table row, in bytes. */
uint32_t fFlags; /* Option Flags for this state table */
uint32_t fReserved; /* reserved */
char fTableData[4]; /* First RBBIStateTableRow begins here. */
/* (making it char[] simplifies ugly address */
/* arithmetic for indexing variable length rows.) */
};
/*
 * Option bits carried in the fFlags field of an RBBIStateTable.
 * Each enumerator is a single-bit mask; test with (fFlags & MASK) != 0.
 */
typedef enum {
    /* Look-ahead matches cause an immediate break; the iterator   */
    /*   does not continue on looking for the longest match.       */
    /*   Set by the table builder when the rules contain the       */
    /*   !!lookAheadHardBreak option.                              */
    RBBI_LOOKAHEAD_HARD_BREAK = 0x01
} RBBIStateTableFlags;
/* */
/* The reference counting wrapper class */
@ -145,14 +151,6 @@ public:
const UChar *fRuleSource;
UTrie fTrie;
/* if fLookAheadHardBreak is true, we will break at the first lookahead match */
/* the search does not go on further to look for a longer match */
/* this also allows breaks at both ends of the string */
/* e.g. rule "ABC / D; ABCDE" and */
/* text "ABCD ABCDE ABC" will give breaks at */
/* 01234567890123 */
/* {0, 3, 4, 5, 8, 9, 10, 11, 14} */
UBool fLookAheadHardBreak;
private:
int32_t fRefCount;

View File

@ -66,6 +66,7 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
fSafeRevTables = NULL;
fChainRules = FALSE;
fLBCMNoChain = FALSE;
fLookAheadHardBreak = FALSE;
UErrorCode oldstatus = status;

View File

@ -134,6 +134,10 @@ public:
UBool fLBCMNoChain; // True: suppress chaining of rules on
// chars with LineBreak property == CM.
UBool fLookAheadHardBreak; // True: Look ahead matches cause an
// immediate break, no continuing for the
// longest match.
RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
UVector *fUSetNodes; // Vector of all uset nodes.

View File

@ -32,6 +32,7 @@
#include "rbbirb.h"
#include "rbbinode.h"
#include "rbbiscan.h"
#include "rbbitblb.h"
#include "uassert.h"
@ -473,17 +474,15 @@ UBool RBBIRuleScanner::doParseActions(EParseAction action)
} else if (opt == "LBCMNoChain") {
fRB->fLBCMNoChain = TRUE;
} else if (opt == "forward") {
fRB->fDefaultTree = &fRB->fForwardTree;
fRB->fDefaultTree = &fRB->fForwardTree;
} else if (opt == "reverse") {
fRB->fDefaultTree = &fRB->fReverseTree;
fRB->fDefaultTree = &fRB->fReverseTree;
} else if (opt == "safe_forward") {
fRB->fDefaultTree = &fRB->fSafeFwdTree;
fRB->fDefaultTree = &fRB->fSafeFwdTree;
} else if (opt == "safe_reverse") {
fRB->fDefaultTree = &fRB->fSafeRevTree;
fRB->fDefaultTree = &fRB->fSafeRevTree;
} else if (opt == "lookAheadHardBreak") {
// at the moment do nothing for this
// the code is handled in rbbi.cpp
// todo: think about how to handle this
fRB->fLookAheadHardBreak = TRUE;
} else {
error(U_BRK_UNRECOGNIZED_OPTION);
}

View File

@ -25,10 +25,10 @@ U_NAMESPACE_BEGIN
RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode) :
fTree(*rootNode) {
fRB = rb;
fStatus = fRB->fStatus;
UErrorCode status = U_ZERO_ERROR;
fDStates = new UVector(status);
fRB = rb;
fStatus = fRB->fStatus;
UErrorCode status = U_ZERO_ERROR;
fDStates = new UVector(status);
if (U_FAILURE(*fStatus)) {
return;
}
@ -805,6 +805,11 @@ void RBBITableBuilder::exportTable(void *where) {
table->fRowLen = sizeof(RBBIStateTableRow) +
sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2);
table->fNumStates = fDStates->size();
table->fFlags = 0;
if (fRB->fLookAheadHardBreak) {
table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK;
}
table->fReserved = 0;
for (state=0; state<table->fNumStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);

View File

@ -44,6 +44,7 @@ public:
// Sufficient memory must exist at
// the specified location.
private:
void calcNullable(RBBINode *n);
void calcFirstPos(RBBINode *n);
@ -76,6 +77,7 @@ private:
// Index is state number
// Contents are RBBIStateDescriptor pointers.
RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class
RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class
};