ICU-2924 rbbi builder, better handling of !!lookAheadHardBreak option
X-SVN-Rev: 13994
This commit is contained in:
parent
257637d600
commit
840affc200
@ -998,10 +998,11 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
||||
UChar32 c = fText->previous32();
|
||||
// previous character
|
||||
int32_t result = fText->getIndex();
|
||||
int32_t lookaheadStatus = 0;//[] = {0, 0, 0, 0, 0};
|
||||
int32_t lookaheadResult = 0;//[] = {0, 0, 0, 0, 0};
|
||||
int32_t lookaheadTag = 0;//[] = {0, 0, 0, 0, 0};
|
||||
/*int32_t lookaheadCount = 0;*/
|
||||
int32_t lookaheadStatus = 0;
|
||||
int32_t lookaheadResult = 0;
|
||||
int32_t lookaheadTag = 0;
|
||||
UBool lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0;
|
||||
|
||||
RBBIStateTableRow *row;
|
||||
|
||||
row = (RBBIStateTableRow *)
|
||||
@ -1081,7 +1082,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
||||
/// TODO (syn wee): this is hard-coded for line-break handling;
|
||||
/// the rules need to provide a tag to ensure a stop.
|
||||
|
||||
if (fData->fLookAheadHardBreak == TRUE) {
|
||||
if (lookAheadHardBreak) {
|
||||
fText->setIndex(result);
|
||||
return result;
|
||||
}
|
||||
|
@ -107,15 +107,6 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
||||
|
||||
fRefCount = 1;
|
||||
|
||||
/// todo: maybe add this formally to the builder
|
||||
UnicodeString hardbreak = UNICODE_STRING_SIMPLE("!!lookAheadHardBreak");
|
||||
if (fRuleString.indexOf(hardbreak) >= 0) {
|
||||
fLookAheadHardBreak = TRUE;
|
||||
}
|
||||
else {
|
||||
fLookAheadHardBreak = FALSE;
|
||||
}
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
char *debugEnv = getenv("U_RBBIDEBUG");
|
||||
if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
|
||||
@ -356,37 +347,44 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
||||
uprv_memset(outBytes, 0, length);
|
||||
}
|
||||
|
||||
// Forward state table. Two int32_t vars at the start, then all int16_ts.
|
||||
//
|
||||
// Each state table begins with several 32 bit fields. Calculate the size
|
||||
// in bytes of these.
|
||||
//
|
||||
RBBIStateTable *stp = NULL;
|
||||
int32_t topSize = (char *)stp->fTableData - (char *)stp;
|
||||
|
||||
// Forward state table.
|
||||
tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
|
||||
tableLength = ds->readUInt32(rbbiDH->fFTableLen);
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, 8,
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
|
||||
outBytes+tableStartOffset, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+8, tableLength-8,
|
||||
outBytes+tableStartOffset+8, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
|
||||
outBytes+tableStartOffset+topSize, status);
|
||||
|
||||
// Reverse state table. Same layout as forward table, above.
|
||||
tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
|
||||
tableLength = ds->readUInt32(rbbiDH->fRTableLen);
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, 8,
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
|
||||
outBytes+tableStartOffset, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+8, tableLength-8,
|
||||
outBytes+tableStartOffset+8, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
|
||||
outBytes+tableStartOffset+topSize, status);
|
||||
|
||||
// Safe Forward state table. Same layout as forward table, above.
|
||||
tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
|
||||
tableLength = ds->readUInt32(rbbiDH->fSFTableLen);
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, 8,
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
|
||||
outBytes+tableStartOffset, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+8, tableLength-8,
|
||||
outBytes+tableStartOffset+8, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
|
||||
outBytes+tableStartOffset+topSize, status);
|
||||
|
||||
// Safe Reverse state table. Same layout as forward table, above.
|
||||
tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
|
||||
tableLength = ds->readUInt32(rbbiDH->fSRTableLen);
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, 8,
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
|
||||
outBytes+tableStartOffset, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+8, tableLength-8,
|
||||
outBytes+tableStartOffset+8, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
|
||||
outBytes+tableStartOffset+topSize, status);
|
||||
|
||||
// Trie table for character categories
|
||||
utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
|
||||
|
@ -53,9 +53,9 @@ ubrk_swap(const UDataSwapper *ds,
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/* */
|
||||
/* The following structs map exactly onto the raw data from ICU common data file. */
|
||||
/* */
|
||||
/*
|
||||
* The following structs map exactly onto the raw data from ICU common data file.
|
||||
*/
|
||||
struct RBBIDataHeader {
|
||||
uint32_t fMagic; /* == 0xb1a0 */
|
||||
uint32_t fVersion; /* == 1 */
|
||||
@ -108,13 +108,19 @@ struct RBBIStateTableRow {
|
||||
|
||||
|
||||
struct RBBIStateTable {
|
||||
uint32_t fNumStates; /* Number of states. */
|
||||
uint32_t fRowLen; /* Length of a state table row, in bytes. */
|
||||
char fTableData[4]; /* First RBBIStateTableRow begins here. */
|
||||
/* (making it char[] simplifies ugly address */
|
||||
uint32_t fNumStates; /* Number of states. */
|
||||
uint32_t fRowLen; /* Length of a state table row, in bytes. */
|
||||
uint32_t fFlags; /* Option Flags for this state table */
|
||||
uint32_t fReserved; /* reserved */
|
||||
char fTableData[4]; /* First RBBIStateTableRow begins here. */
|
||||
/* (making it char[] simplifies ugly address */
|
||||
/* arithmetic for indexing variable length rows.) */
|
||||
};
|
||||
|
||||
/* Option flag bits stored in the fFlags field of an RBBIStateTable. */
typedef enum {
|
||||
RBBI_LOOKAHEAD_HARD_BREAK = 1 /* Look-ahead matches cause an immediate break; no continuing for the longest match. */
|
||||
} RBBIStateTableFlags;
|
||||
|
||||
|
||||
/* */
|
||||
/* The reference counting wrapper class */
|
||||
@ -145,14 +151,6 @@ public:
|
||||
const UChar *fRuleSource;
|
||||
|
||||
UTrie fTrie;
|
||||
/* if fLookAheadHardBreak is true, we will break at the first lookahead match */
|
||||
/* the search does not go on further to look for a longer match */
|
||||
/* this also allows breaks at both ends of the string */
|
||||
/* e.g. rule "ABC / D; ABCDE" and */
|
||||
/* text "ABCD ABCDE ABC" will give breaks at */
|
||||
/* 01234567890123 */
|
||||
/* {0, 3, 4, 5, 8, 9, 10, 11, 14} */
|
||||
UBool fLookAheadHardBreak;
|
||||
|
||||
private:
|
||||
int32_t fRefCount;
|
||||
|
@ -66,6 +66,7 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
|
||||
fSafeRevTables = NULL;
|
||||
fChainRules = FALSE;
|
||||
fLBCMNoChain = FALSE;
|
||||
fLookAheadHardBreak = FALSE;
|
||||
|
||||
UErrorCode oldstatus = status;
|
||||
|
||||
|
@ -134,6 +134,10 @@ public:
|
||||
UBool fLBCMNoChain; // True: suppress chaining of rules on
|
||||
// chars with LineBreak property == CM.
|
||||
|
||||
UBool fLookAheadHardBreak; // True: Look ahead matches cause an
|
||||
// immediate break, no continuing for the
|
||||
// longest match.
|
||||
|
||||
RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
|
||||
UVector *fUSetNodes; // Vector of all uset nodes.
|
||||
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "rbbirb.h"
|
||||
#include "rbbinode.h"
|
||||
#include "rbbiscan.h"
|
||||
#include "rbbitblb.h"
|
||||
|
||||
#include "uassert.h"
|
||||
|
||||
@ -473,17 +474,15 @@ UBool RBBIRuleScanner::doParseActions(EParseAction action)
|
||||
} else if (opt == "LBCMNoChain") {
|
||||
fRB->fLBCMNoChain = TRUE;
|
||||
} else if (opt == "forward") {
|
||||
fRB->fDefaultTree = &fRB->fForwardTree;
|
||||
fRB->fDefaultTree = &fRB->fForwardTree;
|
||||
} else if (opt == "reverse") {
|
||||
fRB->fDefaultTree = &fRB->fReverseTree;
|
||||
fRB->fDefaultTree = &fRB->fReverseTree;
|
||||
} else if (opt == "safe_forward") {
|
||||
fRB->fDefaultTree = &fRB->fSafeFwdTree;
|
||||
fRB->fDefaultTree = &fRB->fSafeFwdTree;
|
||||
} else if (opt == "safe_reverse") {
|
||||
fRB->fDefaultTree = &fRB->fSafeRevTree;
|
||||
fRB->fDefaultTree = &fRB->fSafeRevTree;
|
||||
} else if (opt == "lookAheadHardBreak") {
|
||||
// at the moment do nothing for this
|
||||
// the code is handled in rbbi.cpp
|
||||
// todo: think about how to handle this
|
||||
fRB->fLookAheadHardBreak = TRUE;
|
||||
} else {
|
||||
error(U_BRK_UNRECOGNIZED_OPTION);
|
||||
}
|
||||
|
@ -25,10 +25,10 @@ U_NAMESPACE_BEGIN
|
||||
|
||||
RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode) :
|
||||
fTree(*rootNode) {
|
||||
fRB = rb;
|
||||
fStatus = fRB->fStatus;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fDStates = new UVector(status);
|
||||
fRB = rb;
|
||||
fStatus = fRB->fStatus;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fDStates = new UVector(status);
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
return;
|
||||
}
|
||||
@ -805,6 +805,11 @@ void RBBITableBuilder::exportTable(void *where) {
|
||||
table->fRowLen = sizeof(RBBIStateTableRow) +
|
||||
sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2);
|
||||
table->fNumStates = fDStates->size();
|
||||
table->fFlags = 0;
|
||||
if (fRB->fLookAheadHardBreak) {
|
||||
table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK;
|
||||
}
|
||||
table->fReserved = 0;
|
||||
|
||||
for (state=0; state<table->fNumStates; state++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
|
||||
|
@ -44,6 +44,7 @@ public:
|
||||
// Sufficient memory must exist at
|
||||
// the specified location.
|
||||
|
||||
|
||||
private:
|
||||
void calcNullable(RBBINode *n);
|
||||
void calcFirstPos(RBBINode *n);
|
||||
@ -76,6 +77,7 @@ private:
|
||||
// Index is state number
|
||||
// Contents are RBBIStateDescriptor pointers.
|
||||
|
||||
|
||||
RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class
|
||||
RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user