ICU-12507 bump rbbi data version for change to UTrie2; consolidate version checking.
X-SVN-Rev: 40183
This commit is contained in:
parent
9d12b335cc
commit
a9cedfb775
@ -54,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
|
||||
dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
|
||||
dh->info.dataFormat[1] == 0x72 &&
|
||||
dh->info.dataFormat[2] == 0x6b &&
|
||||
dh->info.dataFormat[3] == 0x20)
|
||||
// Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is
|
||||
// validated when checking that.
|
||||
dh->info.dataFormat[3] == 0x20 &&
|
||||
isDataVersionAcceptable(dh->info.formatVersion))
|
||||
) {
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
@ -67,6 +66,16 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
|
||||
fUDataMem = udm;
|
||||
}
|
||||
|
||||
UBool RBBIDataWrapper::isDataVersionAcceptable(const uint8_t version[]) {
|
||||
for (int i=0; i<UPRV_LENGTHOF(RBBI_DATA_FORMAT_VERSION); ++i) {
|
||||
if (RBBI_DATA_FORMAT_VERSION[i] != version[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// init(). Does most of the work of construction, shared between the
|
||||
@ -92,8 +101,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
||||
return;
|
||||
}
|
||||
fHeader = data;
|
||||
if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3)
|
||||
{
|
||||
if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
@ -308,7 +316,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
||||
pInfo->dataFormat[1]==0x72 &&
|
||||
pInfo->dataFormat[2]==0x6b &&
|
||||
pInfo->dataFormat[3]==0x20 &&
|
||||
pInfo->formatVersion[0]==3 )) {
|
||||
RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
|
||||
udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
pInfo->dataFormat[2], pInfo->dataFormat[3],
|
||||
@ -329,17 +337,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
||||
//
|
||||
// Get the RBBI Data Header, and check that it appears to be OK.
|
||||
//
|
||||
// Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
|
||||
// an int32_t with a value of 1. Starting with ICU 3.4,
|
||||
// RBBI's fDataFormat matches the dataFormat field from the
|
||||
// UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
|
||||
//
|
||||
const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
|
||||
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
|
||||
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
|
||||
rbbiDH->fFormatVersion[0] != 3 ||
|
||||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader))
|
||||
{
|
||||
!RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
|
||||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
|
||||
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
|
||||
*status=U_UNSUPPORTED_ERROR;
|
||||
return 0;
|
||||
|
@ -56,6 +56,9 @@ ubrk_swap(const UDataSwapper *ds,
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// The current RBBI data format version.
|
||||
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
|
||||
|
||||
/*
|
||||
* The following structs map exactly onto the raw data from ICU common data file.
|
||||
*/
|
||||
@ -152,6 +155,8 @@ public:
|
||||
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
|
||||
~RBBIDataWrapper();
|
||||
|
||||
static UBool isDataVersionAcceptable(const uint8_t version[]);
|
||||
|
||||
void init0();
|
||||
void init(const RBBIDataHeader *data, UErrorCode &status);
|
||||
RBBIDataWrapper *addReference();
|
||||
|
@ -177,10 +177,10 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
||||
|
||||
|
||||
data->fMagic = 0xb1a0;
|
||||
data->fFormatVersion[0] = 3;
|
||||
data->fFormatVersion[1] = 1;
|
||||
data->fFormatVersion[2] = 0;
|
||||
data->fFormatVersion[3] = 0;
|
||||
data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
|
||||
data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
|
||||
data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
|
||||
data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
|
||||
data->fLength = totalSize;
|
||||
data->fCatCount = fSetBuilder->getNumCharCategories();
|
||||
|
||||
|
@ -38,14 +38,14 @@ final class RBBIDataWrapper {
|
||||
|
||||
private boolean isBigEndian;
|
||||
|
||||
static final int DATA_FORMAT = 0x42726b20; // "Brk "
|
||||
static final int FORMAT_VERSION = 0x03010000; // 3.1
|
||||
static final int DATA_FORMAT = 0x42726b20; // "Brk "
|
||||
static final int FORMAT_VERSION = 0x04000000; // 4.0.0.0
|
||||
|
||||
private static final class IsAcceptable implements Authenticate {
|
||||
// @Override when we switch to Java 6
|
||||
@Override
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0] == (FORMAT_VERSION >>> 24);
|
||||
int intVersion = (version[0] << 24) + (version[1] << 16) + (version[2] << 8) + version[3];
|
||||
return intVersion == FORMAT_VERSION;
|
||||
}
|
||||
}
|
||||
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
|
||||
@ -104,7 +104,6 @@ final class RBBIDataWrapper {
|
||||
*/
|
||||
final static class RBBIDataHeader {
|
||||
int fMagic; // == 0xbla0
|
||||
int fVersion; // == 1 (for ICU 3.2 and earlier.
|
||||
byte[] fFormatVersion; // For ICU 3.4 and later.
|
||||
int fLength; // Total length in bytes of this RBBI Data,
|
||||
// including all sections, not just the header.
|
||||
@ -162,10 +161,6 @@ final class RBBIDataWrapper {
|
||||
// Read in the RBBI data header...
|
||||
This.fHeader = new RBBIDataHeader();
|
||||
This.fHeader.fMagic = bytes.getInt();
|
||||
// Read the same 4 bytes as an int and as a byte array: The data format could be
|
||||
// the old fVersion=1 (TODO: probably not with a real ICU data header?)
|
||||
// or the new fFormatVersion=3.x.
|
||||
This.fHeader.fVersion = bytes.getInt(bytes.position());
|
||||
This.fHeader.fFormatVersion[0] = bytes.get();
|
||||
This.fHeader.fFormatVersion[1] = bytes.get();
|
||||
This.fHeader.fFormatVersion[2] = bytes.get();
|
||||
@ -189,10 +184,7 @@ final class RBBIDataWrapper {
|
||||
ICUBinary.skipBytes(bytes, 6 * 4); // uint32_t fReserved[6];
|
||||
|
||||
|
||||
if (This.fHeader.fMagic != 0xb1a0 ||
|
||||
! (This.fHeader.fVersion == 1 || // ICU 3.2 and earlier
|
||||
This.fHeader.fFormatVersion[0] == 3) // ICU 3.4
|
||||
) {
|
||||
if (This.fHeader.fMagic != 0xb1a0 || !IS_ACCEPTABLE.isDataVersionAcceptable(This.fHeader.fFormatVersion)) {
|
||||
throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
|
||||
}
|
||||
|
||||
|
@ -25,17 +25,17 @@ class RBBIRuleBuilder {
|
||||
// This is the main class for building (compiling) break rules into the tables
|
||||
// required by the runtime RBBI engine.
|
||||
//
|
||||
|
||||
|
||||
String fDebugEnv; // controls debug trace output
|
||||
String fRules; // The rule string that we are compiling
|
||||
RBBIRuleScanner fScanner; // The scanner.
|
||||
|
||||
|
||||
|
||||
//
|
||||
// There are four separate parse trees generated, one for each of the
|
||||
// forward rules, reverse rules, safe forward rules and safe reverse rules.
|
||||
// This array references the root of each of the trees.
|
||||
//
|
||||
//
|
||||
RBBINode[] fTreeRoots = new RBBINode[4];
|
||||
static final int fForwardTree = 0; // Indexes into the above fTreeRoots array
|
||||
static final int fReverseTree = 1; // for each of the trees.
|
||||
@ -69,7 +69,7 @@ class RBBIRuleBuilder {
|
||||
// Map Value is the runtime array index.
|
||||
|
||||
List<Integer> fRuleStatusVals; // List of Integer objects. Has same layout as the
|
||||
// runtime array of status (tag) values -
|
||||
// runtime array of status (tag) values -
|
||||
// number of values in group 1
|
||||
// first status value in group 1
|
||||
// 2nd status value in group 1
|
||||
@ -84,50 +84,50 @@ class RBBIRuleBuilder {
|
||||
//
|
||||
static final int U_BRK_ERROR_START = 0x10200;
|
||||
/**< Start of codes indicating Break Iterator failures */
|
||||
|
||||
|
||||
static final int U_BRK_INTERNAL_ERROR = 0x10201;
|
||||
/**< An internal error (bug) was detected. */
|
||||
|
||||
|
||||
static final int U_BRK_HEX_DIGITS_EXPECTED = 0x10202;
|
||||
/**< Hex digits expected as part of a escaped char in a rule. */
|
||||
|
||||
|
||||
static final int U_BRK_SEMICOLON_EXPECTED = 0x10203;
|
||||
/**< Missing ';' at the end of a RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_RULE_SYNTAX = 0x10204;
|
||||
/**< Syntax error in RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_UNCLOSED_SET = 0x10205;
|
||||
/**< UnicodeSet within an RBBI rule missing a closing ']'. */
|
||||
|
||||
|
||||
static final int U_BRK_ASSIGN_ERROR = 0x10206;
|
||||
/**< Syntax error in RBBI rule assignment statement. */
|
||||
|
||||
|
||||
static final int U_BRK_VARIABLE_REDFINITION = 0x10207;
|
||||
/**< RBBI rule $Variable redefined. */
|
||||
|
||||
|
||||
static final int U_BRK_MISMATCHED_PAREN = 0x10208;
|
||||
/**< Mis-matched parentheses in an RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_NEW_LINE_IN_QUOTED_STRING = 0x10209;
|
||||
/**< Missing closing quote in an RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_UNDEFINED_VARIABLE = 0x1020a;
|
||||
/**< Use of an undefined $Variable in an RBBI rule. */
|
||||
|
||||
|
||||
static final int U_BRK_INIT_ERROR = 0x1020b;
|
||||
/**< Initialization failure. Probable missing ICU Data. */
|
||||
|
||||
|
||||
static final int U_BRK_RULE_EMPTY_SET = 0x1020c;
|
||||
/**< Rule contains an empty Unicode Set. */
|
||||
|
||||
|
||||
static final int U_BRK_UNRECOGNIZED_OPTION = 0x1020d;
|
||||
/**< !!option in RBBI rules not recognized. */
|
||||
|
||||
|
||||
static final int U_BRK_MALFORMED_RULE_TAG = 0x1020e;
|
||||
/**< The {nnn} tag on a rule is malformed */
|
||||
static final int U_BRK_MALFORMED_SET = 0x1020f;
|
||||
|
||||
|
||||
static final int U_BRK_ERROR_LIMIT = 0x10210;
|
||||
/**< This must always be the last value to indicate the limit for Break Iterator failures */
|
||||
|
||||
@ -196,7 +196,7 @@ class RBBIRuleBuilder {
|
||||
//
|
||||
int[] header = new int[RBBIDataWrapper.DH_SIZE]; // sizeof struct RBBIDataHeader
|
||||
header[RBBIDataWrapper.DH_MAGIC] = 0xb1a0;
|
||||
header[RBBIDataWrapper.DH_FORMATVERSION] = 0x03010000; // uint8_t fFormatVersion[4];
|
||||
header[RBBIDataWrapper.DH_FORMATVERSION] = RBBIDataWrapper.FORMAT_VERSION;
|
||||
header[RBBIDataWrapper.DH_LENGTH] = totalSize; // fLength, the total size of all rule sections.
|
||||
header[RBBIDataWrapper.DH_CATCOUNT] = fSetBuilder.getNumCharCategories(); // fCatCount.
|
||||
header[RBBIDataWrapper.DH_FTABLE] = headerSize; // fFTable
|
||||
|
@ -1483,10 +1483,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
||||
mainLoop: for (;;) {
|
||||
if (c == DONE32) {
|
||||
// Reached end of input string.
|
||||
if (mode == RBBI_END || fRData.fHeader.fVersion == 1) {
|
||||
// Either this is the old (ICU 3.2 and earlier) format data which
|
||||
// does not support explicit support for matching {eof}, or
|
||||
// we have already done the {eof} iteration. Now is the time
|
||||
if (mode == RBBI_END) {
|
||||
// We have already done the {eof} iteration. Now is the time
|
||||
// to unconditionally bail out.
|
||||
if (result == initialPosition) {
|
||||
// Ran off start, no match found.
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d315546f344483688e78322304130697164e0d0363b20ed00880598630632341
|
||||
size 12128031
|
||||
oid sha256:29b73bb7468ec529b2ad200e0e5e14a16b53d12cc8ba5ac29b9da9de8968adc0
|
||||
size 12128017
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:17fb194e1234c73ab09442acf76f1b872d77d8aa7494a06f5964f1342616d69e
|
||||
oid sha256:644e4eaa7dfdeb72c639d20160274994b0709da05f2b009a306bbc68f440bb87
|
||||
size 92448
|
||||
|
Loading…
Reference in New Issue
Block a user