ICU-12507 bump rbbi data version for change to UTrie2; consolidate version checking.

X-SVN-Rev: 40183
This commit is contained in:
Andy Heninger 2017-06-20 22:11:08 +00:00
parent 9d12b335cc
commit a9cedfb775
8 changed files with 55 additions and 58 deletions

View File

@ -54,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
dh->info.dataFormat[1] == 0x72 &&
dh->info.dataFormat[2] == 0x6b &&
dh->info.dataFormat[3] == 0x20)
// Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is
// validated when checking that.
dh->info.dataFormat[3] == 0x20 &&
isDataVersionAcceptable(dh->info.formatVersion))
) {
status = U_INVALID_FORMAT_ERROR;
return;
@ -67,6 +66,16 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
fUDataMem = udm;
}
// Report whether a data format version exactly matches the version this
// code reads and writes.
//
// Every element of RBBI_DATA_FORMAT_VERSION is compared against the element
// at the same index of `version`, so the caller must supply an array with at
// least as many entries as RBBI_DATA_FORMAT_VERSION. All components have to
// agree, not just the leading one.
//
// Returns true only when all components are equal.
UBool RBBIDataWrapper::isDataVersionAcceptable(const uint8_t version[]) {
    const int componentCount = UPRV_LENGTHOF(RBBI_DATA_FORMAT_VERSION);
    int matched = 0;
    // Advance past each agreeing component; stop at the first mismatch.
    while (matched < componentCount &&
           RBBI_DATA_FORMAT_VERSION[matched] == version[matched]) {
        ++matched;
    }
    // The version is acceptable only if no component disagreed.
    return matched == componentCount;
}
//-----------------------------------------------------------------------------
//
// init(). Does most of the work of construction, shared between the
@ -92,8 +101,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
return;
}
fHeader = data;
if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3)
{
if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
status = U_INVALID_FORMAT_ERROR;
return;
}
@ -308,7 +316,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6b &&
pInfo->dataFormat[3]==0x20 &&
pInfo->formatVersion[0]==3 )) {
RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],
pInfo->dataFormat[2], pInfo->dataFormat[3],
@ -329,17 +337,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
//
// Get the RBBI Data Header, and check that it appears to be OK.
//
// Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
// an int32_t with a value of 1. Starting with ICU 3.4,
// RBBI's fDataFormat matches the dataFormat field from the
// UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
//
const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
rbbiDH->fFormatVersion[0] != 3 ||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader))
{
!RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
*status=U_UNSUPPORTED_ERROR;
return 0;

View File

@ -56,6 +56,9 @@ ubrk_swap(const UDataSwapper *ds,
U_NAMESPACE_BEGIN
// The current RBBI data format version.
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
/*
* The following structs map exactly onto the raw data from ICU common data file.
*/
@ -152,6 +155,8 @@ public:
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
~RBBIDataWrapper();
static UBool isDataVersionAcceptable(const uint8_t version[]);
void init0();
void init(const RBBIDataHeader *data, UErrorCode &status);
RBBIDataWrapper *addReference();

View File

@ -177,10 +177,10 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fMagic = 0xb1a0;
data->fFormatVersion[0] = 3;
data->fFormatVersion[1] = 1;
data->fFormatVersion[2] = 0;
data->fFormatVersion[3] = 0;
data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories();

View File

@ -38,14 +38,14 @@ final class RBBIDataWrapper {
private boolean isBigEndian;
static final int DATA_FORMAT = 0x42726b20; // "Brk "
static final int FORMAT_VERSION = 0x03010000; // 3.1
static final int DATA_FORMAT = 0x42726b20; // "Brk "
static final int FORMAT_VERSION = 0x04000000; // 4.0.0.0
private static final class IsAcceptable implements Authenticate {
// @Override when we switch to Java 6
@Override
public boolean isDataVersionAcceptable(byte version[]) {
return version[0] == (FORMAT_VERSION >>> 24);
int intVersion = (version[0] << 24) + (version[1] << 16) + (version[2] << 8) + version[3];
return intVersion == FORMAT_VERSION;
}
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
@ -104,7 +104,6 @@ final class RBBIDataWrapper {
*/
final static class RBBIDataHeader {
int fMagic; // == 0xb1a0
int fVersion; // == 1 (for ICU 3.2 and earlier.
byte[] fFormatVersion; // For ICU 3.4 and later.
int fLength; // Total length in bytes of this RBBI Data,
// including all sections, not just the header.
@ -162,10 +161,6 @@ final class RBBIDataWrapper {
// Read in the RBBI data header...
This.fHeader = new RBBIDataHeader();
This.fHeader.fMagic = bytes.getInt();
// Read the same 4 bytes as an int and as a byte array: The data format could be
// the old fVersion=1 (TODO: probably not with a real ICU data header?)
// or the new fFormatVersion=3.x.
This.fHeader.fVersion = bytes.getInt(bytes.position());
This.fHeader.fFormatVersion[0] = bytes.get();
This.fHeader.fFormatVersion[1] = bytes.get();
This.fHeader.fFormatVersion[2] = bytes.get();
@ -189,10 +184,7 @@ final class RBBIDataWrapper {
ICUBinary.skipBytes(bytes, 6 * 4); // uint32_t fReserved[6];
if (This.fHeader.fMagic != 0xb1a0 ||
! (This.fHeader.fVersion == 1 || // ICU 3.2 and earlier
This.fHeader.fFormatVersion[0] == 3) // ICU 3.4
) {
if (This.fHeader.fMagic != 0xb1a0 || !IS_ACCEPTABLE.isDataVersionAcceptable(This.fHeader.fFormatVersion)) {
throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
}

View File

@ -25,17 +25,17 @@ class RBBIRuleBuilder {
// This is the main class for building (compiling) break rules into the tables
// required by the runtime RBBI engine.
//
String fDebugEnv; // controls debug trace output
String fRules; // The rule string that we are compiling
RBBIRuleScanner fScanner; // The scanner.
//
// There are four separate parse trees generated, one for each of the
// forward rules, reverse rules, safe forward rules and safe reverse rules.
// This array references the root of each of the trees.
//
//
RBBINode[] fTreeRoots = new RBBINode[4];
static final int fForwardTree = 0; // Indexes into the above fTreeRoots array
static final int fReverseTree = 1; // for each of the trees.
@ -69,7 +69,7 @@ class RBBIRuleBuilder {
// Map Value is the runtime array index.
List<Integer> fRuleStatusVals; // List of Integer objects. Has same layout as the
// runtime array of status (tag) values -
// runtime array of status (tag) values -
// number of values in group 1
// first status value in group 1
// 2nd status value in group 1
@ -84,50 +84,50 @@ class RBBIRuleBuilder {
//
static final int U_BRK_ERROR_START = 0x10200;
/**< Start of codes indicating Break Iterator failures */
static final int U_BRK_INTERNAL_ERROR = 0x10201;
/**< An internal error (bug) was detected. */
static final int U_BRK_HEX_DIGITS_EXPECTED = 0x10202;
/**< Hex digits expected as part of a escaped char in a rule. */
static final int U_BRK_SEMICOLON_EXPECTED = 0x10203;
/**< Missing ';' at the end of a RBBI rule. */
static final int U_BRK_RULE_SYNTAX = 0x10204;
/**< Syntax error in RBBI rule. */
static final int U_BRK_UNCLOSED_SET = 0x10205;
/**< UnicodeSet within an RBBI rule missing a closing ']'. */
static final int U_BRK_ASSIGN_ERROR = 0x10206;
/**< Syntax error in RBBI rule assignment statement. */
static final int U_BRK_VARIABLE_REDFINITION = 0x10207;
/**< RBBI rule $Variable redefined. */
static final int U_BRK_MISMATCHED_PAREN = 0x10208;
/**< Mis-matched parentheses in an RBBI rule. */
static final int U_BRK_NEW_LINE_IN_QUOTED_STRING = 0x10209;
/**< Missing closing quote in an RBBI rule. */
static final int U_BRK_UNDEFINED_VARIABLE = 0x1020a;
/**< Use of an undefined $Variable in an RBBI rule. */
static final int U_BRK_INIT_ERROR = 0x1020b;
/**< Initialization failure. Probable missing ICU Data. */
static final int U_BRK_RULE_EMPTY_SET = 0x1020c;
/**< Rule contains an empty Unicode Set. */
static final int U_BRK_UNRECOGNIZED_OPTION = 0x1020d;
/**< !!option in RBBI rules not recognized. */
static final int U_BRK_MALFORMED_RULE_TAG = 0x1020e;
/**< The {nnn} tag on a rule is malformed */
static final int U_BRK_MALFORMED_SET = 0x1020f;
static final int U_BRK_ERROR_LIMIT = 0x10210;
/**< This must always be the last value to indicate the limit for Break Iterator failures */
@ -196,7 +196,7 @@ class RBBIRuleBuilder {
//
int[] header = new int[RBBIDataWrapper.DH_SIZE]; // sizeof struct RBBIDataHeader
header[RBBIDataWrapper.DH_MAGIC] = 0xb1a0;
header[RBBIDataWrapper.DH_FORMATVERSION] = 0x03010000; // uint8_t fFormatVersion[4];
header[RBBIDataWrapper.DH_FORMATVERSION] = RBBIDataWrapper.FORMAT_VERSION;
header[RBBIDataWrapper.DH_LENGTH] = totalSize; // fLength, the total size of all rule sections.
header[RBBIDataWrapper.DH_CATCOUNT] = fSetBuilder.getNumCharCategories(); // fCatCount.
header[RBBIDataWrapper.DH_FTABLE] = headerSize; // fFTable

View File

@ -1483,10 +1483,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
mainLoop: for (;;) {
if (c == DONE32) {
// Reached end of input string.
if (mode == RBBI_END || fRData.fHeader.fVersion == 1) {
// Either this is the old (ICU 3.2 and earlier) format data which
// does not support explicit support for matching {eof}, or
// we have already done the {eof} iteration. Now is the time
if (mode == RBBI_END) {
// We have already done the {eof} iteration. Now is the time
// to unconditionally bail out.
if (result == initialPosition) {
// Ran off start, no match found.

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d315546f344483688e78322304130697164e0d0363b20ed00880598630632341
size 12128031
oid sha256:29b73bb7468ec529b2ad200e0e5e14a16b53d12cc8ba5ac29b9da9de8968adc0
size 12128017

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:17fb194e1234c73ab09442acf76f1b872d77d8aa7494a06f5964f1342616d69e
oid sha256:644e4eaa7dfdeb72c639d20160274994b0709da05f2b009a306bbc68f440bb87
size 92448