ICU-3295 rbbi rt port to Java, minor cleanups
X-SVN-Rev: 15476
This commit is contained in:
parent
0628b5df9c
commit
5aefced260
@ -156,6 +156,7 @@
|
||||
srcdir="${src.dir}"
|
||||
destdir="${build.dir}"
|
||||
classpathref="build.classpath"
|
||||
source="1.4"
|
||||
debug="on" deprecation="off"
|
||||
encoding="ascii"/>
|
||||
</target>
|
||||
@ -166,6 +167,7 @@
|
||||
srcdir="${src.dir}"
|
||||
destdir="${build.dir}"
|
||||
classpathref="build.classpath"
|
||||
source="1.4"
|
||||
debug="on" deprecation="off"/>
|
||||
</target>
|
||||
|
||||
@ -213,6 +215,7 @@
|
||||
bottom="&lt;font size=-1&gt;Copyright (c) 2004 IBM Corporation and others.&lt;/font&gt;"
|
||||
additionalparam="${doc.params}"
|
||||
link="http://java.sun.com/j2se/1.4/docs/api"
|
||||
source="1.4"
|
||||
/>
|
||||
</target>
|
||||
|
||||
|
@ -121,26 +121,16 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
|
||||
iter = new RuleBasedBreakIterator_Old(rules);
|
||||
}
|
||||
else if (classNames[kind].equals("RuleBasedBreakIterator_New")) {
|
||||
// Class for new RBBI engine.
|
||||
// Open a stream to the .brk file. Path to the brk files has this form:
|
||||
// data/icudt30b/line.brk (30 is version number)
|
||||
try {
|
||||
// Class for new RBBI engine.
|
||||
// Open a stream to the .brk file. Path to the brk files has this form:
|
||||
// data/icudt30b/line.brk (30 is version number)
|
||||
String rulesFileName = UResourceBundle.ICU_BUNDLE +"/"+ KIND_NAMES_2[kind] + ".brk";
|
||||
InputStream is = ICUData.getStream(rulesFileName);
|
||||
if (is == null) {
|
||||
// Temporary!!! Try again with break files named data/icudt28b_char.brk
|
||||
// (or word, line, etc.) This was a temporary location
|
||||
// used during development, this code can be removed once
|
||||
// the data is in the data directory, above. TODO: remove
|
||||
// the following code, make this catch turn around and throw.
|
||||
rulesFileName = UResourceBundle.ICU_BUNDLE + "/" +
|
||||
KIND_NAMES_2[kind] + ".brk";
|
||||
is = ICUData.getRequiredStream(rulesFileName);
|
||||
}
|
||||
iter = RuleBasedBreakIterator_New.getInstanceFromCompiledRules(is);
|
||||
InputStream is = ICUData.getRequiredStream(rulesFileName);
|
||||
iter = RuleBasedBreakIterator_New.getInstanceFromCompiledRules(is);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new IllegalArgumentException(e.toString());
|
||||
throw new IllegalArgumentException(e.toString());
|
||||
}
|
||||
}
|
||||
else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
|
||||
@ -156,10 +146,10 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
|
||||
iter = new DictionaryBasedBreakIterator(rules, dictionary);
|
||||
}
|
||||
catch(IOException e) {
|
||||
System.out.println(e); // debug
|
||||
assert false : e;
|
||||
}
|
||||
catch(MissingResourceException e) {
|
||||
System.out.println(e); // debug
|
||||
assert false : e;
|
||||
}
|
||||
// TODO: we don't have 'bad' resource data, so this should never happen
|
||||
// in our current tests.
|
||||
@ -183,4 +173,5 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
|
||||
iter.setLocale(uloc, uloc);
|
||||
return iter;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -13,6 +13,13 @@ import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Rule Based Break Iterator implementation.
|
||||
* This is a port of the C++ class RuleBasedBreakIterator from ICU4C.
|
||||
*
|
||||
* A note on future plans: Once a new DictionaryBasedBreakIterator implementation
|
||||
* is completed, the archaic implementation class
|
||||
* RuleBasedBreakIterator_Old can be completely removed,
|
||||
* and this class can be renamed to be simply
|
||||
* RuleBasedBreakIterator.
|
||||
* @internal
|
||||
*/
|
||||
public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
||||
@ -451,8 +458,6 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
||||
|
||||
/**
|
||||
* Throw IllegalArgumentException unless begin <= offset < end.
|
||||
* TODO: subclassing interface from old RBBI is not really usable.
|
||||
* What to do with old protected functions tagged as stable?
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
protected static final void checkOffset(int offset, CharacterIterator text) {
|
||||
@ -524,19 +529,14 @@ private void makeRuleStatusValid() {
|
||||
int pa = current();
|
||||
previous();
|
||||
int pb = next();
|
||||
if (pa != pb) {
|
||||
// TODO: comment this out.
|
||||
System.out.println("RuleBasedBreakIterator::makeRuleStatusValid internal error");
|
||||
}
|
||||
assert pa == pb;
|
||||
}
|
||||
}
|
||||
//U_ASSERT(fLastStatusIndexValid == TRUE);
|
||||
//U_ASSERT(fLastRuleStatusIndex >= 0 && fLastRuleStatusIndex < fData->fStatusMaxIdx);
|
||||
|
||||
assert fLastStatusIndexValid == true;
|
||||
assert fLastRuleStatusIndex >= 0 && fLastRuleStatusIndex < fRData.fStatusTable.length;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Return the status tag from the break rule that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
@ -636,7 +636,6 @@ public int getRuleStatusVec(int[] fillInArray) {
|
||||
|
||||
|
||||
private static int CINext32(CharacterIterator ci) {
|
||||
// TODO: pre-increment is a pain. Redo all to use post-increment.
|
||||
int retVal;
|
||||
int curChar = CICurrent32(ci);
|
||||
ci.next();
|
||||
|
@ -461,135 +461,135 @@ public void debugDumpTables() {
|
||||
* @internal
|
||||
*/
|
||||
public void writeTablesToFile(FileOutputStream file, boolean littleEndian) throws IOException {
|
||||
// NOTE: The format being written here is designed to be compatible with
|
||||
// the ICU udata interfaces and may not be useful for much else
|
||||
DataOutputStream out = new DataOutputStream(file);
|
||||
|
||||
// --- write the file header ---
|
||||
byte[] comment = "Copyright (C) 1999, International Business Machines Corp. and others. All Rights Reserved.".getBytes("US-ASCII");
|
||||
// write the size of the header (rounded up to the next 16-byte boundary)
|
||||
short headerSize = (short)(comment.length + 1 // length of comment
|
||||
+ 24); // size of static header data
|
||||
short realHeaderSize = (short)(headerSize + ((headerSize % 16 == 0) ? 0 : 16 - (headerSize % 16)));
|
||||
writeSwappedShort(realHeaderSize, out, littleEndian);
|
||||
// write magic byte values
|
||||
out.write(0xda);
|
||||
out.write(0x27);
|
||||
// write size of core header data
|
||||
writeSwappedShort((short)20, out, littleEndian);
|
||||
// write reserved bytes
|
||||
writeSwappedShort((short)0, out, littleEndian);
|
||||
|
||||
// write flag indicating whether we're big-endian
|
||||
if (littleEndian) {
|
||||
out.write(0);
|
||||
} else {
|
||||
out.write(1);
|
||||
}
|
||||
|
||||
// write character set family code (0 means ASCII)
|
||||
out.write(0);
|
||||
// write size of UChar in this file
|
||||
out.write(2);
|
||||
// write reserved byte
|
||||
out.write(0);
|
||||
// write data format identifier (this is an array of bytes in ICU, so the value is NOT swapped!)
|
||||
out.writeInt(0x42524b53); // ("BRKS")
|
||||
// write file format version number (NOT swapped!)
|
||||
out.writeInt(0);
|
||||
// write data version number (NOT swapped!)
|
||||
out.writeInt(0);
|
||||
// write copyright notice
|
||||
out.write(comment);
|
||||
out.write(0);
|
||||
// fill in padding bytes
|
||||
while (headerSize < realHeaderSize) {
|
||||
out.write(0);
|
||||
++headerSize;
|
||||
}
|
||||
|
||||
// --- write index to the file ---
|
||||
// write the number of columns in the state table
|
||||
writeSwappedInt(numCategories, out, littleEndian);
|
||||
int fileEnd = 36;
|
||||
// write the location in the file of the BreakIterator description string
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += (description.length() + 1) * 2;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the character category table's index
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += charCategoryTable.getIndexArray().length * 2;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the character category table's values array
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += charCategoryTable.getValueArray().length;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the forward state table
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += stateTable.length * 2;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the backward state table
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += backwardsStateTable.length * 2;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the endStates flags
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += endStates.length;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the lookaheadStates flags
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += lookaheadStates.length;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the length of the file
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
|
||||
// --- write the actual data ---
|
||||
// write description string
|
||||
for (int i = 0; i < description.length(); i++)
|
||||
writeSwappedShort((short)description.charAt(i), out, littleEndian);
|
||||
out.writeShort(0);
|
||||
if ((description.length() + 1) % 2 == 1)
|
||||
out.writeShort(0);
|
||||
// write character category table
|
||||
char[] temp1 = charCategoryTable.getIndexArray();
|
||||
for (int i = 0; i < temp1.length; i++)
|
||||
writeSwappedShort((short)temp1[i], out, littleEndian);
|
||||
if (temp1.length % 2 == 1)
|
||||
out.writeShort(0);
|
||||
byte[] temp2 = charCategoryTable.getValueArray();
|
||||
out.write(temp2);
|
||||
switch (temp2.length % 4) {
|
||||
case 1: out.write(0);
|
||||
case 2: out.write(0);
|
||||
case 3: out.write(0);
|
||||
default: break;
|
||||
}
|
||||
// write the state transition tables
|
||||
for (int i = 0; i < stateTable.length; i++)
|
||||
writeSwappedShort(stateTable[i], out, littleEndian);
|
||||
if (stateTable.length % 2 == 1)
|
||||
out.writeShort(0);
|
||||
for (int i = 0; i < backwardsStateTable.length; i++)
|
||||
writeSwappedShort(backwardsStateTable[i], out, littleEndian);
|
||||
if (backwardsStateTable.length % 2 == 1)
|
||||
out.writeShort(0);
|
||||
// write the flag arrays
|
||||
for (int i = 0; i < endStates.length; i++)
|
||||
out.writeBoolean(endStates[i]);
|
||||
switch (endStates.length % 4) {
|
||||
case 1: out.write(0);
|
||||
case 2: out.write(0);
|
||||
case 3: out.write(0);
|
||||
default: break;
|
||||
}
|
||||
for (int i = 0; i < lookaheadStates.length; i++)
|
||||
out.writeBoolean(lookaheadStates[i]);
|
||||
switch (lookaheadStates.length % 4) {
|
||||
case 1: out.write(0);
|
||||
case 2: out.write(0);
|
||||
case 3: out.write(0);
|
||||
default: break;
|
||||
}
|
||||
// NOTE: The format being written here is designed to be compatible with
|
||||
// the ICU udata interfaces and may not be useful for much else
|
||||
DataOutputStream out = new DataOutputStream(file);
|
||||
|
||||
// --- write the file header ---
|
||||
byte[] comment = "Copyright (C) 1999, International Business Machines Corp. and others. All Rights Reserved.".getBytes("US-ASCII");
|
||||
// write the size of the header (rounded up to the next 16-byte boundary)
|
||||
short headerSize = (short)(comment.length + 1 // length of comment
|
||||
+ 24); // size of static header data
|
||||
short realHeaderSize = (short)(headerSize + ((headerSize % 16 == 0) ? 0 : 16 - (headerSize % 16)));
|
||||
writeSwappedShort(realHeaderSize, out, littleEndian);
|
||||
// write magic byte values
|
||||
out.write(0xda);
|
||||
out.write(0x27);
|
||||
// write size of core header data
|
||||
writeSwappedShort((short)20, out, littleEndian);
|
||||
// write reserved bytes
|
||||
writeSwappedShort((short)0, out, littleEndian);
|
||||
|
||||
// write flag indicating whether we're big-endian
|
||||
if (littleEndian) {
|
||||
out.write(0);
|
||||
} else {
|
||||
out.write(1);
|
||||
}
|
||||
|
||||
// write character set family code (0 means ASCII)
|
||||
out.write(0);
|
||||
// write size of UChar in this file
|
||||
out.write(2);
|
||||
// write reserved byte
|
||||
out.write(0);
|
||||
// write data format identifier (this is an array of bytes in ICU, so the value is NOT swapped!)
|
||||
out.writeInt(0x42524b53); // ("BRKS")
|
||||
// write file format version number (NOT swapped!)
|
||||
out.writeInt(0);
|
||||
// write data version number (NOT swapped!)
|
||||
out.writeInt(0);
|
||||
// write copyright notice
|
||||
out.write(comment);
|
||||
out.write(0);
|
||||
// fill in padding bytes
|
||||
while (headerSize < realHeaderSize) {
|
||||
out.write(0);
|
||||
++headerSize;
|
||||
}
|
||||
|
||||
// --- write index to the file ---
|
||||
// write the number of columns in the state table
|
||||
writeSwappedInt(numCategories, out, littleEndian);
|
||||
int fileEnd = 36;
|
||||
// write the location in the file of the BreakIterator description string
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += (description.length() + 1) * 2;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the character category table's index
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += charCategoryTable.getIndexArray().length * 2;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the character category table's values array
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += charCategoryTable.getValueArray().length;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the forward state table
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += stateTable.length * 2;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the backward state table
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += backwardsStateTable.length * 2;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the endStates flags
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += endStates.length;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the location of the lookaheadStates flags
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
fileEnd += lookaheadStates.length;
|
||||
fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4);
|
||||
// write the length of the file
|
||||
writeSwappedInt(fileEnd, out, littleEndian);
|
||||
|
||||
// --- write the actual data ---
|
||||
// write description string
|
||||
for (int i = 0; i < description.length(); i++)
|
||||
writeSwappedShort((short)description.charAt(i), out, littleEndian);
|
||||
out.writeShort(0);
|
||||
if ((description.length() + 1) % 2 == 1)
|
||||
out.writeShort(0);
|
||||
// write character category table
|
||||
char[] temp1 = charCategoryTable.getIndexArray();
|
||||
for (int i = 0; i < temp1.length; i++)
|
||||
writeSwappedShort((short)temp1[i], out, littleEndian);
|
||||
if (temp1.length % 2 == 1)
|
||||
out.writeShort(0);
|
||||
byte[] temp2 = charCategoryTable.getValueArray();
|
||||
out.write(temp2);
|
||||
switch (temp2.length % 4) {
|
||||
case 1: out.write(0);
|
||||
case 2: out.write(0);
|
||||
case 3: out.write(0);
|
||||
default: break;
|
||||
}
|
||||
// write the state transition tables
|
||||
for (int i = 0; i < stateTable.length; i++)
|
||||
writeSwappedShort(stateTable[i], out, littleEndian);
|
||||
if (stateTable.length % 2 == 1)
|
||||
out.writeShort(0);
|
||||
for (int i = 0; i < backwardsStateTable.length; i++)
|
||||
writeSwappedShort(backwardsStateTable[i], out, littleEndian);
|
||||
if (backwardsStateTable.length % 2 == 1)
|
||||
out.writeShort(0);
|
||||
// write the flag arrays
|
||||
for (int i = 0; i < endStates.length; i++)
|
||||
out.writeBoolean(endStates[i]);
|
||||
switch (endStates.length % 4) {
|
||||
case 1: out.write(0);
|
||||
case 2: out.write(0);
|
||||
case 3: out.write(0);
|
||||
default: break;
|
||||
}
|
||||
for (int i = 0; i < lookaheadStates.length; i++)
|
||||
out.writeBoolean(lookaheadStates[i]);
|
||||
switch (lookaheadStates.length % 4) {
|
||||
case 1: out.write(0);
|
||||
case 2: out.write(0);
|
||||
case 3: out.write(0);
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user