ICU-3295 rbbi rt port to Java, minor cleanups

X-SVN-Rev: 15476
This commit is contained in:
Andy Heninger 2004-05-21 22:26:16 +00:00
parent 0628b5df9c
commit 5aefced260
4 changed files with 151 additions and 158 deletions

View File

@ -156,6 +156,7 @@
srcdir="${src.dir}"
destdir="${build.dir}"
classpathref="build.classpath"
source="1.4"
debug="on" deprecation="off"
encoding="ascii"/>
</target>
@ -166,6 +167,7 @@
srcdir="${src.dir}"
destdir="${build.dir}"
classpathref="build.classpath"
source="1.4"
debug="on" deprecation="off"/>
</target>
@ -213,6 +215,7 @@
bottom="&lt;font size=-1>Copyright (c) 2004 IBM Corporation and others.&lt;/font>"
additionalparam="${doc.params}"
link="http://java.sun.com/j2se/1.4/docs/api"
source="1.4"
/>
</target>

View File

@ -121,26 +121,16 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
iter = new RuleBasedBreakIterator_Old(rules);
}
else if (classNames[kind].equals("RuleBasedBreakIterator_New")) {
// Class for new RBBI engine.
// Open a stream to the .brk file. Path to the brk files has this form:
// data/icudt30b/line.brk (30 is version number)
try {
// Class for new RBBI engine.
// Open a stream to the .brk file. Path to the brk files has this form:
// data/icudt30b/line.brk (30 is version number)
String rulesFileName = UResourceBundle.ICU_BUNDLE +"/"+ KIND_NAMES_2[kind] + ".brk";
InputStream is = ICUData.getStream(rulesFileName);
if (is == null) {
// Temporary!!! Try again with break files named data/icudt28b_char.brk
// (or word, line, etc.) This was a temporary location
// used during development, this code can be removed once
// the data is in the data directory, above. TODO: remove
// the following code, make this catch turn around and throw.
rulesFileName = UResourceBundle.ICU_BUNDLE + "/" +
KIND_NAMES_2[kind] + ".brk";
is = ICUData.getRequiredStream(rulesFileName);
}
iter = RuleBasedBreakIterator_New.getInstanceFromCompiledRules(is);
InputStream is = ICUData.getRequiredStream(rulesFileName);
iter = RuleBasedBreakIterator_New.getInstanceFromCompiledRules(is);
}
catch (IOException e) {
throw new IllegalArgumentException(e.toString());
throw new IllegalArgumentException(e.toString());
}
}
else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
@ -156,10 +146,10 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
iter = new DictionaryBasedBreakIterator(rules, dictionary);
}
catch(IOException e) {
System.out.println(e); // debug
assert false : e;
}
catch(MissingResourceException e) {
System.out.println(e); // debug
assert false : e;
}
// TODO: we don't have 'bad' resource data, so this should never happen
// in our current tests.
@ -183,4 +173,5 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
iter.setLocale(uloc, uloc);
return iter;
}
}

View File

@ -13,6 +13,13 @@ import java.io.InputStream;
/**
* Rule Based Break Iterator implementation.
* This is a port of the C++ class RuleBasedBreakIterator from ICU4C.
*
* A note on future plans: Once a new DictionaryBasedBreakIterator implementation
* is completed, the archaic implementation class
* RuleBasedBreakIterator_Old can be completely removed,
* and this class can be renamed to be simply
* RuleBasedBreakIterator.
* @internal
*/
public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
@ -451,8 +458,6 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
/**
* Throw IllegalArgumentException unless begin <= offset < end.
* TODO: subclassing interface from old RBBI is not really usable.
* What to do with old protected functions tagged as stable?
* @stable ICU 2.0
*/
protected static final void checkOffset(int offset, CharacterIterator text) {
@ -524,19 +529,14 @@ private void makeRuleStatusValid() {
int pa = current();
previous();
int pb = next();
if (pa != pb) {
// TODO: comment this out.
System.out.println("RuleBasedBreakIterator::makeRuleStatusValid internal error");
}
assert pa == pb;
}
}
//U_ASSERT(fLastStatusIndexValid == TRUE);
//U_ASSERT(fLastRuleStatusIndex >= 0 && fLastRuleStatusIndex < fData->fStatusMaxIdx);
assert fLastStatusIndexValid == true;
assert fLastRuleStatusIndex >= 0 && fLastRuleStatusIndex < fRData.fStatusTable.length;
}
/**
* Return the status tag from the break rule that determined the most recently
* returned break position. The values appear in the rule source
@ -636,7 +636,6 @@ public int getRuleStatusVec(int[] fillInArray) {
private static int CINext32(CharacterIterator ci) {
// TODO: pre-increment is a pain. Redo all to use post-increment.
int retVal;
int curChar = CICurrent32(ci);
ci.next();

View File

@ -461,135 +461,135 @@ public void debugDumpTables() {
* @internal
*/
public void writeTablesToFile(FileOutputStream file, boolean littleEndian) throws IOException {
    // NOTE: The format being written here is designed to be compatible with
    // the ICU udata interfaces and may not be useful for much else.
    DataOutputStream out = new DataOutputStream(file);

    // --- write the file header ---
    byte[] comment = "Copyright (C) 1999, International Business Machines Corp. and others. All Rights Reserved.".getBytes("US-ASCII");

    // Header size = copyright text + its NUL terminator + 24 bytes of static
    // header data, rounded up to the next 16-byte boundary.
    short headerSize = (short)(comment.length + 1 // length of comment
                               + 24);             // size of static header data
    short realHeaderSize = (short)(headerSize + ((headerSize % 16 == 0) ? 0 : 16 - (headerSize % 16)));
    writeSwappedShort(realHeaderSize, out, littleEndian);

    // magic byte values
    out.write(0xda);
    out.write(0x27);

    // size of core header data
    writeSwappedShort((short)20, out, littleEndian);

    // reserved bytes
    writeSwappedShort((short)0, out, littleEndian);

    // flag indicating whether the file is big-endian (1) or little-endian (0)
    out.write(littleEndian ? 0 : 1);

    // character set family code (0 means ASCII)
    out.write(0);

    // size of UChar in this file
    out.write(2);

    // reserved byte
    out.write(0);

    // data format identifier (this is an array of bytes in ICU, so the value is NOT swapped!)
    out.writeInt(0x42524b53);     // ("BRKS")

    // file format version number (NOT swapped!)
    out.writeInt(0);

    // data version number (NOT swapped!)
    out.writeInt(0);

    // copyright notice followed by its NUL terminator
    out.write(comment);
    out.write(0);

    // zero-fill from the unpadded header size up to the rounded header size
    for (int written = headerSize; written < realHeaderSize; written++) {
        out.write(0);
    }

    // --- write index to the file ---
    // Each index entry is the offset of one section; the running offset is
    // re-aligned to a 4-byte boundary after every section.

    // number of columns in the state table
    writeSwappedInt(numCategories, out, littleEndian);
    int fileEnd = 36;

    // location of the BreakIterator description string (UTF-16 plus terminator)
    writeSwappedInt(fileEnd, out, littleEndian);
    fileEnd = alignTo4(fileEnd + (description.length() + 1) * 2);

    // location of the character category table's index
    writeSwappedInt(fileEnd, out, littleEndian);
    fileEnd = alignTo4(fileEnd + charCategoryTable.getIndexArray().length * 2);

    // location of the character category table's values array
    writeSwappedInt(fileEnd, out, littleEndian);
    fileEnd = alignTo4(fileEnd + charCategoryTable.getValueArray().length);

    // location of the forward state table
    writeSwappedInt(fileEnd, out, littleEndian);
    fileEnd = alignTo4(fileEnd + stateTable.length * 2);

    // location of the backward state table
    writeSwappedInt(fileEnd, out, littleEndian);
    fileEnd = alignTo4(fileEnd + backwardsStateTable.length * 2);

    // location of the endStates flags
    writeSwappedInt(fileEnd, out, littleEndian);
    fileEnd = alignTo4(fileEnd + endStates.length);

    // location of the lookaheadStates flags
    writeSwappedInt(fileEnd, out, littleEndian);
    fileEnd = alignTo4(fileEnd + lookaheadStates.length);

    // length of the file
    writeSwappedInt(fileEnd, out, littleEndian);

    // --- write the actual data ---
    // Every section is zero-padded to a multiple of four bytes so that the
    // bytes written agree with the offsets recorded in the index above.

    // description string, 16 bits per char, then a 16-bit zero terminator
    int descriptionLength = description.length();
    for (int i = 0; i < descriptionLength; i++) {
        writeSwappedShort((short)description.charAt(i), out, littleEndian);
    }
    out.writeShort(0);
    writeZeroPadding(out, (descriptionLength + 1) * 2);

    // character category table: 16-bit index array, then byte value array
    char[] categoryIndex = charCategoryTable.getIndexArray();
    for (int i = 0; i < categoryIndex.length; i++) {
        writeSwappedShort((short)categoryIndex[i], out, littleEndian);
    }
    writeZeroPadding(out, categoryIndex.length * 2);

    byte[] categoryValues = charCategoryTable.getValueArray();
    out.write(categoryValues);
    writeZeroPadding(out, categoryValues.length);

    // state transition tables (forward, then backward), 16 bits per entry
    for (int i = 0; i < stateTable.length; i++) {
        writeSwappedShort(stateTable[i], out, littleEndian);
    }
    writeZeroPadding(out, stateTable.length * 2);

    for (int i = 0; i < backwardsStateTable.length; i++) {
        writeSwappedShort(backwardsStateTable[i], out, littleEndian);
    }
    writeZeroPadding(out, backwardsStateTable.length * 2);

    // flag arrays, one byte per flag
    for (int i = 0; i < endStates.length; i++) {
        out.writeBoolean(endStates[i]);
    }
    writeZeroPadding(out, endStates.length);

    for (int i = 0; i < lookaheadStates.length; i++) {
        out.writeBoolean(lookaheadStates[i]);
    }
    writeZeroPadding(out, lookaheadStates.length);
}

/**
 * Rounds a non-negative byte offset up to the next multiple of four.
 * An offset that is already a multiple of four is returned unchanged.
 */
private static int alignTo4(int offset) {
    int remainder = offset % 4;
    return (remainder == 0) ? offset : offset + (4 - remainder);
}

/**
 * Writes the zero bytes needed to extend a section of <code>byteCount</code>
 * bytes to a multiple of four bytes; writes nothing when it already is one.
 */
private static void writeZeroPadding(DataOutputStream out, int byteCount) throws IOException {
    for (int pad = (4 - byteCount % 4) % 4; pad > 0; pad--) {
        out.write(0);
    }
}
/**