ICU-20373 simpler state saving for Java string tries via long not object

This commit is contained in:
Markus Scherer 2019-02-07 11:56:14 -08:00
parent f46605a437
commit ecd0591521
4 changed files with 170 additions and 0 deletions

View File

@ -86,6 +86,40 @@ public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
return this;
}
/**
 * Returns the state of this trie as a 64-bit integer.
 * The state value is never 0.
 *
 * <p>The value is a packed pair: {@code remainingMatchLength_} occupies the
 * upper 32 bits and {@code pos_} occupies the lower 32 bits. Callers should
 * still treat it as opaque.
 *
 * @return opaque state value
 * @see #resetToState64
 * @draft ICU 64
 * @provisional This API might change or be removed in a future release.
 */
public long getState64() {
    // Mask pos_ to its low 32 bits before combining. A plain int-to-long
    // widening sign-extends, so if pos_ were ever negative it would set all
    // upper bits and overwrite the remainingMatchLength_ half of the state.
    // For non-negative pos_ the result is identical to the unmasked form.
    return ((long)remainingMatchLength_ << 32) | (pos_ & 0xffffffffL);
}
/**
 * Restores this trie to a position captured earlier by {@link #getState64()}.
 * In contrast to {@link #resetToState(State)}, the 64-bit value is not
 * validated: it must come from {@link #getState64()} on this same trie object,
 * or on one that was initialized in exactly the same way.
 * Skipping the validation is what makes this method faster.
 *
 * @param state The opaque trie state value from getState64().
 * @return this
 * @see #getState64
 * @see #resetToState
 * @see #reset
 * @draft ICU 64
 * @provisional This API might change or be removed in a future release.
 */
public BytesTrie resetToState64(long state) {
    // Split the packed value: lower 32 bits -> pos_, upper 32 bits -> remainingMatchLength_.
    final int savedPos = (int)state;
    final int savedRemaining = (int)(state >>> 32);
    pos_ = savedPos;
    remainingMatchLength_ = savedRemaining;
    return this;
}
/**
* BytesTrie state object, for saving a trie's current state
* and resetting the trie back to this state later.
@ -120,6 +154,8 @@ public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
/**
* Resets this trie to the saved state.
* Slower than {@link #resetToState64(long)} which does not validate the state value.
*
* @param state The State object which holds a saved trie state.
* @return this
* @throws IllegalArgumentException if the state object contains no state,

View File

@ -89,6 +89,40 @@ public final class CharsTrie implements Cloneable, Iterable<CharsTrie.Entry> {
return this;
}
/**
 * Returns the state of this trie as a 64-bit integer.
 * The state value is never 0.
 *
 * <p>The value is a packed pair: {@code remainingMatchLength_} occupies the
 * upper 32 bits and {@code pos_} occupies the lower 32 bits. Callers should
 * still treat it as opaque.
 *
 * @return opaque state value
 * @see #resetToState64
 * @draft ICU 64
 * @provisional This API might change or be removed in a future release.
 */
public long getState64() {
    // Mask pos_ to its low 32 bits before combining. A plain int-to-long
    // widening sign-extends, so if pos_ were ever negative it would set all
    // upper bits and overwrite the remainingMatchLength_ half of the state.
    // For non-negative pos_ the result is identical to the unmasked form.
    return ((long)remainingMatchLength_ << 32) | (pos_ & 0xffffffffL);
}
/**
 * Restores this trie to a position captured earlier by {@link #getState64()}.
 * In contrast to {@link #resetToState(State)}, the 64-bit value is not
 * validated: it must come from {@link #getState64()} on this same trie object,
 * or on one that was initialized in exactly the same way.
 * Skipping the validation is what makes this method faster.
 *
 * @param state The opaque trie state value from getState64().
 * @return this
 * @see #getState64
 * @see #resetToState
 * @see #reset
 * @draft ICU 64
 * @provisional This API might change or be removed in a future release.
 */
public CharsTrie resetToState64(long state) {
    // Split the packed value: lower 32 bits -> pos_, upper 32 bits -> remainingMatchLength_.
    final int savedPos = (int)state;
    final int savedRemaining = (int)(state >>> 32);
    pos_ = savedPos;
    remainingMatchLength_ = savedRemaining;
    return this;
}
/**
* CharsTrie state object, for saving a trie's current state
* and resetting the trie back to this state later.
@ -123,6 +157,8 @@ public final class CharsTrie implements Cloneable, Iterable<CharsTrie.Entry> {
/**
* Resets this trie to the saved state.
* Slower than {@link #resetToState64(long)} which does not validate the state value.
*
* @param state The State object which holds a saved trie state.
* @return this
* @throws IllegalArgumentException if the state object contains no state,

View File

@ -547,6 +547,7 @@ public class BytesTrieTest extends TestFmwk {
checkFirst(trie, data, dataLength);
checkNext(trie, data, dataLength);
checkNextWithState(trie, data, dataLength);
checkNextWithState64(trie, data, dataLength);
checkNextString(trie, data, dataLength);
checkIterator(trie, data, dataLength);
}
@ -739,6 +740,54 @@ public class BytesTrieTest extends TestFmwk {
}
}
/**
 * Exercises getState64()/resetToState64() on a BytesTrie.
 * For each entry: advances through the first third of its bytes, saves the
 * 64-bit state, forces a mismatch with next(0), then checks that resetting to
 * the saved state restores current()/getValue() and that the rest of the
 * string can still be matched (twice, with a reset in between).
 *
 * @param trie       the trie under test; it is reset() after each entry
 * @param data       expected strings and values
 * @param dataLength number of leading entries of data to check
 */
private void checkNextWithState64(BytesTrie trie, StringAndValue data[], int dataLength) {
    // Compare against 0L, not 0: if assertNotEquals binds to an Object-based
    // overload, an int 0 boxes to Integer while the state boxes to Long, and
    // those are never equal, so the check could not fail.
    assertNotEquals("trie(initial state).getState64()!=0", 0L, trie.getState64());
    for(int i=0; i<dataLength; ++i) {
        byte[] expectedString=data[i].bytes;
        int stringLength=data[i].s.length();
        int partialLength=stringLength/3;
        // Walk the first third of the string one byte at a time.
        for(int j=0; j<partialLength; ++j) {
            if(!trie.next(expectedString[j]).matches()) {
                errln("trie.next()=BytesTrie.Result.NO_MATCH for a prefix of "+data[i].s);
                return;
            }
        }
        // Remember where we are, plus what current()/getValue() report here.
        long state = trie.getState64();
        assertNotEquals("trie.getState64()!=0", 0L, state);
        BytesTrie.Result resultAtState=trie.current();
        BytesTrie.Result result;
        int valueAtState=-99;
        if(resultAtState.hasValue()) {
            valueAtState=trie.getValue();
        }
        result=trie.next(0);  // mismatch
        if(result!=BytesTrie.Result.NO_MATCH || result!=trie.current()) {
            errln("trie.next(0) matched after part of "+data[i].s);
        }
        if( resultAtState!=trie.resetToState64(state).current() ||
            (resultAtState.hasValue() && valueAtState!=trie.getValue())
        ) {
            // Resetting must bring back exactly what was observed at save time.
            errln("trie.next(part of "+data[i].s+") changes current()/getValue() after "+
                  "saveState/next(0)/resetToState");
        } else if(!(result=trie.next(expectedString, partialLength, stringLength)).hasValue() ||
                  result!=trie.current()) {
            // From the restored state, the remainder must lead to a value.
            errln("trie.next(rest of "+data[i].s+") does not seem to contain "+data[i].s+" after "+
                  "saveState/next(0)/resetToState");
        } else if(!(result=trie.resetToState64(state).
                            next(expectedString, partialLength, stringLength)).hasValue() ||
                  result!=trie.current()) {
            // The same saved state must be reusable a second time.
            errln("trie does not seem to contain "+data[i].s+
                  " after saveState/next(rest)/resetToState");
        } else if(trie.getValue()!=data[i].value) {
            errln(String.format("trie value for %s is %d=0x%x instead of expected %d=0x%x",
                                data[i].s,
                                trie.getValue(), trie.getValue(),
                                data[i].value, data[i].value));
        }
        trie.reset();
    }
}
// next(string) is also tested in other functions,
// but here we try to go partway through the string, and then beyond it.
private void checkNextString(BytesTrie trie, StringAndValue data[], int dataLength) {

View File

@ -674,6 +674,7 @@ public class CharsTrieTest extends TestFmwk {
checkFirst(trie, data, dataLength);
checkNext(trie, data, dataLength);
checkNextWithState(trie, data, dataLength);
checkNextWithState64(trie, data, dataLength);
checkNextString(trie, data, dataLength);
checkIterator(trie, data, dataLength);
}
@ -885,6 +886,54 @@ public class CharsTrieTest extends TestFmwk {
}
}
/**
 * Exercises getState64()/resetToState64() on a CharsTrie.
 * For each entry: advances through the first third of its chars, saves the
 * 64-bit state, forces a mismatch with next(0), then checks that resetting to
 * the saved state restores current()/getValue() and that the rest of the
 * string can still be matched (twice, with a reset in between).
 *
 * @param trie       the trie under test; it is reset() after each entry
 * @param data       expected strings and values
 * @param dataLength number of leading entries of data to check
 */
private void checkNextWithState64(CharsTrie trie, StringAndValue[] data, int dataLength) {
    // Compare against 0L, not 0: if assertNotEquals binds to an Object-based
    // overload, an int 0 boxes to Integer while the state boxes to Long, and
    // those are never equal, so the check could not fail.
    assertNotEquals("trie(initial state).getState64()!=0", 0L, trie.getState64());
    for(int i=0; i<dataLength; ++i) {
        String expectedString=data[i].s;
        int stringLength=expectedString.length();
        int partialLength=stringLength/3;
        // Walk the first third of the string one char at a time.
        for(int j=0; j<partialLength; ++j) {
            if(!trie.next(expectedString.charAt(j)).matches()) {
                errln("trie.next()=BytesTrie.Result.NO_MATCH for a prefix of "+data[i].s);
                return;
            }
        }
        // Remember where we are, plus what current()/getValue() report here.
        long state = trie.getState64();
        assertNotEquals("trie.getState64()!=0", 0L, state);
        BytesTrie.Result resultAtState=trie.current();
        BytesTrie.Result result;
        int valueAtState=-99;
        if(resultAtState.hasValue()) {
            valueAtState=trie.getValue();
        }
        result=trie.next(0);  // mismatch
        if(result!=BytesTrie.Result.NO_MATCH || result!=trie.current()) {
            errln("trie.next(0) matched after part of "+data[i].s);
        }
        if( resultAtState!=trie.resetToState64(state).current() ||
            (resultAtState.hasValue() && valueAtState!=trie.getValue())
        ) {
            // Resetting must bring back exactly what was observed at save time.
            errln("trie.next(part of "+data[i].s+") changes current()/getValue() after "+
                  "saveState/next(0)/resetToState");
        } else if(!(result=trie.next(expectedString, partialLength, stringLength)).hasValue() ||
                  result!=trie.current()) {
            // From the restored state, the remainder must lead to a value.
            errln("trie.next(rest of "+data[i].s+") does not seem to contain "+data[i].s+" after "+
                  "saveState/next(0)/resetToState");
        } else if(!(result=trie.resetToState64(state).
                            next(expectedString, partialLength, stringLength)).hasValue() ||
                  result!=trie.current()) {
            // The same saved state must be reusable a second time.
            errln("trie does not seem to contain "+data[i].s+
                  " after saveState/next(rest)/resetToState");
        } else if(trie.getValue()!=data[i].value) {
            errln(String.format("trie value for %s is %d=0x%x instead of expected %d=0x%x",
                                data[i].s,
                                trie.getValue(), trie.getValue(),
                                data[i].value, data[i].value));
        }
        trie.reset();
    }
}
// next(string) is also tested in other functions,
// but here we try to go partway through the string, and then beyond it.
private void checkNextString(CharsTrie trie, StringAndValue[] data, int dataLength) {