ICU-7264 Latin-1 special CEs can result in CE_NOT_FOUND_; fall back to the UCA in that case, and leave a TODO to avoid this in the builder

X-SVN-Rev: 28897
This commit is contained in:
Markus Scherer 2010-10-24 18:45:46 +00:00
parent d9de6496a8
commit acafffb5d9
2 changed files with 40 additions and 39 deletions

View File

@ -282,33 +282,36 @@ public final class CollationElementIterator
if (ch <= 0xFF) {
// For latin-1 characters we never need to fall back to the UCA
// table because all of the UCA data is replicated in the
// latinOneMapping array
// latinOneMapping array.
// Except: Special CEs can result in CE_NOT_FOUND_,
// for example if the default entry for a prefix-special is "not found",
// and we do need to fall back to the UCA in such a case.
// TODO: It would be better if tailoring specials never resulted in "not found"
// unless the corresponding UCA result is also "not found".
// That would require a change in the ICU4J collator-from-rule builder.
result = m_collator_.m_trie_.getLatin1LinearValue(ch);
if (RuleBasedCollator.isSpecial(result)) {
result = nextSpecial(m_collator_, result, ch);
}
}
else {
} else {
result = m_collator_.m_trie_.getLeadValue(ch);
//System.out.println(Integer.toHexString(result));
if (RuleBasedCollator.isSpecial(result)) {
// surrogate leads are handled as special ces
result = nextSpecial(m_collator_, result, ch);
}
if (result == CE_NOT_FOUND_ && RuleBasedCollator.UCA_ != null) {
// couldn't find a good CE in the tailoring
// if we got here, the codepoint MUST be over 0xFF - so we look
// directly in the UCA
}
if (!RuleBasedCollator.isSpecial(result)) {
return result;
}
if (result != CE_NOT_FOUND_) {
result = nextSpecial(m_collator_, result, ch);
}
if (result == CE_NOT_FOUND_) {
// couldn't find a good CE in the tailoring
if (RuleBasedCollator.UCA_ != null) {
result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
if (RuleBasedCollator.isSpecial(result)) {
// UCA also gives us a special CE
result = nextSpecial(RuleBasedCollator.UCA_, result, ch);
}
}
}
if(result == CE_NOT_FOUND_) {
// maybe there is no UCA, unlikely in Java, but ported for consistency
result = nextImplicit(ch);
if(result == CE_NOT_FOUND_) {
// maybe there is no UCA, unlikely in Java, but ported for consistency
result = nextImplicit(ch);
}
}
return result;
}

View File

@ -324,20 +324,17 @@ public class CollationMiscTest extends TestFmwk {
String prettify(CollationKey sourceKey) {
int i;
byte[] bytes= sourceKey.toByteArray();
String target = "[";
StringBuilder target = new StringBuilder("[");
for (i = 0; i < bytes.length; i++) {
String numStr = Integer.toHexString(bytes[i]);
if (numStr.length()>2) {
target += numStr.substring(numStr.length()-2);
String numStr = Integer.toHexString(bytes[i] & 0xff);
if (numStr.length() < 2) {
target.append('0');
}
else {
target += numStr;
}
target += " ";
target.append(numStr).append(' ');
}
target += "]";
return target;
target.append(']');
return target.toString();
}
public void TestBeforePrefixFailure() {
@ -2461,8 +2458,8 @@ public class CollationMiscTest extends TestFmwk {
if (j>0) {
CollationKey prevKey = en.getCollationKey(cases[j-1]);
if (key.compareTo(prevKey)<0) {
errln("Error! EN test["+j+"]:"+"source:" + cases[j]+
"is not greater than previous test.");
errln("Error! EN test["+j+"]:source:" + cases[j]+
" is not >= previous test string.");
}
}
/*
@ -2474,7 +2471,7 @@ public class CollationMiscTest extends TestFmwk {
logln("String:"+cases[j]+" Key:"+ prettify(key));
}
} catch (Exception e) {
warnln("Error creating Vietnese collator");
warnln("Error creating English collator");
return;
}
@ -2489,14 +2486,14 @@ public class CollationMiscTest extends TestFmwk {
if (j>0) {
CollationKey prevKey = ja.getCollationKey(cases[j-1]);
if (key.compareTo(prevKey)<0) {
errln("Error! JA test["+j+"]:"+"source:" + cases[j]+
"is not greater than previous test.");
errln("Error! JA test["+j+"]:source:" + cases[j]+
" is not >= previous test string.");
}
}
logln("String:"+cases[j]+" Key:"+ prettify(key));
}
} catch (Exception e) {
warnln("Error creating Vietnese collator");
warnln("Error creating Japanese collator");
return;
}
for(int i = 0; i < rules.length; i++) {
@ -2507,6 +2504,7 @@ public class CollationMiscTest extends TestFmwk {
coll = new RuleBasedCollator(rules[i]);
} catch (Exception e) {
warnln("Unable to open collator with rules " + rules[i]);
continue;
}
for (int j=0; j<cases.length; j++) {
@ -2515,14 +2513,14 @@ public class CollationMiscTest extends TestFmwk {
CollationKey prevKey = coll.getCollationKey(cases[j-1]);
if (i==1 && j==3) {
if (key.compareTo(prevKey)>0) {
errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
cases[j]+"is not greater than previous test.");
errln("Error! Rule:"+rules[i]+" test["+j+"]:source:"+
cases[j]+" is not <= previous test string.");
}
}
else {
if (key.compareTo(prevKey)<0) {
errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
cases[j]+"is not greater than previous test.");
errln("Error! Rule:"+rules[i]+" test["+j+"]:source:"+
cases[j]+" is not >= previous test string.");
}
}
}