ICU-7264 Latin-1 special can result in CE_NOT_FOUND_; fall back to UCA, leave TODO to avoid this in the builder
X-SVN-Rev: 28897
This commit is contained in:
parent
d9de6496a8
commit
acafffb5d9
@ -282,34 +282,37 @@ public final class CollationElementIterator
|
||||
if (ch <= 0xFF) {
|
||||
// For latin-1 characters we never need to fall back to the UCA
|
||||
// table because all of the UCA data is replicated in the
|
||||
// latinOneMapping array
|
||||
// latinOneMapping array.
|
||||
// Except: Special CEs can result in CE_NOT_FOUND_,
|
||||
// for example if the default entry for a prefix-special is "not found",
|
||||
// and we do need to fall back to the UCA in such a case.
|
||||
// TODO: It would be better if tailoring specials never resulted in "not found"
|
||||
// unless the corresponding UCA result is also "not found".
|
||||
// That would require a change in the ICU4J collator-from-rule builder.
|
||||
result = m_collator_.m_trie_.getLatin1LinearValue(ch);
|
||||
if (RuleBasedCollator.isSpecial(result)) {
|
||||
result = nextSpecial(m_collator_, result, ch);
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
result = m_collator_.m_trie_.getLeadValue(ch);
|
||||
//System.out.println(Integer.toHexString(result));
|
||||
if (RuleBasedCollator.isSpecial(result)) {
|
||||
// surrogate leads are handled as special ces
|
||||
}
|
||||
if (!RuleBasedCollator.isSpecial(result)) {
|
||||
return result;
|
||||
}
|
||||
if (result != CE_NOT_FOUND_) {
|
||||
result = nextSpecial(m_collator_, result, ch);
|
||||
}
|
||||
if (result == CE_NOT_FOUND_ && RuleBasedCollator.UCA_ != null) {
|
||||
if (result == CE_NOT_FOUND_) {
|
||||
// couldn't find a good CE in the tailoring
|
||||
// if we got here, the codepoint MUST be over 0xFF - so we look
|
||||
// directly in the UCA
|
||||
if (RuleBasedCollator.UCA_ != null) {
|
||||
result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
|
||||
if (RuleBasedCollator.isSpecial(result)) {
|
||||
// UCA also gives us a special CE
|
||||
result = nextSpecial(RuleBasedCollator.UCA_, result, ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(result == CE_NOT_FOUND_) {
|
||||
// maybe there is no UCA, unlikely in Java, but ported for consistency
|
||||
result = nextImplicit(ch);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -324,20 +324,17 @@ public class CollationMiscTest extends TestFmwk {
|
||||
String prettify(CollationKey sourceKey) {
|
||||
int i;
|
||||
byte[] bytes= sourceKey.toByteArray();
|
||||
String target = "[";
|
||||
StringBuilder target = new StringBuilder("[");
|
||||
|
||||
for (i = 0; i < bytes.length; i++) {
|
||||
String numStr = Integer.toHexString(bytes[i]);
|
||||
if (numStr.length()>2) {
|
||||
target += numStr.substring(numStr.length()-2);
|
||||
String numStr = Integer.toHexString(bytes[i] & 0xff);
|
||||
if (numStr.length() < 2) {
|
||||
target.append('0');
|
||||
}
|
||||
else {
|
||||
target += numStr;
|
||||
target.append(numStr).append(' ');
|
||||
}
|
||||
target += " ";
|
||||
}
|
||||
target += "]";
|
||||
return target;
|
||||
target.append(']');
|
||||
return target.toString();
|
||||
}
|
||||
|
||||
public void TestBeforePrefixFailure() {
|
||||
@ -2461,8 +2458,8 @@ public class CollationMiscTest extends TestFmwk {
|
||||
if (j>0) {
|
||||
CollationKey prevKey = en.getCollationKey(cases[j-1]);
|
||||
if (key.compareTo(prevKey)<0) {
|
||||
errln("Error! EN test["+j+"]:"+"source:" + cases[j]+
|
||||
"is not greater than previous test.");
|
||||
errln("Error! EN test["+j+"]:source:" + cases[j]+
|
||||
" is not >= previous test string.");
|
||||
}
|
||||
}
|
||||
/*
|
||||
@ -2474,7 +2471,7 @@ public class CollationMiscTest extends TestFmwk {
|
||||
logln("String:"+cases[j]+" Key:"+ prettify(key));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
warnln("Error creating Vietnese collator");
|
||||
warnln("Error creating English collator");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2489,14 +2486,14 @@ public class CollationMiscTest extends TestFmwk {
|
||||
if (j>0) {
|
||||
CollationKey prevKey = ja.getCollationKey(cases[j-1]);
|
||||
if (key.compareTo(prevKey)<0) {
|
||||
errln("Error! JA test["+j+"]:"+"source:" + cases[j]+
|
||||
"is not greater than previous test.");
|
||||
errln("Error! JA test["+j+"]:source:" + cases[j]+
|
||||
" is not >= previous test string.");
|
||||
}
|
||||
}
|
||||
logln("String:"+cases[j]+" Key:"+ prettify(key));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
warnln("Error creating Vietnese collator");
|
||||
warnln("Error creating Japanese collator");
|
||||
return;
|
||||
}
|
||||
for(int i = 0; i < rules.length; i++) {
|
||||
@ -2507,6 +2504,7 @@ public class CollationMiscTest extends TestFmwk {
|
||||
coll = new RuleBasedCollator(rules[i]);
|
||||
} catch (Exception e) {
|
||||
warnln("Unable to open collator with rules " + rules[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int j=0; j<cases.length; j++) {
|
||||
@ -2515,14 +2513,14 @@ public class CollationMiscTest extends TestFmwk {
|
||||
CollationKey prevKey = coll.getCollationKey(cases[j-1]);
|
||||
if (i==1 && j==3) {
|
||||
if (key.compareTo(prevKey)>0) {
|
||||
errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
|
||||
cases[j]+"is not greater than previous test.");
|
||||
errln("Error! Rule:"+rules[i]+" test["+j+"]:source:"+
|
||||
cases[j]+" is not <= previous test string.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (key.compareTo(prevKey)<0) {
|
||||
errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
|
||||
cases[j]+"is not greater than previous test.");
|
||||
errln("Error! Rule:"+rules[i]+" test["+j+"]:source:"+
|
||||
cases[j]+" is not >= previous test string.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user