ICU-7264 Latin-1 special can result in CE_NOT_FOUND_; fall back to UCA, leave TODO to avoid this in the builder

X-SVN-Rev: 28897
2010-10-24 18:45:46 +00:00 · 2010-10-24 18:45:46 +00:00 · acafffb5d9
commit acafffb5d9
parent d9de6496a8
2 changed files with 40 additions and 39 deletions
--- a/icu4j/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java
@ -282,34 +282,37 @@ public final class CollationElementIterator
        if (ch <= 0xFF) {
            // For latin-1 characters we never need to fall back to the UCA
            // table because all of the UCA data is replicated in the
-            // latinOneMapping array
+            // latinOneMapping array.
+            // Except: Special CEs can result in CE_NOT_FOUND_,
+            // for example if the default entry for a prefix-special is "not found",
+            // and we do need to fall back to the UCA in such a case.
+            // TODO: It would be better if tailoring specials never resulted in "not found"
+            // unless the corresponding UCA result is also "not found".
+            // That would require a change in the ICU4J collator-from-rule builder.
            result = m_collator_.m_trie_.getLatin1LinearValue(ch);
-            if (RuleBasedCollator.isSpecial(result)) {
-                result = nextSpecial(m_collator_, result, ch);
-            }
-        }
-        else {
+        } else {
            result = m_collator_.m_trie_.getLeadValue(ch);
-            //System.out.println(Integer.toHexString(result));
-            if (RuleBasedCollator.isSpecial(result)) {
-                // surrogate leads are handled as special ces
+        }
+        if (!RuleBasedCollator.isSpecial(result)) {
+            return result;
+        }
+        if (result != CE_NOT_FOUND_) {
            result = nextSpecial(m_collator_, result, ch);
        }
-            if (result == CE_NOT_FOUND_ && RuleBasedCollator.UCA_ != null) {
+        if (result == CE_NOT_FOUND_) {
            // couldn't find a good CE in the tailoring
-                // if we got here, the codepoint MUST be over 0xFF - so we look
-                // directly in the UCA
+            if (RuleBasedCollator.UCA_ != null) {
                result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
                if (RuleBasedCollator.isSpecial(result)) {
                    // UCA also gives us a special CE
                    result = nextSpecial(RuleBasedCollator.UCA_, result, ch);
                }
            }
-        }
            if(result == CE_NOT_FOUND_) { 
                // maybe there is no UCA, unlikely in Java, but ported for consistency
                result = nextImplicit(ch); 
            }
+        }
        return result;
    }

--- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationMiscTest.java
+++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationMiscTest.java
@ -324,20 +324,17 @@ public class CollationMiscTest extends TestFmwk {
    String prettify(CollationKey sourceKey) {
        int i;
        byte[] bytes= sourceKey.toByteArray();
-        String target = "[";
+        StringBuilder target = new StringBuilder("[");

        for (i = 0; i < bytes.length; i++) {
-            String numStr = Integer.toHexString(bytes[i]);
-            if (numStr.length()>2) {
-                target += numStr.substring(numStr.length()-2);
+            String numStr = Integer.toHexString(bytes[i] & 0xff);
+            if (numStr.length() < 2) {
+                target.append('0');
            }
-            else {
-                target += numStr;
+            target.append(numStr).append(' ');
        }
-            target += " ";
-        }
-        target += "]";
-        return target;
+        target.append(']');
+        return target.toString();
    }

    public void TestBeforePrefixFailure() {
@ -2461,8 +2458,8 @@ public class CollationMiscTest extends TestFmwk {
                if (j>0) {
                    CollationKey prevKey = en.getCollationKey(cases[j-1]);
                    if (key.compareTo(prevKey)<0) {
-                        errln("Error! EN test["+j+"]:"+"source:" + cases[j]+
-                        "is not greater than previous test.");
+                        errln("Error! EN test["+j+"]:source:" + cases[j]+
+                        " is not >= previous test string.");
                    }
                }
                /*
@ -2474,7 +2471,7 @@ public class CollationMiscTest extends TestFmwk {
                logln("String:"+cases[j]+"   Key:"+  prettify(key));
            }
        } catch (Exception e) {
-            warnln("Error creating Vietnese collator");
+            warnln("Error creating English collator");
            return;
        }

@ -2489,14 +2486,14 @@ public class CollationMiscTest extends TestFmwk {
                if (j>0) {
                    CollationKey prevKey = ja.getCollationKey(cases[j-1]);
                    if (key.compareTo(prevKey)<0) {
-                        errln("Error! JA test["+j+"]:"+"source:" + cases[j]+
-                        "is not greater than previous test.");
+                        errln("Error! JA test["+j+"]:source:" + cases[j]+
+                        " is not >= previous test string.");
                    }
                }
                logln("String:"+cases[j]+"   Key:"+  prettify(key));
            }
        } catch (Exception e) {
-            warnln("Error creating Vietnese collator");
+            warnln("Error creating Japanese collator");
            return;
        }
        for(int i = 0; i < rules.length; i++) {
@ -2507,6 +2504,7 @@ public class CollationMiscTest extends TestFmwk {
                coll = new RuleBasedCollator(rules[i]);
            } catch (Exception e) {
                warnln("Unable to open collator with rules " + rules[i]);
+                continue;
            }

            for (int j=0; j<cases.length; j++) {
@ -2515,14 +2513,14 @@ public class CollationMiscTest extends TestFmwk {
                    CollationKey prevKey = coll.getCollationKey(cases[j-1]);
                    if (i==1 && j==3) {
                        if (key.compareTo(prevKey)>0) {
-                            errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
-                            cases[j]+"is not greater than previous test.");
+                            errln("Error! Rule:"+rules[i]+" test["+j+"]:source:"+
+                            cases[j]+" is not <= previous test string.");
                        }
                    }
                    else {
                        if (key.compareTo(prevKey)<0) {
-                            errln("Error! Rule:"+rules[i]+" test["+j+"]:"+"source:"+
-                            cases[j]+"is not greater than previous test.");
+                            errln("Error! Rule:"+rules[i]+" test["+j+"]:source:"+
+                            cases[j]+" is not >= previous test string.");
                        }
                    }
                }