ICU-1533 incorporate Mark's review comments; remove special Jamo handling

X-SVN-Rev: 7057
2001-11-21 20:57:08 +00:00 · 2001-11-21 20:57:08 +00:00 · f61acd7ed7
commit f61acd7ed7
parent 3912ae3f89
2 changed files with 18 additions and 86 deletions
--- a/icu4j/src/com/ibm/icu/text/NormalizationTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/NormalizationTransliterator.java
@@ -13,7 +13,7 @@ import java.util.*;

 /**
 * @author Alan Liu
- * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.9 $ $Date: 2001/11/17 20:56:13 $
+ * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.10 $ $Date: 2001/11/21 20:57:08 $
 */
 class NormalizationTransliterator extends Transliterator {

@@ -33,6 +33,8 @@ class NormalizationTransliterator extends Transliterator {
     * effectively consider these to be cc!=0, for our purposes.
     *
     * From http://www.macchiato.com/utc/NFUnsafeStart-3.1.1dX.txt
+     *
+     * TODO: Replace this with four separate sets, one per normalization form (NFC, NFD, NFKC, NFKD).
     */
    static final UnicodeSet UNSAFE_START = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);

@@ -111,51 +113,15 @@ class NormalizationTransliterator extends Transliterator {
        // character b, and pass everything from the start up to the
        // character before b to normalizer.
        if (isIncremental) {
-            // Wrinkle: Jamo has a combining class of zero, but we
-            // don't want to normalize individual Jamo one at a time
-            // if we're composing incrementally.  If we are composing
-            // in incremental mode then we collect up trailing jamo
-            // and save them for next time.
-            boolean doStandardBackup = true;
-            if (mode.compose()) {
-                // As a minor optimization, if there are three or more
-                // trailing jamo, we let the first three through --
-                // these should be handled correctly.
-                char c;
-                while (limit > offsets.start &&
-                       (c=text.charAt(limit-1)) >= 0x1100 &&
-                       c < 0x1200) {
-                    --limit;
-                }
-                // Characters in [limit, offsets.limit) are jamo.
-                // If we have at least 3 jamo, then allow them
-                // to be transliterated.  If we have zero jamo,
-                // then proceed as usual.
-                if (limit < offsets.limit) {
-                    if ((offsets.limit - limit) >= 3) {
-                        limit += 3;
-                    }
-                    doStandardBackup = false;
-                }
-            }
-
-            // A standard backup consists of finding the last
-            // character with cc==0 and passing everything from the
-            // start up to (but not including) that character to
-            // normalizer.  We use the UNSAFE_START set to detect
-            // characters with cc==0 that we want to treat as if they
-            // have cc!=0 (see above).
-            if (doStandardBackup) {
+            --limit;
+            char c;
+            while (limit > start &&
+                   (UCharacter.getCombiningClass(c=text.charAt(limit)) != 0 ||
+                    UNSAFE_START.contains(c))) {
                --limit;
-                char c;
-                while (limit > start &&
-                       (UCharacter.getCombiningClass(c=text.charAt(limit)) != 0 ||
-                        UNSAFE_START.contains(c))) {
-                    --limit;
-                }
            }
        }
-
+        
        if (limit > start) {
            char chars[] = new char[limit - start];
            text.getChars(start, limit, chars, 0);
--- a/icu4j/src/com/ibm/text/NormalizationTransliterator.java
+++ b/icu4j/src/com/ibm/text/NormalizationTransliterator.java
@@ -13,7 +13,7 @@ import java.util.*;

 /**
 * @author Alan Liu
- * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.9 $ $Date: 2001/11/17 20:56:13 $
+ * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.10 $ $Date: 2001/11/21 20:57:08 $
 */
 class NormalizationTransliterator extends Transliterator {

@@ -33,6 +33,8 @@ class NormalizationTransliterator extends Transliterator {
     * effectively consider these to be cc!=0, for our purposes.
     *
     * From http://www.macchiato.com/utc/NFUnsafeStart-3.1.1dX.txt
+     *
+     * TODO: Replace this with four separate sets, one per normalization form (NFC, NFD, NFKC, NFKD).
     */
    static final UnicodeSet UNSAFE_START = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);

@@ -111,51 +113,15 @@ class NormalizationTransliterator extends Transliterator {
        // character b, and pass everything from the start up to the
        // character before b to normalizer.
        if (isIncremental) {
-            // Wrinkle: Jamo has a combining class of zero, but we
-            // don't want to normalize individual Jamo one at a time
-            // if we're composing incrementally.  If we are composing
-            // in incremental mode then we collect up trailing jamo
-            // and save them for next time.
-            boolean doStandardBackup = true;
-            if (mode.compose()) {
-                // As a minor optimization, if there are three or more
-                // trailing jamo, we let the first three through --
-                // these should be handled correctly.
-                char c;
-                while (limit > offsets.start &&
-                       (c=text.charAt(limit-1)) >= 0x1100 &&
-                       c < 0x1200) {
-                    --limit;
-                }
-                // Characters in [limit, offsets.limit) are jamo.
-                // If we have at least 3 jamo, then allow them
-                // to be transliterated.  If we have zero jamo,
-                // then proceed as usual.
-                if (limit < offsets.limit) {
-                    if ((offsets.limit - limit) >= 3) {
-                        limit += 3;
-                    }
-                    doStandardBackup = false;
-                }
-            }
-
-            // A standard backup consists of finding the last
-            // character with cc==0 and passing everything from the
-            // start up to (but not including) that character to
-            // normalizer.  We use the UNSAFE_START set to detect
-            // characters with cc==0 that we want to treat as if they
-            // have cc!=0 (see above).
-            if (doStandardBackup) {
+            --limit;
+            char c;
+            while (limit > start &&
+                   (UCharacter.getCombiningClass(c=text.charAt(limit)) != 0 ||
+                    UNSAFE_START.contains(c))) {
                --limit;
-                char c;
-                while (limit > start &&
-                       (UCharacter.getCombiningClass(c=text.charAt(limit)) != 0 ||
-                        UNSAFE_START.contains(c))) {
-                    --limit;
-                }
            }
        }
-
+        
        if (limit > start) {
            char chars[] = new char[limit - start];
            text.getChars(start, limit, chars, 0);