ICU-10688 remove break type dependency from dictionaries in break iterators.

X-SVN-Rev: 40688
2017-12-04 19:27:48 +00:00 · 2017-12-04 19:27:48 +00:00 · b64c563688
commit b64c563688
parent 023e8b289f
6 changed files with 38 additions and 46 deletions
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java
@ -61,7 +61,6 @@ class BurmeseBreakEngine extends DictionaryBreakEngine {
    }

    public BurmeseBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
        setCharacters(fBurmeseWordSet);
        // Initialize dictionary
        fDictionary = DictionaryData.loadDictionaryFor("Mymr");
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java
@ -38,7 +38,6 @@ class CjkBreakEngine extends DictionaryBreakEngine {
    private DictionaryMatcher fDictionary = null;

    public CjkBreakEngine(boolean korean) throws IOException {
-        super(BreakIterator.KIND_WORD);
        fDictionary = DictionaryData.loadDictionaryFor("Hira");
        if (korean) {
            setCharacters(fHangulWordSet);
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java
@ -169,16 +169,11 @@ abstract class DictionaryBreakEngine implements LanguageBreakEngine {
    }

    UnicodeSet fSet = new UnicodeSet();
-    private BitSet fTypes = new BitSet(32);

    /**
-     * @param breakTypes The types of break iterators that can use this engine.
-     *  For example, BreakIterator.KIND_LINE
+     *  Constructor
     */
-    public DictionaryBreakEngine(Integer... breakTypes) {
-        for (Integer type: breakTypes) {
-            fTypes.set(type);
-        }
+    public DictionaryBreakEngine() {
    }

    @Override
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java
@ -16,7 +16,7 @@ import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;

 class KhmerBreakEngine extends DictionaryBreakEngine {
-    
+
    // Constants for KhmerBreakIterator
    // How many words in a row are "good enough"?
    private static final byte KHMER_LOOKAHEAD = 3;
@ -29,14 +29,14 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
    private static final byte KHMER_MIN_WORD = 2;
    // Minimum number of characters for two words
    private static final byte KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
-    
-    
+
+
    private DictionaryMatcher fDictionary;
    private static UnicodeSet fKhmerWordSet;
    private static UnicodeSet fEndWordSet;
    private static UnicodeSet fBeginWordSet;
    private static UnicodeSet fMarkSet;
-    
+
    static {
        // Initialize UnicodeSets
        fKhmerWordSet = new UnicodeSet();
@ -56,42 +56,42 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
        fMarkSet.compact();
        fEndWordSet.compact();
        fBeginWordSet.compact();
-        
+
        // Freeze the static UnicodeSet
        fKhmerWordSet.freeze();
        fMarkSet.freeze();
        fEndWordSet.freeze();
        fBeginWordSet.freeze();
    }
-    
+
    public KhmerBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
        setCharacters(fKhmerWordSet);
        // Initialize dictionary
        fDictionary = DictionaryData.loadDictionaryFor("Khmr");
    }

+    @Override
    public boolean equals(Object obj) {
        // Normally is a singleton, but it's possible to have duplicates
        //   during initialization. All are equivalent.
        return obj instanceof KhmerBreakEngine;
    }

+    @Override
    public int hashCode() {
        return getClass().hashCode();
    }
- 
-    public boolean handles(int c, int breakType) {
-        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
-            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
-            return (script == UScript.KHMER);
-        }
-        return false;
+
+    @Override
+    public boolean handles(int c) {
+        int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
+        return (script == UScript.KHMER);
    }

-    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd, 
+    @Override
+    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
            DequeI foundBreaks) {
-               
+
        if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
            return 0;  // Not enough characters for word
        }
@ -163,7 +163,7 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
                // no preceding word, or the non-word shares less than the minimum threshold
                // of characters with a dictionary word, then scan to resynchronize
                if (words[wordsFound%KHMER_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
-                        (wordLength == 0 || 
+                        (wordLength == 0 ||
                                words[wordsFound%KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
                    // Look for a plausible word boundary
                    int remaining = rangeEnd - (current + wordLength);
@ -209,7 +209,7 @@ class KhmerBreakEngine extends DictionaryBreakEngine {

            // Look ahead for possible suffixes if a dictionary word does not follow.
            // We do this in code rather than using a rule so that the heuristic
-            // resynch continues to function. For example, one of the suffix characters 
+            // resynch continues to function. For example, one of the suffix characters
            // could be a typo in the middle of a word.
            // NOT CURRENTLY APPLICABLE TO KHMER

--- a/icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java
@ -64,7 +64,6 @@ class LaoBreakEngine extends DictionaryBreakEngine {
    }

    public LaoBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
        setCharacters(fLaoWordSet);
        // Initialize dictionary
        fDictionary = DictionaryData.loadDictionaryFor("Laoo");
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/ThaiBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ThaiBreakEngine.java
@ -16,7 +16,7 @@ import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;

 class ThaiBreakEngine extends DictionaryBreakEngine {
-    
+
    // Constants for ThaiBreakIterator
    // How many words in a row are "good enough"?
    private static final byte THAI_LOOKAHEAD = 3;
@ -33,14 +33,14 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
    private static final byte THAI_MIN_WORD = 2;
    // Minimum number of characters for two words
    private static final byte THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
-    
+
    private DictionaryMatcher fDictionary;
    private static UnicodeSet fThaiWordSet;
    private static UnicodeSet fEndWordSet;
    private static UnicodeSet fBeginWordSet;
    private static UnicodeSet fSuffixSet;
    private static UnicodeSet fMarkSet;
-    
+
    static {
        // Initialize UnicodeSets
        fThaiWordSet = new UnicodeSet();
@ -66,7 +66,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
        fEndWordSet.compact();
        fBeginWordSet.compact();
        fSuffixSet.compact();
-        
+
        // Freeze the static UnicodeSet
        fThaiWordSet.freeze();
        fMarkSet.freeze();
@ -74,32 +74,32 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
        fBeginWordSet.freeze();
        fSuffixSet.freeze();
    }
-    
+
    public ThaiBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
        setCharacters(fThaiWordSet);
        // Initialize dictionary
        fDictionary = DictionaryData.loadDictionaryFor("Thai");
    }
-    
+
+    @Override
    public boolean equals(Object obj) {
        // Normally is a singleton, but it's possible to have duplicates
        //   during initialization. All are equivalent.
        return obj instanceof ThaiBreakEngine;
    }

+    @Override
    public int hashCode() {
        return getClass().hashCode();
    }
-    
-    public boolean handles(int c, int breakType) {
-        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
-            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
-            return (script == UScript.THAI);
-        }
-        return false;
+
+    @Override
+    public boolean handles(int c) {
+        int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
+        return (script == UScript.THAI);
    }

+    @Override
    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
            DequeI foundBreaks) {

@ -112,7 +112,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
        for (int i = 0; i < THAI_LOOKAHEAD; i++) {
            words[i] = new PossibleWord();
        }
-        
+
        int uc;
        fIter.setIndex(rangeStart);
        int current;
@ -156,7 +156,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
                                }
                            } while (words[(wordsFound+1)%THAI_LOOKAHEAD].backUp(fIter));
                        }
-                    } 
+                    }
                    while (words[wordsFound%THAI_LOOKAHEAD].backUp(fIter));
                    // foundBest: end of loop
                }
@ -174,7 +174,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
                // no preceding word, or the non-word shares less than the minimum threshold
                // of characters with a dictionary word, then scan to resynchronize
                if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
-                        (wordLength == 0 || 
+                        (wordLength == 0 ||
                                words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
                    // Look for a plausible word boundary
                    int remaining = rangeEnd - (current + wordLength);
@ -224,7 +224,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {

            // Look ahead for possible suffixes if a dictionary word does not follow.
            // We do this in code rather than using a rule so that the heuristic
-            // resynch continues to function. For example, one of the suffix characters 
+            // resynch continues to function. For example, one of the suffix characters
            // could be a typo in the middle of a word.
            if (fIter.getIndex() < rangeEnd && wordLength > 0) {
                if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&