ICU-7869 Made lazy-evaluated arguments; still a bit more cleanup to come.

X-SVN-Rev: 28507
This commit is contained in:
Mark Davis 2010-08-21 01:15:43 +00:00
parent 29fe431f2a
commit 000d3bbf9d
2 changed files with 243 additions and 114 deletions

View File

@ -25,7 +25,7 @@ import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Normalizer2.Mode; import com.ibm.icu.text.Normalizer2.Mode;
import com.ibm.icu.util.LocaleData; import com.ibm.icu.util.LocaleData;
import com.ibm.icu.util.ULocale; import com.ibm.icu.util.ULocale;
import com.ibm.icu.text.Index.Bucket; import com.ibm.icu.text.AlphabeticIndex.Bucket;
/** /**
* A class that supports the creation of a UI index appropriate for a given language, such as: * A class that supports the creation of a UI index appropriate for a given language, such as:
@ -45,11 +45,12 @@ import com.ibm.icu.text.Index.Bucket;
* The class can generate a list of labels for use as a UI "index", that is, a list of clickable characters (or * The class can generate a list of labels for use as a UI "index", that is, a list of clickable characters (or
* character sequences) that allow the user to see a segment (bucket) of a larger "target" list. That is, each label * character sequences) that allow the user to see a segment (bucket) of a larger "target" list. That is, each label
* corresponds to a bucket in the target list, where everything in the bucket is greater than or equal to the character * corresponds to a bucket in the target list, where everything in the bucket is greater than or equal to the character
* (according to the locale's collation). Strings can be added to the index; they will be in sorted order in the right bucket. * (according to the locale's collation). Strings can be added to the index; they will be in sorted order in the right
* bucket.
* <p> * <p>
* The class also supports having buckets for strings before the first (underflow), after the last (overflow), and between * The class also supports having buckets for strings before the first (underflow), after the last (overflow), and
* scripts (inflow). For example, if the index is constructed with labels for Russian and English, Greek characters * between scripts (inflow). For example, if the index is constructed with labels for Russian and English, Greek
* would fall into an inflow bucket between the other two scripts. * characters would fall into an inflow bucket between the other two scripts.
* <p> * <p>
* <i>Example</i> * <i>Example</i>
* <p> * <p>
@ -86,7 +87,7 @@ import com.ibm.icu.text.Index.Bucket;
* if its bucket is empty. Small buckets could also be combined based on size, such as: * if its bucket is empty. Small buckets could also be combined based on size, such as:
* *
* <pre> * <pre>
* A-F G-N O-Z * <b> A-F G-N O-Z </b>
* </pre> * </pre>
* *
* <p> * <p>
@ -96,104 +97,146 @@ import com.ibm.icu.text.Index.Bucket;
* class can still be used to get the correct sorting order, but the index characters should be suppressed.</li> * class can still be used to get the correct sorting order, but the index characters should be suppressed.</li>
* <li>Additional collation parameters can be passed in as part of the locale name. For example, German plus numeric * <li>Additional collation parameters can be passed in as part of the locale name. For example, German plus numeric
* sorting would be "de@kn-true". * sorting would be "de@kn-true".
* <li>In the initial version, a limit of 100 items is placed on these lists. This may change or become configurable in * <li>In the initial version, a limit of 100 buckets is placed on these lists. This may change or become configurable in
* the future. When the limit is reached, then every nth value is removed to bring the list down below the limit.</li> * the future. When the limit is reached, then every nth value is removed to bring the list down below the limit.</li>
* </ul> * </ul>
* *
* @author markdavis * @author markdavis
* @draft ICU 4.2 * @draft ICU 4.6
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> { public final class AlphabeticIndex<V extends Comparable<V>> implements Iterable<Bucket<V>> {
/**
* Internals
*/
private static final char CGJ = '\u034F'; private static final char CGJ = '\u034F';
private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]"); private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]");
private static final UnicodeSet HANGUL = new UnicodeSet( private static final UnicodeSet HANGUL = new UnicodeSet(
"[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]"); "[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]");
private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"); private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");
private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]"); private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]");
private final RuleBasedCollator comparator; private final RuleBasedCollator comparator;
private final List<String> indexCharacters; private final List<String> firstScriptCharacters;
// for testing
private final LinkedHashMap<String, Set<String>> alreadyIn = new LinkedHashMap<String, Set<String>>(); private final LinkedHashMap<String, Set<String>> alreadyIn = new LinkedHashMap<String, Set<String>>();
private final List<String> noDistinctSorting = new ArrayList<String>(); private final List<String> noDistinctSorting = new ArrayList<String>();
private final List<String> notAlphabetic = new ArrayList<String>(); private final List<String> notAlphabetic = new ArrayList<String>();
private final List<String> firstScriptCharacters;
private Collection<Record<V>> inputList = new ArrayList<Record<V>>(); // We accumulate these as we build up the input parameters
private BucketList buckets;
private final UnicodeSet initialLabels = new UnicodeSet();
private final Collection<Record<V>> inputList = new ArrayList<Record<V>>();
// Lazy evaluated: null means that we have not built yet.
private List<String> indexCharacters;
private BucketList buckets;
private String overflowLabel = "\u2026";
private String underflowLabel = "\u2026";
private String inflowLabel = "\u2026";
/** /**
* Create the index object. * Create the index object.
* *
* @param locale * @param locale
* The locale for the index. * The locale for the index.
* @draft ICU 4.2
* @provisional This API might change or be removed in a future release.
*/
public Index(ULocale locale) {
this(locale, (RuleBasedCollator) Collator.getInstance(locale), null, null);
}
/**
* Create the index object.
*
* @param locale
* The locale to be passed.
* @param additions
* Additional characters to be added, eg A-Z for non-Latin locales.
* @draft ICU 4.6 * @draft ICU 4.6
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
public Index(ULocale locale, UnicodeSet additions) { public AlphabeticIndex(ULocale locale) {
this(locale, (RuleBasedCollator) Collator.getInstance(locale), null, additions); this(locale, (RuleBasedCollator) Collator.getInstance(locale), getIndexExemplars(locale));
}
/**
* Create the index object.
*
* @param locale
* The locale for the index.
* @param additionalLocales
* Additional characters to be added based on the index characters for those locales.
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public Index(ULocale locale, ULocale... additionalLocales) {
this(locale, (RuleBasedCollator) Collator.getInstance(locale), null, getIndexExemplars(additionalLocales));
} }
/** /**
* @internal * @internal
* @deprecated This API is ICU internal only, for testing purposes. * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
*/ */
public Index(ULocale locale, RuleBasedCollator collator, UnicodeSet exemplarChars, UnicodeSet additions) { public AlphabeticIndex(ULocale locale, RuleBasedCollator collator, UnicodeSet exemplarChars) {
comparator = (RuleBasedCollator) collator; comparator = (RuleBasedCollator) collator;
comparator.setStrength(Collator.PRIMARY); comparator.setStrength(Collator.PRIMARY);
firstScriptCharacters = FIRST_CHARS_IN_SCRIPTS;
addIndexCharacters(exemplarChars);
}
/**
* Add more index characters (aside from what are in the locale)
* @param additions additional characters to add to the index, such as A-Z.
* @return this, for chaining
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public AlphabeticIndex<V> addIndexCharacters(UnicodeSet additions) {
initialLabels.addAll(additions);
indexCharacters = null;
return this;
}
boolean[] explicitIndexChars = { true }; /**
UnicodeSet exemplars = exemplarChars != null ? exemplarChars : getIndexExemplars(locale, explicitIndexChars); * Add more index characters (aside from what are in the locale)
* @param additions additional characters to add to the index, such as those in Swedish.
if (additions != null) { * @return this, for chaining
exemplars.addAll(additions); * @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public AlphabeticIndex<V> addIndexCharacters(ULocale... additions) {
for (ULocale addition : additions) {
initialLabels.addAll(getIndexExemplars(addition));
} }
indexCharacters = null;
return this;
}
/**
* Set the overflow label
* @param overflowLabel see class description
* @return this, for chaining
*/
public AlphabeticIndex<V> setOverflowLabel(String overflowLabel) {
this.overflowLabel = overflowLabel;
return this;
}
/**
* Set the underflow label
* @param underflowLabel see class description
* @return this, for chaining
*/
public AlphabeticIndex<V> setUnderflowLabel(String underflowLabel) {
this.underflowLabel = underflowLabel;
return this;
}
/**
* Set the inflow label
* @param inflowLabel see class description
* @return this, for chaining
*/
public AlphabeticIndex<V> setInflowLabel(String inflowLabel) {
this.inflowLabel = inflowLabel;
return this;
}
private void initLabels() {
UnicodeSet exemplars = new UnicodeSet(initialLabels);
// first sort them, with a "best" ordering among items that are the same according // first sort them, with a "best" ordering among items that are the same according
// to the collator // to the collator
Set<String> preferenceSorting = new TreeSet<String>(new MultiComparator<Object>(comparator, // The JDK inexplicably didn't make Collators be Comparator<String>!
PREFERENCE_COMPARATOR)); Set<String> preferenceSorting = new TreeSet<String>(new MultiComparator<Object>(comparator, PREFERENCE_COMPARATOR));
exemplars.addAllTo(preferenceSorting); exemplars.addAllTo(preferenceSorting);
TreeSet<String> indexCharacterSet = new TreeSet<String>(comparator); TreeSet<String> indexCharacterSet = new TreeSet<String>(comparator);
// We now make a sorted array of elements, uppercased // We now make a sorted array of elements
// Some of the input may, however, be redundant. // Some of the input may, however, be redundant.
// That is, we might have c, ch, d, where "ch" sorts just like "c", "h" // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
// So we make a pass through, filtering out those cases. // So we make a pass through, filtering out those cases.
for (String item : preferenceSorting) { for (String item : preferenceSorting) {
if (!explicitIndexChars[0]) {
item = UCharacter.toUpperCase(locale, item);
}
if (indexCharacterSet.contains(item)) { if (indexCharacterSet.contains(item)) {
for (String itemAlreadyIn : indexCharacterSet) { for (String itemAlreadyIn : indexCharacterSet) {
if (comparator.compare(item, itemAlreadyIn) == 0) { if (comparator.compare(item, itemAlreadyIn) == 0) {
@ -231,22 +274,22 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
} }
} }
} }
indexCharacters = Collections.unmodifiableList(new ArrayList<String>(indexCharacterSet)); indexCharacters = Collections.unmodifiableList(new ArrayList<String>(indexCharacterSet));
firstScriptCharacters = FIRST_CHARS_IN_SCRIPTS; // TODO, use collation method when fast enough.
// firstStringsInScript(comparator); // firstStringsInScript(comparator);
buckets = new BucketList(indexCharacters); buckets = new BucketList();
} }
private static UnicodeSet getIndexExemplars(ULocale locale, boolean[] explicitIndexChars) { private static UnicodeSet getIndexExemplars(ULocale locale) {
UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX); UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX);
if (exemplars != null) { if (exemplars != null) {
explicitIndexChars[0] = true;
return exemplars; return exemplars;
} }
explicitIndexChars[0] = false;
// Synthesize the index exemplars
exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_STANDARD); exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_STANDARD);
// get the exemplars, and handle special cases // get the exemplars, and handle special cases
@ -270,16 +313,13 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
} }
} }
} }
return exemplars;
} UnicodeSet uppercased = new UnicodeSet();
for (String item : exemplars) {
private static UnicodeSet getIndexExemplars(ULocale... additionalLocales) { uppercased.add(UCharacter.toUpperCase(locale, item));
UnicodeSet additions = new UnicodeSet();
boolean[] explicitIndexChars = { true };
for (ULocale other : additionalLocales) {
additions.addAll(getIndexExemplars(other, explicitIndexChars));
} }
return additions;
return uppercased;
} }
/* /*
@ -305,10 +345,13 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
* Get the labels. * Get the labels.
* *
* @return A collection including the labels * @return A collection including the labels
* @draft ICU 4.2 * @draft ICU 4.6
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
public List<String> getLabels() { public List<String> getLabels() {
if (indexCharacters == null) {
initLabels();
}
return indexCharacters; return indexCharacters;
} }
@ -329,15 +372,15 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
} }
/** /**
* Get the default label used for abbreviated buckets <i>between</i> other labels. For example, consider * Get the default label used for abbreviated buckets <i>between</i> other labels. For example, consider the labels
* the labels for Latin and Greek are used: X Y Z &#x0391; &#x0392; &#x0393;. * for Latin and Greek are used: X Y Z &#x0391; &#x0392; &#x0393;.
* *
* @return inflow label * @return inflow label
* @draft ICU 4.6 * @draft ICU 4.6
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
public String getInflowLabel() { public String getInflowLabel() {
return "\u2026"; // TODO get localized version return inflowLabel; // TODO get localized version
} }
/** /**
@ -348,7 +391,7 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
public String getOverflowLabel() { public String getOverflowLabel() {
return "\u2026"; // TODO get localized version return overflowLabel; // TODO get localized version
} }
/** /**
@ -359,15 +402,44 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
public String getUnderflowLabel() { public String getUnderflowLabel() {
return "\u2026"; // TODO get localized version return underflowLabel; // TODO get localized version
} }
public Index<V> add(CharSequence key, V value) { /**
* Add a record (key and value) to the index.
*
* @param key Key, such as a name
* @param value Value, such as an address or link
* @return this, for chaining
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public AlphabeticIndex<V> add(CharSequence key, V value) {
buckets = null; // invalidate old bucketlist buckets = null; // invalidate old bucketlist
inputList.add(new Record<V>(key, value)); inputList.add(new Record<V>(key, value));
return this; return this;
} }
/**
* Clear the index.
*
* @return this, for chaining
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public AlphabeticIndex<V> clear() {
buckets = null;
inputList.clear();
return this;
}
/**
* Return the number of buckets in the index.
*
* @return number of buckets
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public int size() { public int size() {
if (buckets == null) { if (buckets == null) {
buckets = getIndexBuckets(); buckets = getIndexBuckets();
@ -375,6 +447,24 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
return buckets.bucketList.size(); return buckets.bucketList.size();
} }
/**
* Return the total number of records in the index.
*
* @return total number of records in buckets
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public int getRecordCount() {
return inputList.size();
}
/**
* Return an iterator over the buckets.
*
* @return iterator over buckets.
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public Iterator<Bucket<V>> iterator() { public Iterator<Bucket<V>> iterator() {
if (buckets == null) { if (buckets == null) {
buckets = getIndexBuckets(); buckets = getIndexBuckets();
@ -395,7 +485,7 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
private BucketList getIndexBuckets() { private BucketList getIndexBuckets() {
BucketList output = new BucketList(indexCharacters); BucketList output = new BucketList();
// Set up an array of sorted input key/value pairs // Set up an array of sorted input key/value pairs
comparator.setStrength(Collator.TERTIARY); comparator.setStrength(Collator.TERTIARY);
@ -470,7 +560,7 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
} }
/** /**
* As the index is built, items may be discarded from the exemplars. This contains some of the discards, and is * As the index is built, strings may be discarded from the exemplars. This contains some of the discards, and is
* intended for debugging. * intended for debugging.
* *
* @internal * @internal
@ -481,7 +571,7 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
} }
/** /**
* As the index is built, items may be discarded from the exemplars. This contains some of the discards, and is * As the index is built, strings may be discarded from the exemplars. This contains some of the discards, and is
* intended for debugging. * intended for debugging.
* *
* @internal * @internal
@ -492,7 +582,7 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
} }
/** /**
* As the index is built, items may be discarded from the exemplars. This contains some of the discards, and is * As the index is built, strings may be discarded from the exemplars. This contains some of the discards, and is
* intended for debugging. * intended for debugging.
* *
* @internal * @internal
@ -507,7 +597,7 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
} }
private static final UnicodeSet IGNORE_SCRIPTS = new UnicodeSet( private static final UnicodeSet IGNORE_SCRIPTS = new UnicodeSet(
"[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]").freeze(); "[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]").freeze();
private static final UnicodeSet TO_TRY = new UnicodeSet("[:^nfcqc=no:]").removeAll(IGNORE_SCRIPTS).freeze(); private static final UnicodeSet TO_TRY = new UnicodeSet("[:^nfcqc=no:]").removeAll(IGNORE_SCRIPTS).freeze();
private static final List<String> FIRST_CHARS_IN_SCRIPTS = firstStringsInScript((RuleBasedCollator) Collator private static final List<String> FIRST_CHARS_IN_SCRIPTS = firstStringsInScript((RuleBasedCollator) Collator
@ -578,7 +668,7 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
private int maxCount = 99; private int maxCount = 99;
/** /**
* Comparator that returns "better" items first, where shorter NFKD is better, and otherwise NFKD binary order is * Comparator that returns "better" strings first, where shorter NFKD is better, and otherwise NFKD binary order is
* better, and otherwise binary order is better. * better, and otherwise binary order is better.
*/ */
private static class PreferenceComparator implements Comparator<Object> { private static class PreferenceComparator implements Comparator<Object> {
@ -621,27 +711,42 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
this.value = value; this.value = value;
} }
/**
* Get the key
*
* @return the key
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public CharSequence getKey() { public CharSequence getKey() {
return key; return key;
} }
/**
* Get the value
*
* @return the value
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public V getValue() { public V getValue() {
return value; return value;
} }
@Override
public String toString() { public String toString() {
return key + "=" + value; return key + "=" + value;
} }
} }
/** /**
* A "bucket", containing records sorted under an index string by getIndexBucketCharacters. Is created by the * A "bucket", containing records sorted under an index string by getIndexBucketCharacters. Is created by the
* addBucket method in BucketList. A typical implementation will provide methods getLabel(), getSpecial(), and * addBucket method in BucketList. A typical implementation will provide methods getLabel(), getSpecial(), and
* getValues().<br> * getValues().<br>
* See com.ibm.icu.dev.test.collator.IndexCharactersTest for an example. * See com.ibm.icu.dev.test.collator.IndexCharactersTest for an example.
* *
* @param <V> Value type * @param <V>
* Value type
* @draft ICU 4.6 * @draft ICU 4.6
* @provisional This API might change or be removed in a future release. * @provisional This API might change or be removed in a future release.
*/ */
@ -653,8 +758,13 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
/** /**
* Type of the label * Type of the label
*
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/ */
public enum LabelType {NORMAL, UNDERFLOW, INFLOW, OVERFLOW} public enum LabelType {
NORMAL, UNDERFLOW, INFLOW, OVERFLOW
}
/** /**
* Set up the bucket. * Set up the bucket.
@ -687,15 +797,27 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
* Get the type of this bucket's label: normal, underflow, inflow, or overflow * Get the type of this bucket's label: normal, underflow, inflow, or overflow
* *
* @return the label type (NORMAL, UNDERFLOW, INFLOW, or OVERFLOW) * @return the label type (NORMAL, UNDERFLOW, INFLOW, or OVERFLOW)
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/ */
public LabelType getLabelType() { public LabelType getLabelType() {
return labelType; return labelType;
} }
/**
* Get the number of records in the bucket.
*
* @return number of records in bucket
* @draft ICU 4.6
* @provisional This API might change or be removed in a future release.
*/
public int size() { public int size() {
return values.size(); return values.size();
} }
/**
* Iterator over the records in the bucket
*/
public Iterator<Record<V>> iterator() { public Iterator<Record<V>> iterator() {
return values.iterator(); return values.iterator();
} }
@ -704,14 +826,18 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
private class BucketList implements Iterable<Bucket<V>> { private class BucketList implements Iterable<Bucket<V>> {
private ArrayList<Bucket<V>> bucketList = new ArrayList<Bucket<V>>(); private ArrayList<Bucket<V>> bucketList = new ArrayList<Bucket<V>>();
BucketList(List<String> indexChars) { BucketList() {
// initialize indexCharacters;
getLabels();
bucketList.add(new Bucket<V>(getUnderflowLabel(), "", Bucket.LabelType.UNDERFLOW)); bucketList.add(new Bucket<V>(getUnderflowLabel(), "", Bucket.LabelType.UNDERFLOW));
// fix up the list, adding underflow, additions, overflow // fix up the list, adding underflow, additions, overflow
// insert infix labels as needed, using \uFFFF. // insert infix labels as needed, using \uFFFF.
String last = indexChars.get(0); String last = indexCharacters.get(0);
bucketList.add(new Bucket<V>(last, last, Bucket.LabelType.NORMAL)); bucketList.add(new Bucket<V>(last, last, Bucket.LabelType.NORMAL));
UnicodeSet lastSet = getScriptSet(last).removeAll(IGNORE_SCRIPTS); UnicodeSet lastSet = getScriptSet(last).removeAll(IGNORE_SCRIPTS);
for (int i = 1; i < indexCharacters.size(); ++i) { for (int i = 1; i < indexCharacters.size(); ++i) {
String current = indexCharacters.get(i); String current = indexCharacters.get(i);
UnicodeSet set = getScriptSet(current).removeAll(IGNORE_SCRIPTS); UnicodeSet set = getScriptSet(current).removeAll(IGNORE_SCRIPTS);
@ -719,7 +845,8 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
// check for adjacent // check for adjacent
String overflowComparisonString = getOverflowComparisonString(last); String overflowComparisonString = getOverflowComparisonString(last);
if (comparator.compare(overflowComparisonString, current) < 0) { if (comparator.compare(overflowComparisonString, current) < 0) {
bucketList.add(new Bucket<V>(getInflowLabel(), overflowComparisonString, Bucket.LabelType.INFLOW)); bucketList.add(new Bucket<V>(getInflowLabel(), overflowComparisonString,
Bucket.LabelType.INFLOW));
i++; i++;
lastSet = set; lastSet = set;
} }
@ -729,7 +856,9 @@ public class Index<V extends Comparable<V>> implements Iterable<Bucket<V>> {
lastSet = set; lastSet = set;
} }
String limitString = getOverflowComparisonString(last); String limitString = getOverflowComparisonString(last);
bucketList.add(new Bucket<V>(getOverflowLabel(), limitString, Bucket.LabelType.OVERFLOW)); // final, overflow bucket bucketList.add(new Bucket<V>(getOverflowLabel(), limitString, Bucket.LabelType.OVERFLOW)); // final,
// overflow
// bucket
} }
public Iterator<Bucket<V>> iterator() { public Iterator<Bucket<V>> iterator() {

View File

@ -19,7 +19,7 @@ import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.lang.UProperty; import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript; import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Collator; import com.ibm.icu.text.Collator;
import com.ibm.icu.text.Index; import com.ibm.icu.text.AlphabeticIndex;
import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale; import com.ibm.icu.util.ULocale;
@ -28,7 +28,7 @@ import com.ibm.icu.util.ULocale;
* @author markdavis * @author markdavis
* *
*/ */
public class IndexTest extends TestFmwk { public class AlphabeticIndexTest extends TestFmwk {
public static Set<String> KEY_LOCALES = new LinkedHashSet(Arrays.asList( public static Set<String> KEY_LOCALES = new LinkedHashSet(Arrays.asList(
"en", "es", "de", "fr", "ja", "it", "tr", "pt", "zh", "nl", "en", "es", "de", "fr", "ja", "it", "tr", "pt", "zh", "nl",
"pl", "ar", "ru", "zh_Hant", "ko", "th", "sv", "fi", "da", "pl", "ar", "ru", "zh_Hant", "ko", "th", "sv", "fi", "da",
@ -147,11 +147,11 @@ public class IndexTest extends TestFmwk {
}; };
public static void main(String[] args) throws Exception{ public static void main(String[] args) throws Exception{
new IndexTest().run(args); new AlphabeticIndexTest().run(args);
} }
public void TestFirstCharacters() { public void TestFirstCharacters() {
Index indexCharacters = new Index(ULocale.ENGLISH); AlphabeticIndex indexCharacters = new AlphabeticIndex(ULocale.ENGLISH);
RuleBasedCollator collator = indexCharacters.getCollator(); RuleBasedCollator collator = indexCharacters.getCollator();
collator.setStrength(Collator.IDENTICAL); collator.setStrength(Collator.IDENTICAL);
List<String> firsts = indexCharacters.getFirstScriptCharacters(); List<String> firsts = indexCharacters.getFirstScriptCharacters();
@ -184,39 +184,39 @@ public class IndexTest extends TestFmwk {
//"吉田", "山田", "佐々木", "山口", "松本", "井上", "木村", "", "清水" //"吉田", "山田", "佐々木", "山口", "松本", "井上", "木村", "", "清水"
}; };
ULocale additionalLocale = ULocale.ENGLISH; ULocale additionalLocale = ULocale.ENGLISH;
StringBuilder buffer = new StringBuilder(); StringBuilder UI = new StringBuilder();
for (String[] pair : localeAndIndexCharactersLists) { for (String[] pair : localeAndIndexCharactersLists) {
ULocale desiredLocale = new ULocale(pair[0]); ULocale desiredLocale = new ULocale(pair[0]);
// Create a simple index where the values for the strings are Integers, and add the strings // Create a simple index where the values for the strings are Integers, and add the strings
Index<Integer> index = new Index<Integer>(desiredLocale, additionalLocale); AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(desiredLocale).addIndexCharacters(additionalLocale);
int counter = 0; int counter = 0;
for (String item : test) { for (String item : test) {
index.add(item, counter++); index.add(item, counter++);
} }
logln(desiredLocale + "\t" + desiredLocale.getDisplayName(ULocale.ENGLISH) + " - " + desiredLocale.getDisplayName(desiredLocale) + "\t"); logln(desiredLocale + "\t" + desiredLocale.getDisplayName(ULocale.ENGLISH) + " - " + desiredLocale.getDisplayName(desiredLocale) + "\t");
buffer.setLength(0); UI.setLength(0);
buffer.append(desiredLocale + "\t"); UI.append(desiredLocale + "\t");
boolean showAll = true; boolean showAll = true;
// Show index at top. We could skip or gray out empty buckets // Show index at top. We could skip or gray out empty buckets
for (Index.Bucket<Integer> bucket : index) { for (AlphabeticIndex.Bucket<Integer> bucket : index) {
if (showAll || bucket.size() != 0) { if (showAll || bucket.size() != 0) {
showLabelAtTopInUI(buffer, bucket.getLabel()); showLabelAtTop(UI, bucket.getLabel());
} }
} }
logln(buffer.toString()); logln(UI.toString());
// Show the buckets with their contents, skipping empty buckets // Show the buckets with their contents, skipping empty buckets
for (Index.Bucket<Integer> bucket : index) { for (AlphabeticIndex.Bucket<Integer> bucket : index) {
if (bucket.size() != 0) { if (bucket.size() != 0) {
showLabelInUIList(buffer, bucket.getLabel()); showLabelInList(UI, bucket.getLabel());
for (Index.Record<Integer> item : bucket) { for (AlphabeticIndex.Record<Integer> item : bucket) {
showIndexedItemInUI(buffer, item.getKey(), item.getValue()); showIndexedItem(UI, item.getKey(), item.getValue());
} }
logln(buffer.toString()); logln(UI.toString());
if (bucket.getLabel().equals("E")) { if (bucket.getLabel().equals("E")) {
Map<String, Integer> keys = getKeys(bucket); Map<String, Integer> keys = getKeys(bucket);
Integer count = keys.get("edgar"); Integer count = keys.get("edgar");
@ -237,22 +237,22 @@ public class IndexTest extends TestFmwk {
} }
} }
private void showLabelAtTopInUI(StringBuilder buffer, String label) { private void showLabelAtTop(StringBuilder buffer, String label) {
buffer.append(label + " "); buffer.append(label + " ");
} }
private void showIndexedItemInUI(StringBuilder buffer, CharSequence key, Integer value) { private void showIndexedItem(StringBuilder buffer, CharSequence key, Integer value) {
buffer.append("\t " + key + "" + value); buffer.append("\t " + key + "" + value);
} }
private void showLabelInUIList(StringBuilder buffer, String label) { private void showLabelInList(StringBuilder buffer, String label) {
buffer.setLength(0); buffer.setLength(0);
buffer.append(label); buffer.append(label);
} }
private Map<String,Integer> getKeys(Index.Bucket<Integer> entry) { private Map<String,Integer> getKeys(AlphabeticIndex.Bucket<Integer> entry) {
Map<String,Integer> keys = new LinkedHashMap<String,Integer>(); Map<String,Integer> keys = new LinkedHashMap<String,Integer>();
for (Index.Record x : entry) { for (AlphabeticIndex.Record x : entry) {
String key = x.getKey().toString(); String key = x.getKey().toString();
Integer old = keys.get(key); Integer old = keys.get(key);
keys.put(key, old == null ? 1 : old + 1); keys.put(key, old == null ? 1 : old + 1);
@ -264,7 +264,7 @@ public class IndexTest extends TestFmwk {
for (String[] localeAndIndexCharacters : localeAndIndexCharactersLists) { for (String[] localeAndIndexCharacters : localeAndIndexCharactersLists) {
ULocale locale = new ULocale(localeAndIndexCharacters[0]); ULocale locale = new ULocale(localeAndIndexCharacters[0]);
String expectedIndexCharacters = localeAndIndexCharacters[1]; String expectedIndexCharacters = localeAndIndexCharacters[1];
Collection<String> indexCharacters = new Index(locale).getLabels(); Collection<String> indexCharacters = new AlphabeticIndex(locale).getLabels();
// Join the elements of the list to a string with delimiter ":" // Join the elements of the list to a string with delimiter ":"
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@ -310,7 +310,7 @@ public class IndexTest extends TestFmwk {
if (locale.getCountry().length() != 0) { if (locale.getCountry().length() != 0) {
continue; continue;
} }
Index indexCharacters = new Index(locale); AlphabeticIndex indexCharacters = new AlphabeticIndex(locale);
final Collection mainChars = indexCharacters.getLabels(); final Collection mainChars = indexCharacters.getLabels();
String mainCharString = mainChars.toString(); String mainCharString = mainChars.toString();
if (mainCharString.length() > 500) { if (mainCharString.length() > 500) {