ICU-12410 back to non-static CaseMap with options but no locale; immutable class with setters that return new/other objects
X-SVN-Rev: 39636
This commit is contained in:
parent
f735836e1e
commit
22702a020d
@ -4958,12 +4958,11 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
// Good if no or few changes.
|
||||
// Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = com.ibm.icu.text.CaseMap.toLower(
|
||||
locale, com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT, str,
|
||||
new StringBuilder(), edits);
|
||||
StringBuilder replacementChars = com.ibm.icu.text.CaseMap.toLower().omitUnchangedText().apply(
|
||||
locale, str, new StringBuilder(), edits);
|
||||
return applyEdits(str, replacementChars, edits);
|
||||
} else {
|
||||
return com.ibm.icu.text.CaseMap.toLower(locale, 0, str, new StringBuilder(), null).toString();
|
||||
return com.ibm.icu.text.CaseMap.toLower().apply(locale, str, new StringBuilder(), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11,131 +11,323 @@ import com.ibm.icu.lang.UCharacter;
|
||||
// - optimizing strategies for unstyled text: stop after number of changes or length of replacement?
|
||||
|
||||
/**
|
||||
* Low-level case mapping functions.
|
||||
* Low-level case mapping options and methods. Immutable.
|
||||
*
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public final class CaseMap {
|
||||
public abstract class CaseMap {
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
protected int internalOptions;
|
||||
|
||||
private CaseMap(int opt) { internalOptions = opt; }
|
||||
|
||||
/**
|
||||
* @return Lowercasing object with default options.
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Lower toLower() { return Lower.DEFAULT; }
|
||||
/**
|
||||
* @return Uppercasing object with default options.
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Upper toUpper() { return Upper.DEFAULT; }
|
||||
/**
|
||||
* @return Titlecasing object with default options.
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Title toTitle() { return Title.DEFAULT; }
|
||||
/**
|
||||
* @return Case folding object with default options.
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Fold fold() { return Fold.DEFAULT; }
|
||||
|
||||
/**
|
||||
* Omit unchanged text when case-mapping with {@link Edits}.
|
||||
*
|
||||
* @return an options object with this option.
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public abstract CaseMap omitUnchangedText();
|
||||
|
||||
/**
|
||||
* Lowercasing options and methods. Immutable.
|
||||
*
|
||||
* @see #toLower()
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final class Lower extends CaseMap {
|
||||
private static final Lower DEFAULT = new Lower(0);
|
||||
private static final Lower OMIT_UNCHANGED = new Lower(OMIT_UNCHANGED_TEXT);
|
||||
private Lower(int opt) { super(opt); }
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@Override
|
||||
public Lower omitUnchangedText() {
|
||||
return OMIT_UNCHANGED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lowercases a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID. Can be null for {@link Locale#getDefault}.
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toLowerCase(Locale, String)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public <A extends Appendable> A apply(
|
||||
Locale locale, CharSequence src, A dest, Edits edits) {
|
||||
if (locale == null) {
|
||||
locale = Locale.getDefault();
|
||||
}
|
||||
int caseLocale = UCaseProps.getCaseLocale(locale);
|
||||
// TODO: remove package path
|
||||
return com.ibm.icu.impl.CaseMap.toLower(
|
||||
caseLocale, internalOptions, src, dest, edits);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Uppercasing options and methods. Immutable.
|
||||
*
|
||||
* @see #toUpper()
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final class Upper extends CaseMap {
|
||||
private static final Upper DEFAULT = new Upper(0);
|
||||
private static final Upper OMIT_UNCHANGED = new Upper(OMIT_UNCHANGED_TEXT);
|
||||
private Upper(int opt) { super(opt); }
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@Override
|
||||
public Upper omitUnchangedText() {
|
||||
return OMIT_UNCHANGED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Uppercases a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID. Can be null for {@link Locale#getDefault}.
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toUpperCase(Locale, String)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public <A extends Appendable> A apply(
|
||||
Locale locale, CharSequence src, A dest, Edits edits) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Titlecasing options and methods. Immutable.
|
||||
*
|
||||
* @see #toTitle()
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final class Title extends CaseMap {
|
||||
private static final Title DEFAULT = new Title(0);
|
||||
private static final Title OMIT_UNCHANGED = new Title(OMIT_UNCHANGED_TEXT);
|
||||
private Title(int opt) { super(opt); }
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@Override
|
||||
public Title omitUnchangedText() {
|
||||
if (internalOptions == 0 || internalOptions == OMIT_UNCHANGED_TEXT) {
|
||||
return OMIT_UNCHANGED;
|
||||
}
|
||||
return new Title(internalOptions | OMIT_UNCHANGED_TEXT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not lowercase non-initial parts of words when titlecasing.
|
||||
*
|
||||
* <p>By default, titlecasing will titlecase the first cased character
|
||||
* of a word and lowercase all other characters.
|
||||
* With this option, the other characters will not be modified.
|
||||
*
|
||||
* @return an options object with this option.
|
||||
* @see UCharacter#TITLECASE_NO_LOWERCASE
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Title noLowercase() {
|
||||
return new Title(internalOptions | UCharacter.TITLECASE_NO_LOWERCASE);
|
||||
}
|
||||
|
||||
// TODO: update references to the Unicode Standard for recent version
|
||||
/**
|
||||
* Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
|
||||
* titlecase exactly the characters at breaks from the iterator.
|
||||
*
|
||||
* <p>By default, titlecasing will take each break iterator index,
|
||||
* adjust it by looking for the next cased character, and titlecase that one.
|
||||
* Other characters are lowercased.
|
||||
*
|
||||
* <p>This follows Unicode 4 & 5 section 3.13 Default Case Operations:
|
||||
*
|
||||
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
|
||||
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
|
||||
* cased character F. If F exists, map F to default_title(F); then map each
|
||||
* subsequent character C to default_lower(C).
|
||||
*
|
||||
* @return an options object with this option.
|
||||
* @see UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Title noBreakAdjustment() {
|
||||
return new Title(internalOptions | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Titlecases a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID. Can be null for {@link Locale#getDefault}.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If null, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public <A extends Appendable> A apply(
|
||||
Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Case folding options and methods. Immutable.
|
||||
*
|
||||
* @see #fold()
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final class Fold extends CaseMap {
|
||||
private static final Fold DEFAULT = new Fold(0);
|
||||
private static final Fold TURKIC = new Fold(UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I);
|
||||
private static final Fold OMIT_UNCHANGED = new Fold(OMIT_UNCHANGED_TEXT);
|
||||
private static final Fold TURKIC_OMIT_UNCHANGED = new Fold(
|
||||
UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I | OMIT_UNCHANGED_TEXT);
|
||||
private Fold(int opt) { super(opt); }
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
@Override
|
||||
public Fold omitUnchangedText() {
|
||||
return (internalOptions & UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0 ?
|
||||
OMIT_UNCHANGED : TURKIC_OMIT_UNCHANGED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle dotted I and dotless i appropriately for Turkic languages (tr, az).
|
||||
*
|
||||
* <p>Uses the Unicode CaseFolding.txt mappings marked with 'T' that
|
||||
* are to be excluded for default mappings and
|
||||
* included for the Turkic-specific mappings.
|
||||
*
|
||||
* @return an options object with this option.
|
||||
* @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Fold turkic() {
|
||||
return (internalOptions & OMIT_UNCHANGED_TEXT) == 0 ?
|
||||
TURKIC : TURKIC_OMIT_UNCHANGED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#foldCase(String, int)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public <A extends Appendable> A apply(CharSequence src, A dest, Edits edits) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Omit unchanged text when case-mapping with Edits.
|
||||
*
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
public static final int OMIT_UNCHANGED_TEXT = 0x4000;
|
||||
|
||||
/**
|
||||
* Lowercases a string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID.
|
||||
* @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT}.
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toLowerCase(Locale, String)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static <A extends Appendable> A toLower(
|
||||
Locale locale, int options, CharSequence src, A dest, Edits edits) {
|
||||
if (locale == null) {
|
||||
locale = Locale.getDefault();
|
||||
}
|
||||
int caseLocale = UCaseProps.getCaseLocale(locale);
|
||||
// TODO: remove package path
|
||||
return com.ibm.icu.impl.CaseMap.toLower(caseLocale, options, src, dest, edits);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uppercases a string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID.
|
||||
* @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT}.
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toUpperCase(Locale, String)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static <A extends Appendable> A toUpper(
|
||||
Locale locale, int options, CharSequence src, A dest, Edits edits) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Titlecases a string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID.
|
||||
* @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT},
|
||||
* {@link UCharacter#TITLECASE_NO_LOWERCASE},
|
||||
* {@link UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT}.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If null, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static <A extends Appendable> A toTitle(
|
||||
Locale locale, int options, BreakIterator iter,
|
||||
CharSequence src, A dest, Edits edits) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Case-folds a string and optionally records edits.
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT},
|
||||
* {@link UCharacter#FOLD_CASE_DEFAULT},
|
||||
* {@link UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I}.
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#foldCase(String, int)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static <A extends Appendable> A foldCase(
|
||||
int options, CharSequence src, A dest, Edits edits) {
|
||||
return null;
|
||||
}
|
||||
@Deprecated
|
||||
public static final int OMIT_UNCHANGED_TEXT = 0x4000; // TODO: move to a non-public class
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user