ICU-12410 start to port class CaseMap to Java; make case mapping impl code work with CharSequence & Appendable
X-SVN-Rev: 39603
This commit is contained in:
parent
8e6641fb3c
commit
2fe503c981
@ -1177,7 +1177,7 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i,
|
||||
* for each character.
|
||||
* TODO: Try to re-consolidate one way or another with the non-Greek function.
|
||||
*/
|
||||
int32_t toUpper(int32_t caseLocale, uint32_t options,
|
||||
int32_t toUpper(uint32_t options,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
@ -1305,7 +1305,7 @@ int32_t toUpper(int32_t caseLocale, uint32_t options,
|
||||
}
|
||||
} else {
|
||||
const UChar *s;
|
||||
c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, caseLocale);
|
||||
c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, UCASE_LOC_GREEK);
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
nextIndex - i, options, edits);
|
||||
if (destIndex < 0) {
|
||||
@ -1349,7 +1349,7 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
if (caseLocale == UCASE_LOC_GREEK) {
|
||||
return GreekUpper::toUpper(caseLocale, options, dest, destCapacity, src, srcLength, edits, errorCode);
|
||||
return GreekUpper::toUpper(options, dest, destCapacity, src, srcLength, edits, errorCode);
|
||||
}
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
|
@ -2,6 +2,10 @@
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.text.Edits;
|
||||
import com.ibm.icu.util.ICUUncheckedIOException;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
// TODO: rename to CaseMapImpl
|
||||
@ -13,11 +17,11 @@ public final class CaseMap {
|
||||
public static final class StringContextIterator implements UCaseProps.ContextIterator {
|
||||
/**
|
||||
* Constructor.
|
||||
* @param s String to iterate over.
|
||||
* @param src String to iterate over.
|
||||
*/
|
||||
public StringContextIterator(String s) {
|
||||
this.s=s;
|
||||
limit=s.length();
|
||||
public StringContextIterator(CharSequence src) {
|
||||
this.s=src;
|
||||
limit=src.length();
|
||||
cpStart=cpLimit=index=0;
|
||||
dir=0;
|
||||
}
|
||||
@ -61,7 +65,7 @@ public final class CaseMap {
|
||||
public int nextCaseMapCP() {
|
||||
cpStart=cpLimit;
|
||||
if(cpLimit<limit) {
|
||||
int c=s.codePointAt(cpLimit);
|
||||
int c=Character.codePointAt(s, cpLimit);
|
||||
cpLimit+=Character.charCount(c);
|
||||
return c;
|
||||
} else {
|
||||
@ -85,6 +89,10 @@ public final class CaseMap {
|
||||
return cpLimit;
|
||||
}
|
||||
|
||||
public int getCPLength() {
|
||||
return cpLimit-cpStart;
|
||||
}
|
||||
|
||||
// implement UCaseProps.ContextIterator
|
||||
// The following code is not used anywhere in this private class
|
||||
@Override
|
||||
@ -109,11 +117,11 @@ public final class CaseMap {
|
||||
int c;
|
||||
|
||||
if(dir>0 && index<s.length()) {
|
||||
c=s.codePointAt(index);
|
||||
c=Character.codePointAt(s, index);
|
||||
index+=Character.charCount(c);
|
||||
return c;
|
||||
} else if(dir<0 && index>0) {
|
||||
c=s.codePointBefore(index);
|
||||
c=Character.codePointBefore(s, index);
|
||||
index-=Character.charCount(c);
|
||||
return c;
|
||||
}
|
||||
@ -121,44 +129,107 @@ public final class CaseMap {
|
||||
}
|
||||
|
||||
// variables
|
||||
protected String s;
|
||||
protected CharSequence s;
|
||||
protected int index, limit, cpStart, cpLimit;
|
||||
protected int dir; // 0=initial state >0=forward <0=backward
|
||||
}
|
||||
|
||||
/** Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. */
|
||||
private static final void appendResult(int c, StringBuilder result) {
|
||||
// Decode the result.
|
||||
if (c < 0) {
|
||||
// (not) original code point
|
||||
result.appendCodePoint(~c);
|
||||
} else if (c <= UCaseProps.MAX_STRING_LENGTH) {
|
||||
// The mapping has already been appended to result.
|
||||
private static int appendCodePoint(Appendable a, int c) throws IOException {
|
||||
if (c <= Character.MAX_VALUE) {
|
||||
a.append((char)c);
|
||||
return 1;
|
||||
} else {
|
||||
// Append the single-code point mapping.
|
||||
result.appendCodePoint(c);
|
||||
a.append((char)(0xd7c0 + (c >> 10)));
|
||||
a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff)));
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Move the other string case mapping functions from UCharacter to here, too.
|
||||
/**
|
||||
* Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
|
||||
* @throws IOException if appending to dest fails
|
||||
*/
|
||||
private static void appendResult(int result, Appendable dest,
|
||||
int cpLength, int options, Edits edits) throws IOException {
|
||||
// Decode the result.
|
||||
if (result < 0) {
|
||||
// (not) original code point
|
||||
if (edits != null) {
|
||||
edits.addUnchanged(cpLength);
|
||||
// TODO: remove package path
|
||||
if ((options & com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT) != 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
appendCodePoint(dest, ~result);
|
||||
} else if (result <= UCaseProps.MAX_STRING_LENGTH) {
|
||||
// The mapping string has already been appended to dest; result is its length.
|
||||
if (edits != null) {
|
||||
edits.addReplace(cpLength, result);
|
||||
}
|
||||
} else {
|
||||
// Append the single-code point mapping.
|
||||
int length = appendCodePoint(dest, result);
|
||||
if (edits != null) {
|
||||
edits.addReplace(cpLength, length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final void appendUnchanged(CharSequence src, int start, int length,
|
||||
Appendable dest, int options, Edits edits) throws IOException {
|
||||
if (length > 0) {
|
||||
if (edits != null) {
|
||||
edits.addUnchanged(length);
|
||||
// TODO: remove package path
|
||||
if ((options & com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT) != 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
dest.append(src, start, start + length);
|
||||
}
|
||||
}
|
||||
|
||||
public static <A extends Appendable> A toLower(int caseLocale, int options,
|
||||
CharSequence src, A dest, Edits edits) {
|
||||
try {
|
||||
if (edits != null) {
|
||||
edits.reset();
|
||||
}
|
||||
StringContextIterator iter = new StringContextIterator(src);
|
||||
int c;
|
||||
while ((c = iter.nextCaseMapCP()) >= 0) {
|
||||
c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
|
||||
appendResult(c, dest, iter.getCPLength(), options, edits);
|
||||
}
|
||||
return dest;
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static String toUpper(ULocale locale, String str) {
|
||||
if (locale == null) {
|
||||
locale = ULocale.getDefault();
|
||||
}
|
||||
int[] locCache = new int[] { UCaseProps.getCaseLocale(locale, null) };
|
||||
if (locCache[0] == UCaseProps.LOC_GREEK) {
|
||||
return GreekUpper.toUpper(str, locCache);
|
||||
}
|
||||
try {
|
||||
int options = 0; Edits edits = null; // TODO
|
||||
if (locale == null) {
|
||||
locale = ULocale.getDefault();
|
||||
}
|
||||
int caseLocale = UCaseProps.getCaseLocale(locale);
|
||||
if (caseLocale == UCaseProps.LOC_GREEK) {
|
||||
return GreekUpper.toUpper(str);
|
||||
}
|
||||
|
||||
StringContextIterator iter = new StringContextIterator(str);
|
||||
StringBuilder result = new StringBuilder(str.length());
|
||||
int c;
|
||||
while((c=iter.nextCaseMapCP())>=0) {
|
||||
c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
|
||||
appendResult(c, result);
|
||||
StringContextIterator iter = new StringContextIterator(str);
|
||||
StringBuilder result = new StringBuilder(str.length());
|
||||
int c;
|
||||
while((c=iter.nextCaseMapCP())>=0) {
|
||||
c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, caseLocale);
|
||||
appendResult(c, result, iter.getCPLength(), options, edits);
|
||||
}
|
||||
return result.toString();
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private static final class GreekUpper {
|
||||
@ -662,8 +733,10 @@ public final class CaseMap {
|
||||
* TODO: Try to re-consolidate one way or another with the non-Greek function.
|
||||
*
|
||||
* <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
|
||||
* @throws IOException
|
||||
*/
|
||||
private static String toUpper(CharSequence s, int[] locCache) {
|
||||
private static String toUpper(CharSequence s) throws IOException {
|
||||
int options = 0; Edits edits = null; // TODO
|
||||
StringBuilder result = new StringBuilder(s.length());
|
||||
int state = 0;
|
||||
for (int i = 0; i < s.length();) {
|
||||
@ -747,8 +820,8 @@ public final class CaseMap {
|
||||
--numYpogegrammeni;
|
||||
}
|
||||
} else {
|
||||
c = UCaseProps.INSTANCE.toFullUpper(c, null, result, null, locCache);
|
||||
appendResult(c, result);
|
||||
c = UCaseProps.INSTANCE.toFullUpper(c, null, result, UCaseProps.LOC_GREEK);
|
||||
appendResult(c, result, nextIndex - i, options, edits);
|
||||
}
|
||||
i = nextIndex;
|
||||
state = nextState;
|
||||
|
@ -24,6 +24,7 @@ package com.ibm.icu.impl;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
@ -71,7 +72,7 @@ public final class UCaseProps {
|
||||
// read exceptions[]
|
||||
count=indexes[IX_EXC_LENGTH];
|
||||
if(count>0) {
|
||||
exceptions=ICUBinary.getChars(bytes, count, 0);
|
||||
exceptions=ICUBinary.getString(bytes, count, 0);
|
||||
}
|
||||
|
||||
// read unfold[]
|
||||
@ -150,7 +151,7 @@ public final class UCaseProps {
|
||||
*
|
||||
* @param excWord (in) initial exceptions word
|
||||
* @param index (in) desired slot index
|
||||
* @param excOffset (in) offset into exceptions[] after excWord=exceptions[excOffset++];
|
||||
* @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
|
||||
* @return bits 31..0: slot value
|
||||
* 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
|
||||
*/
|
||||
@ -158,11 +159,11 @@ public final class UCaseProps {
|
||||
long value;
|
||||
if((excWord&EXC_DOUBLE_SLOTS)==0) {
|
||||
excOffset+=slotOffset(excWord, index);
|
||||
value=exceptions[excOffset];
|
||||
value=exceptions.charAt(excOffset);
|
||||
} else {
|
||||
excOffset+=2*slotOffset(excWord, index);
|
||||
value=exceptions[excOffset++];
|
||||
value=(value<<16)|exceptions[excOffset];
|
||||
value=exceptions.charAt(excOffset++);
|
||||
value=(value<<16)|exceptions.charAt(excOffset);
|
||||
}
|
||||
return value |((long)excOffset<<32);
|
||||
}
|
||||
@ -172,11 +173,11 @@ public final class UCaseProps {
|
||||
int value;
|
||||
if((excWord&EXC_DOUBLE_SLOTS)==0) {
|
||||
excOffset+=slotOffset(excWord, index);
|
||||
value=exceptions[excOffset];
|
||||
value=exceptions.charAt(excOffset);
|
||||
} else {
|
||||
excOffset+=2*slotOffset(excWord, index);
|
||||
value=exceptions[excOffset++];
|
||||
value=(value<<16)|exceptions[excOffset];
|
||||
value=exceptions.charAt(excOffset++);
|
||||
value=(value<<16)|exceptions.charAt(excOffset);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
@ -191,7 +192,7 @@ public final class UCaseProps {
|
||||
}
|
||||
} else {
|
||||
int excOffset=getExceptionsOffset(props);
|
||||
int excWord=exceptions[excOffset++];
|
||||
int excWord=exceptions.charAt(excOffset++);
|
||||
if(hasSlot(excWord, EXC_LOWER)) {
|
||||
c=getSlotValue(excWord, EXC_LOWER, excOffset);
|
||||
}
|
||||
@ -207,7 +208,7 @@ public final class UCaseProps {
|
||||
}
|
||||
} else {
|
||||
int excOffset=getExceptionsOffset(props);
|
||||
int excWord=exceptions[excOffset++];
|
||||
int excWord=exceptions.charAt(excOffset++);
|
||||
if(hasSlot(excWord, EXC_UPPER)) {
|
||||
c=getSlotValue(excWord, EXC_UPPER, excOffset);
|
||||
}
|
||||
@ -223,7 +224,7 @@ public final class UCaseProps {
|
||||
}
|
||||
} else {
|
||||
int excOffset=getExceptionsOffset(props);
|
||||
int excWord=exceptions[excOffset++];
|
||||
int excWord=exceptions.charAt(excOffset++);
|
||||
int index;
|
||||
if(hasSlot(excWord, EXC_TITLE)) {
|
||||
index=EXC_TITLE;
|
||||
@ -291,7 +292,7 @@ public final class UCaseProps {
|
||||
*/
|
||||
int excOffset0, excOffset=getExceptionsOffset(props);
|
||||
int closureOffset;
|
||||
int excWord=exceptions[excOffset++];
|
||||
int excWord=exceptions.charAt(excOffset++);
|
||||
int index, closureLength, fullLength, length;
|
||||
|
||||
excOffset0=excOffset;
|
||||
@ -334,7 +335,7 @@ public final class UCaseProps {
|
||||
/* add the full case folding string */
|
||||
length=fullLength&0xf;
|
||||
if(length!=0) {
|
||||
set.add(new String(exceptions, excOffset, length));
|
||||
set.add(exceptions.substring(excOffset, excOffset+length));
|
||||
excOffset+=length;
|
||||
}
|
||||
|
||||
@ -348,8 +349,9 @@ public final class UCaseProps {
|
||||
}
|
||||
|
||||
/* add each code point in the closure string */
|
||||
for(index=0; index<closureLength; index+=UTF16.getCharCount(c)) {
|
||||
c=UTF16.charAt(exceptions, closureOffset, exceptions.length, index);
|
||||
int limit=closureOffset+closureLength;
|
||||
for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
|
||||
c=exceptions.codePointAt(index);
|
||||
set.add(c);
|
||||
}
|
||||
}
|
||||
@ -468,7 +470,7 @@ public final class UCaseProps {
|
||||
if(!propsHasException(props)) {
|
||||
return props&DOT_MASK;
|
||||
} else {
|
||||
return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK;
|
||||
return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
@ -605,38 +607,44 @@ public final class UCaseProps {
|
||||
*/
|
||||
public static final int MAX_STRING_LENGTH=0x1f;
|
||||
|
||||
private static final int LOC_UNKNOWN=0;
|
||||
private static final int LOC_ROOT=1;
|
||||
// private static final int LOC_UNKNOWN=0;
|
||||
public static final int LOC_ROOT=1;
|
||||
private static final int LOC_TURKISH=2;
|
||||
private static final int LOC_LITHUANIAN=3;
|
||||
static final int LOC_GREEK=4;
|
||||
|
||||
/*
|
||||
* Checks and caches the type of locale ID as it is relevant for case mapping.
|
||||
* If the locCache is not null, then it must be initialized with locCache[0]=0 .
|
||||
*/
|
||||
static final int getCaseLocale(ULocale locale, int[] locCache) {
|
||||
int result;
|
||||
|
||||
if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) {
|
||||
return result;
|
||||
public static final int getCaseLocale(Locale locale) {
|
||||
return getCaseLocale(locale.getLanguage());
|
||||
}
|
||||
public static final int getCaseLocale(ULocale locale) {
|
||||
return getCaseLocale(locale.getLanguage());
|
||||
}
|
||||
/** Accepts both 2- and 3-letter language subtags. */
|
||||
private static final int getCaseLocale(String language) {
|
||||
// Check the subtag length to reduce the number of comparisons
|
||||
// for locales without special behavior.
|
||||
// Fastpath for English "en" which is often used for default (=root locale) case mappings,
|
||||
// and for Chinese "zh": Very common but no special case mapping behavior.
|
||||
if(language.length()==2) {
|
||||
if(language.equals("en") || language.charAt(0)>'t') {
|
||||
return LOC_ROOT;
|
||||
} else if(language.equals("tr") || language.equals("az")) {
|
||||
return LOC_TURKISH;
|
||||
} else if(language.equals("el")) {
|
||||
return LOC_GREEK;
|
||||
} else if(language.equals("lt")) {
|
||||
return LOC_LITHUANIAN;
|
||||
}
|
||||
} else if(language.length()==3) {
|
||||
if(language.equals("tur") || language.equals("aze")) {
|
||||
return LOC_TURKISH;
|
||||
} else if(language.equals("ell")) {
|
||||
return LOC_GREEK;
|
||||
} else if(language.equals("lit")) {
|
||||
return LOC_LITHUANIAN;
|
||||
}
|
||||
}
|
||||
|
||||
result=LOC_ROOT;
|
||||
|
||||
String language=locale.getLanguage();
|
||||
if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) {
|
||||
result=LOC_TURKISH;
|
||||
} else if(language.equals("el") || language.equals("ell")) {
|
||||
result=LOC_GREEK;
|
||||
} else if(language.equals("lt") || language.equals("lit")) {
|
||||
result=LOC_LITHUANIAN;
|
||||
}
|
||||
|
||||
if(locCache!=null) {
|
||||
locCache[0]=result;
|
||||
}
|
||||
return result;
|
||||
return LOC_ROOT;
|
||||
}
|
||||
|
||||
/* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */
|
||||
@ -797,19 +805,14 @@ public final class UCaseProps {
|
||||
* See ContextIterator for details.
|
||||
* If iter==null then a context-independent result is returned.
|
||||
* @param out If the mapping result is a string, then it is appended to out.
|
||||
* @param locale Locale ID for locale-dependent mappings.
|
||||
* @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing
|
||||
* the locale ID for subsequent calls.
|
||||
* Can be null.
|
||||
* @param caseLocale Case locale value from getCaseLocale() (ucase_getCaseLocale() in C++).
|
||||
* @return Output code point or string length, see MAX_STRING_LENGTH.
|
||||
*
|
||||
* @see ContextIterator
|
||||
* @see #MAX_STRING_LENGTH
|
||||
* @internal
|
||||
*/
|
||||
public final int toFullLower(int c, ContextIterator iter,
|
||||
StringBuilder out,
|
||||
ULocale locale, int[] locCache) {
|
||||
public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
|
||||
int result, props;
|
||||
|
||||
result=c;
|
||||
@ -820,22 +823,20 @@ public final class UCaseProps {
|
||||
}
|
||||
} else {
|
||||
int excOffset=getExceptionsOffset(props), excOffset2;
|
||||
int excWord=exceptions[excOffset++];
|
||||
int excWord=exceptions.charAt(excOffset++);
|
||||
int full;
|
||||
|
||||
excOffset2=excOffset;
|
||||
|
||||
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
|
||||
/* use hardcoded conditions and mappings */
|
||||
int loc=getCaseLocale(locale, locCache);
|
||||
|
||||
/*
|
||||
* Test for conditional mappings first
|
||||
* (otherwise the unconditional default mappings are always taken),
|
||||
* then test for characters that have unconditional mappings in SpecialCasing.txt,
|
||||
* then get the UnicodeData.txt mappings.
|
||||
*/
|
||||
if( loc==LOC_LITHUANIAN &&
|
||||
if( caseLocale==LOC_LITHUANIAN &&
|
||||
/* base characters, find accents above */
|
||||
(((c==0x49 || c==0x4a || c==0x12e) &&
|
||||
isFollowedByMoreAbove(iter)) ||
|
||||
@ -858,30 +859,34 @@ public final class UCaseProps {
|
||||
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
*/
|
||||
switch(c) {
|
||||
case 0x49: /* LATIN CAPITAL LETTER I */
|
||||
out.append(iDot);
|
||||
return 2;
|
||||
case 0x4a: /* LATIN CAPITAL LETTER J */
|
||||
out.append(jDot);
|
||||
return 2;
|
||||
case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
|
||||
out.append(iOgonekDot);
|
||||
return 2;
|
||||
case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
|
||||
out.append(iDotGrave);
|
||||
return 3;
|
||||
case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
|
||||
out.append(iDotAcute);
|
||||
return 3;
|
||||
case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
|
||||
out.append(iDotTilde);
|
||||
return 3;
|
||||
default:
|
||||
return 0; /* will not occur */
|
||||
try {
|
||||
switch(c) {
|
||||
case 0x49: /* LATIN CAPITAL LETTER I */
|
||||
out.append(iDot);
|
||||
return 2;
|
||||
case 0x4a: /* LATIN CAPITAL LETTER J */
|
||||
out.append(jDot);
|
||||
return 2;
|
||||
case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
|
||||
out.append(iOgonekDot);
|
||||
return 2;
|
||||
case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
|
||||
out.append(iDotGrave);
|
||||
return 3;
|
||||
case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
|
||||
out.append(iDotAcute);
|
||||
return 3;
|
||||
case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
|
||||
out.append(iDotTilde);
|
||||
return 3;
|
||||
default:
|
||||
return 0; /* will not occur */
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
/* # Turkish and Azeri */
|
||||
} else if(loc==LOC_TURKISH && c==0x130) {
|
||||
} else if(caseLocale==LOC_TURKISH && c==0x130) {
|
||||
/*
|
||||
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
||||
# The following rules handle those cases.
|
||||
@ -890,7 +895,7 @@ public final class UCaseProps {
|
||||
0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
*/
|
||||
return 0x69;
|
||||
} else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
|
||||
} else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
|
||||
/*
|
||||
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
|
||||
# This matches the behavior of the canonically equivalent I-dot_above
|
||||
@ -899,7 +904,7 @@ public final class UCaseProps {
|
||||
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
|
||||
*/
|
||||
return 0; /* remove the dot (continue without output) */
|
||||
} else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
|
||||
} else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
|
||||
/*
|
||||
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
|
||||
|
||||
@ -913,8 +918,12 @@ public final class UCaseProps {
|
||||
|
||||
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
*/
|
||||
out.append(iDot);
|
||||
return 2;
|
||||
try {
|
||||
out.append(iDot);
|
||||
return 2;
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
} else if( c==0x3a3 &&
|
||||
!isFollowedByCasedLetter(iter, 1) &&
|
||||
isFollowedByCasedLetter(iter, -1) /* -1=preceded */
|
||||
@ -936,11 +945,15 @@ public final class UCaseProps {
|
||||
/* start of full case mapping strings */
|
||||
excOffset=(int)(value>>32)+1;
|
||||
|
||||
/* set the output pointer to the lowercase mapping */
|
||||
out.append(exceptions, excOffset, full);
|
||||
try {
|
||||
// append the lowercase mapping
|
||||
out.append(exceptions, excOffset, excOffset+full);
|
||||
|
||||
/* return the string length */
|
||||
return full;
|
||||
/* return the string length */
|
||||
return full;
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -954,8 +967,8 @@ public final class UCaseProps {
|
||||
|
||||
/* internal */
|
||||
private final int toUpperOrTitle(int c, ContextIterator iter,
|
||||
StringBuilder out,
|
||||
ULocale locale, int[] locCache,
|
||||
Appendable out,
|
||||
int loc,
|
||||
boolean upperNotTitle) {
|
||||
int result;
|
||||
int props;
|
||||
@ -968,15 +981,13 @@ public final class UCaseProps {
|
||||
}
|
||||
} else {
|
||||
int excOffset=getExceptionsOffset(props), excOffset2;
|
||||
int excWord=exceptions[excOffset++];
|
||||
int excWord=exceptions.charAt(excOffset++);
|
||||
int full, index;
|
||||
|
||||
excOffset2=excOffset;
|
||||
|
||||
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
|
||||
/* use hardcoded conditions and mappings */
|
||||
int loc=getCaseLocale(locale, locCache);
|
||||
|
||||
if(loc==LOC_TURKISH && c==0x69) {
|
||||
/*
|
||||
# Turkish and Azeri
|
||||
@ -1026,11 +1037,15 @@ public final class UCaseProps {
|
||||
}
|
||||
|
||||
if(full!=0) {
|
||||
/* set the output pointer to the result string */
|
||||
out.append(exceptions, excOffset, full);
|
||||
try {
|
||||
// append the result string
|
||||
out.append(exceptions, excOffset, excOffset+full);
|
||||
|
||||
/* return the string length */
|
||||
return full;
|
||||
/* return the string length */
|
||||
return full;
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1049,15 +1064,15 @@ public final class UCaseProps {
|
||||
}
|
||||
|
||||
public final int toFullUpper(int c, ContextIterator iter,
|
||||
StringBuilder out,
|
||||
ULocale locale, int[] locCache) {
|
||||
return toUpperOrTitle(c, iter, out, locale, locCache, true);
|
||||
Appendable out,
|
||||
int caseLocale) {
|
||||
return toUpperOrTitle(c, iter, out, caseLocale, true);
|
||||
}
|
||||
|
||||
public final int toFullTitle(int c, ContextIterator iter,
|
||||
StringBuilder out,
|
||||
ULocale locale, int[] locCache) {
|
||||
return toUpperOrTitle(c, iter, out, locale, locCache, false);
|
||||
Appendable out,
|
||||
int caseLocale) {
|
||||
return toUpperOrTitle(c, iter, out, caseLocale, false);
|
||||
}
|
||||
|
||||
/* case folding ------------------------------------------------------------- */
|
||||
@ -1117,7 +1132,7 @@ public final class UCaseProps {
|
||||
}
|
||||
} else {
|
||||
int excOffset=getExceptionsOffset(props);
|
||||
int excWord=exceptions[excOffset++];
|
||||
int excWord=exceptions.charAt(excOffset++);
|
||||
int index;
|
||||
if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
|
||||
/* special case folding mappings, hardcoded */
|
||||
@ -1168,7 +1183,7 @@ public final class UCaseProps {
|
||||
* together in a way that they still fold to common result strings.
|
||||
*/
|
||||
|
||||
public final int toFullFolding(int c, StringBuilder out, int options) {
|
||||
public final int toFullFolding(int c, Appendable out, int options) {
|
||||
int result;
|
||||
int props;
|
||||
|
||||
@ -1180,7 +1195,7 @@ public final class UCaseProps {
|
||||
}
|
||||
} else {
|
||||
int excOffset=getExceptionsOffset(props), excOffset2;
|
||||
int excWord=exceptions[excOffset++];
|
||||
int excWord=exceptions.charAt(excOffset++);
|
||||
int full, index;
|
||||
|
||||
excOffset2=excOffset;
|
||||
@ -1194,8 +1209,12 @@ public final class UCaseProps {
|
||||
return 0x69;
|
||||
} else if(c==0x130) {
|
||||
/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
|
||||
out.append(iDot);
|
||||
return 2;
|
||||
try {
|
||||
out.append(iDot);
|
||||
return 2;
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Turkic mappings */
|
||||
@ -1219,11 +1238,15 @@ public final class UCaseProps {
|
||||
full=(full>>4)&0xf;
|
||||
|
||||
if(full!=0) {
|
||||
/* set the output pointer to the result string */
|
||||
out.append(exceptions, excOffset, full);
|
||||
try {
|
||||
// append the result string
|
||||
out.append(exceptions, excOffset, excOffset+full);
|
||||
|
||||
/* return the string length */
|
||||
return full;
|
||||
/* return the string length */
|
||||
return full;
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1242,7 +1265,6 @@ public final class UCaseProps {
|
||||
|
||||
/* case mapping properties API ---------------------------------------------- */
|
||||
|
||||
private static final int[] rootLocCache = { LOC_ROOT };
|
||||
/*
|
||||
* We need a StringBuilder for multi-code point output from the
|
||||
* full case mapping functions. However, we do not actually use that output,
|
||||
@ -1282,20 +1304,20 @@ public final class UCaseProps {
|
||||
*/
|
||||
case UProperty.CHANGES_WHEN_LOWERCASED:
|
||||
dummyStringBuilder.setLength(0);
|
||||
return toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
|
||||
return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
|
||||
case UProperty.CHANGES_WHEN_UPPERCASED:
|
||||
dummyStringBuilder.setLength(0);
|
||||
return toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
|
||||
return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
|
||||
case UProperty.CHANGES_WHEN_TITLECASED:
|
||||
dummyStringBuilder.setLength(0);
|
||||
return toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
|
||||
return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
|
||||
/* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
|
||||
case UProperty.CHANGES_WHEN_CASEMAPPED:
|
||||
dummyStringBuilder.setLength(0);
|
||||
return
|
||||
toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
|
||||
toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
|
||||
toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
|
||||
toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
|
||||
toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
|
||||
toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -1303,7 +1325,7 @@ public final class UCaseProps {
|
||||
|
||||
// data members -------------------------------------------------------- ***
|
||||
private int indexes[];
|
||||
private char exceptions[];
|
||||
private String exceptions;
|
||||
private char unfold[];
|
||||
|
||||
private Trie2_16 trie;
|
||||
|
@ -29,6 +29,7 @@ import com.ibm.icu.impl.UPropertyAliases;
|
||||
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
|
||||
import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.Edits;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.ibm.icu.util.RangeValueIterator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
@ -4960,29 +4961,37 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
public static String toLowerCase(ULocale locale, String str) {
|
||||
StringContextIterator iter = new StringContextIterator(str);
|
||||
StringBuilder result = new StringBuilder(str.length());
|
||||
int[] locCache = new int[1];
|
||||
int c;
|
||||
|
||||
if (locale == null) {
|
||||
locale = ULocale.getDefault();
|
||||
}
|
||||
locCache[0]=0;
|
||||
|
||||
while((c=iter.nextCaseMapCP())>=0) {
|
||||
c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
|
||||
|
||||
/* decode the result */
|
||||
if(c<0) {
|
||||
/* (not) original code point */
|
||||
c=~c;
|
||||
} else if(c<=UCaseProps.MAX_STRING_LENGTH) {
|
||||
/* mapping already appended to result */
|
||||
continue;
|
||||
/* } else { append single-code point mapping */
|
||||
// TODO: remove package path
|
||||
if (str.length() <= 100) {
|
||||
if (str.isEmpty()) {
|
||||
return str;
|
||||
}
|
||||
// Collect and apply only changes.
|
||||
// Good if no or few changes.
|
||||
// Bad (slow) if many changes.
|
||||
Edits edits = new Edits();
|
||||
StringBuilder replacementChars = com.ibm.icu.text.CaseMap.toLower(
|
||||
locale, com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT, str,
|
||||
new StringBuilder(), edits);
|
||||
return applyEdits(str, replacementChars, edits);
|
||||
} else {
|
||||
return com.ibm.icu.text.CaseMap.toLower(locale, 0, str, new StringBuilder(), null).toString();
|
||||
}
|
||||
}
|
||||
|
||||
private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
|
||||
if (!edits.hasChanges()) {
|
||||
return str;
|
||||
}
|
||||
StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta());
|
||||
for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
|
||||
if (ei.hasChange()) {
|
||||
int i = ei.replacementIndex();
|
||||
result.append(replacementChars, i, i + ei.newLength());
|
||||
} else {
|
||||
int i = ei.sourceIndex();
|
||||
result.append(str, i, i + ei.oldLength());
|
||||
}
|
||||
result.appendCodePoint(c);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
@ -5063,13 +5072,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
int options) {
|
||||
StringContextIterator iter = new StringContextIterator(str);
|
||||
StringBuilder result = new StringBuilder(str.length());
|
||||
int[] locCache = new int[1];
|
||||
int c, nc, srcLength = str.length();
|
||||
|
||||
if (locale == null) {
|
||||
locale = ULocale.getDefault();
|
||||
}
|
||||
locCache[0]=0;
|
||||
int caseLocale = UCaseProps.getCaseLocale(locale);
|
||||
|
||||
if(titleIter == null) {
|
||||
titleIter = BreakIterator.getWordInstance(locale);
|
||||
@ -5130,7 +5138,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
if(titleStart<index) {
|
||||
FirstIJ = true;
|
||||
/* titlecase c which is from titleStart */
|
||||
c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);
|
||||
c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, caseLocale);
|
||||
|
||||
/* decode the result and lowercase up to index */
|
||||
for(;;) {
|
||||
@ -5166,8 +5174,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
FirstIJ = false;
|
||||
} else {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
|
||||
locCache);
|
||||
c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, caseLocale);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
|
@ -2,10 +2,140 @@
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import com.ibm.icu.impl.UCaseProps;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
// TODO: issues/questions
|
||||
// - add java.util.Locale overloads when signatures are settled
|
||||
// - optimizing strategies for unstyled text: stop after number of changes or length of replacement?
|
||||
|
||||
/**
|
||||
* Low-level case mapping functions.
|
||||
*
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public final class CaseMap {
|
||||
/**
|
||||
* Omit unchanged text when case-mapping with Edits.
|
||||
*
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final int OMIT_UNCHANGED_TEXT = 0x4000;
|
||||
|
||||
/**
|
||||
* Lowercases a string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID.
|
||||
* @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT}.
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toLowerCase(ULocale, String)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static <A extends Appendable> A toLower(
|
||||
ULocale locale, int options, CharSequence src, A dest, Edits edits) {
|
||||
// A null locale means "use the default locale".
if (locale == null) {
|
||||
locale = ULocale.getDefault();
|
||||
}
|
||||
// Resolve the locale to the int case-locale code that the impl function takes.
int caseLocale = UCaseProps.getCaseLocale(locale);
|
||||
// TODO: remove package path
// (The implementation class is also named CaseMap, hence the fully qualified name here.)
|
||||
return com.ibm.icu.impl.CaseMap.toLower(caseLocale, options, src, dest, edits);
|
||||
}
|
||||
|
||||
/**
|
||||
* Uppercases a string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param locale The locale ID.
|
||||
* @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT}.
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toUpperCase(ULocale, String)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static <A extends Appendable> A toUpper(
|
||||
ULocale locale, int options, CharSequence src, A dest, Edits edits) {
|
||||
// TODO: placeholder — no uppercasing is performed yet. This returns null rather than
// dest, and neither dest nor edits is touched, so callers must not rely on the result.
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Titlecases a string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with options bits.)
|
||||
*
|
||||
* @param locale The locale ID.
|
||||
* @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT},
|
||||
* {@link UCharacter#TITLECASE_NO_LOWERCASE},
|
||||
* {@link UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT}.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string (setText())
|
||||
* and used one or more times for iteration (first() and next()).
|
||||
* If null, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#toTitleCase(ULocale, String, BreakIterator, int)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static <A extends Appendable> A toTitle(
|
||||
ULocale locale, int options, BreakIterator iter,
|
||||
CharSequence src, A dest, Edits edits) {
|
||||
// TODO: placeholder — no titlecasing is performed yet. This returns null rather than
// dest, and iter, dest and edits are all left untouched.
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Case-folds a string and optionally records edits.
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT},
|
||||
* {@link UCharacter#FOLD_CASE_DEFAULT},
|
||||
* {@link UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I}.
|
||||
* @param src The original string.
|
||||
* @param dest A buffer for the result string. Must not be null.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any).
|
||||
* This function calls edits.reset() first. edits can be null.
|
||||
* @return dest with the result string (or only changes) appended.
|
||||
*
|
||||
* @see UCharacter#foldCase(String, int)
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static <A extends Appendable> A foldCase(
|
||||
int options, CharSequence src, A dest, Edits edits) {
|
||||
// TODO: placeholder — no case folding is performed yet. This returns null rather than
// dest, and neither dest nor edits is touched.
return null;
|
||||
}
|
||||
}
|
||||
|
@ -10,9 +10,6 @@ import java.util.Arrays;
|
||||
* Supports replacements, insertions, deletions in linear progression.
|
||||
* Does not support moving/reordering of text.
|
||||
*
|
||||
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
|
||||
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
|
||||
*
|
||||
* @draft ICU 59
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
|
@ -3866,7 +3866,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
||||
int n = getRangeCount();
|
||||
int result;
|
||||
StringBuilder full = new StringBuilder();
|
||||
int locCache[] = new int[1];
|
||||
|
||||
for (int i=0; i<n; ++i) {
|
||||
int start = getRangeStart(i);
|
||||
@ -3881,13 +3880,13 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
||||
// add case mappings
|
||||
// (does not add long s for regular s, or Kelvin for k, for example)
|
||||
for (int cp=start; cp<=end; ++cp) {
|
||||
result = csp.toFullLower(cp, null, full, root, locCache);
|
||||
result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT);
|
||||
addCaseMapping(foldSet, result, full);
|
||||
|
||||
result = csp.toFullTitle(cp, null, full, root, locCache);
|
||||
result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT);
|
||||
addCaseMapping(foldSet, result, full);
|
||||
|
||||
result = csp.toFullUpper(cp, null, full, root, locCache);
|
||||
result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT);
|
||||
addCaseMapping(foldSet, result, full);
|
||||
|
||||
result = csp.toFullFolding(cp, full, 0);
|
||||
@ -3906,6 +3905,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
||||
} else {
|
||||
BreakIterator bi = BreakIterator.getWordInstance(root);
|
||||
for (String str : strings) {
|
||||
// TODO: call lower-level functions
|
||||
foldSet.add(UCharacter.toLowerCase(root, str));
|
||||
foldSet.add(UCharacter.toTitleCase(root, str, bi));
|
||||
foldSet.add(UCharacter.toUpperCase(root, str));
|
||||
|
@ -44,7 +44,7 @@ class LowercaseTransliterator extends Transliterator{
|
||||
private final UCaseProps csp;
|
||||
private ReplaceableContextIterator iter;
|
||||
private StringBuilder result;
|
||||
private int[] locCache;
|
||||
private int caseLocale;
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
@ -56,8 +56,7 @@ class LowercaseTransliterator extends Transliterator{
|
||||
csp=UCaseProps.INSTANCE;
|
||||
iter=new ReplaceableContextIterator();
|
||||
result = new StringBuilder();
|
||||
locCache = new int[1];
|
||||
locCache[0]=0;
|
||||
caseLocale = UCaseProps.getCaseLocale(locale);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -85,7 +84,7 @@ class LowercaseTransliterator extends Transliterator{
|
||||
iter.setLimit(offsets.limit);
|
||||
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
|
||||
while((c=iter.nextCaseMapCP())>=0) {
|
||||
c=csp.toFullLower(c, iter, result, locale, locCache);
|
||||
c=csp.toFullLower(c, iter, result, caseLocale);
|
||||
|
||||
if(iter.didReachLimit() && isIncremental) {
|
||||
// the case mapping function tried to look beyond the context limit
|
||||
|
@ -42,7 +42,7 @@ class TitlecaseTransliterator extends Transliterator {
|
||||
private final UCaseProps csp;
|
||||
private ReplaceableContextIterator iter;
|
||||
private StringBuilder result;
|
||||
private int[] locCache;
|
||||
private int caseLocale;
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
@ -55,8 +55,7 @@ class TitlecaseTransliterator extends Transliterator {
|
||||
csp=UCaseProps.INSTANCE;
|
||||
iter=new ReplaceableContextIterator();
|
||||
result = new StringBuilder();
|
||||
locCache = new int[1];
|
||||
locCache[0]=0;
|
||||
caseLocale = UCaseProps.getCaseLocale(locale);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -119,9 +118,9 @@ class TitlecaseTransliterator extends Transliterator {
|
||||
type=csp.getTypeOrIgnorable(c);
|
||||
if(type>=0) { // not case-ignorable
|
||||
if(doTitle) {
|
||||
c=csp.toFullTitle(c, iter, result, locale, locCache);
|
||||
c=csp.toFullTitle(c, iter, result, caseLocale);
|
||||
} else {
|
||||
c=csp.toFullLower(c, iter, result, locale, locCache);
|
||||
c=csp.toFullLower(c, iter, result, caseLocale);
|
||||
}
|
||||
doTitle = type==0; // doTitle=isUncased
|
||||
|
||||
|
@ -41,7 +41,7 @@ class UppercaseTransliterator extends Transliterator {
|
||||
private final UCaseProps csp;
|
||||
private ReplaceableContextIterator iter;
|
||||
private StringBuilder result;
|
||||
private int[] locCache;
|
||||
private int caseLocale;
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
@ -52,8 +52,7 @@ class UppercaseTransliterator extends Transliterator {
|
||||
csp=UCaseProps.INSTANCE;
|
||||
iter=new ReplaceableContextIterator();
|
||||
result = new StringBuilder();
|
||||
locCache = new int[1];
|
||||
locCache[0]=0;
|
||||
caseLocale = UCaseProps.getCaseLocale(locale);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -81,7 +80,7 @@ class UppercaseTransliterator extends Transliterator {
|
||||
iter.setLimit(offsets.limit);
|
||||
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
|
||||
while((c=iter.nextCaseMapCP())>=0) {
|
||||
c=csp.toFullUpper(c, iter, result, locale, locCache);
|
||||
c=csp.toFullUpper(c, iter, result, caseLocale);
|
||||
|
||||
if(iter.didReachLimit() && isIncremental) {
|
||||
// the case mapping function tried to look beyond the context limit
|
||||
|
Loading…
Reference in New Issue
Block a user