ICU-7273 finish Normalizer2Impl port to Java (normalizeAndAppend() and boundary tests), and port changes to top-level Normalizer.compare()
X-SVN-Rev: 27485
parent 7a8d49ed32
commit f9a9d47489
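For context, the protected normalizeAndAppend() methods finished below are what back the public Normalizer2 concatenation API (normalizeSecondAndAppend()/append()) introduced with ICU 4.4. A minimal usage sketch against that public API; the strings and class name are illustrative only:

    import com.ibm.icu.text.Normalizer2;

    public class AppendDemo {
        public static void main(String[] args) {
            // NFC instance; internally backed by Norm2AllModes/Normalizer2Impl.
            Normalizer2 nfc = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
            StringBuilder first = new StringBuilder("se");
            // normalizeSecondAndAppend() normalizes the second string and re-composes
            // across the join point: "se" + "\u0301or" -> "séor".
            nfc.normalizeSecondAndAppend(first, "\u0301or");
            System.out.println(first);  // séor
            // append() assumes both sides are already normalized and only fixes up the boundary.
            nfc.append(first, "!");
        }
    }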
@@ -148,7 +148,7 @@ public final class Norm2AllModes {
@Override
protected void normalizeAndAppend(
CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
impl.decomposeAndAppend(src, 0, src.length(), doNormalize, buffer);
impl.decomposeAndAppend(src, doNormalize, buffer);
}
@Override
public int spanQuickCheckYes(CharSequence s) {
@@ -179,7 +179,7 @@ public final class Norm2AllModes {
@Override
protected void normalizeAndAppend(
CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
impl.composeAndAppend(src, 0, src.length(), doNormalize, onlyContiguous, buffer);
impl.composeAndAppend(src, doNormalize, onlyContiguous, buffer);
}

@Override
@@ -234,7 +234,7 @@ public final class Norm2AllModes {
@Override
protected void normalizeAndAppend(
CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
impl.makeFCDAndAppend(src, 0, src.length(), doNormalize, buffer);
impl.makeFCDAndAppend(src, doNormalize, buffer);
}
@Override
public int spanQuickCheckYes(CharSequence s) {
@@ -711,10 +711,31 @@ public final class Normalizer2Impl {
}
return src;
}
public void decomposeAndAppend(CharSequence s, int src, int limit,
boolean doDecompose,
ReorderingBuffer buffer) {
throw new UnsupportedOperationException(); // TODO
public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
int limit=s.length();
if(limit==0) {
return;
}
if(doDecompose) {
decompose(s, 0, limit, buffer);
return;
}
// Just merge the strings at the boundary.
int c=Character.codePointAt(s, 0);
int src=0;
int firstCC, prevCC, cc;
firstCC=prevCC=cc=getCC(getNorm16(c));
while(cc!=0) {
prevCC=cc;
src+=Character.charCount(c);
if(src>=limit) {
break;
}
c=Character.codePointAt(s, src);
cc=getCC(getNorm16(c));
};
buffer.append(s, 0, src, firstCC, prevCC);
buffer.append(s, src, limit);
}
// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
// doCompose: normalize
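When doDecompose is false, the new decomposeAndAppend() above only merges the two already-normalized strings at the join point: it scans the leading run of non-starters (canonical combining class != 0) so they can be reordered against the end of the buffer. A standalone sketch of that same scan written against the public UCharacter API (the helper name is mine; the internal code reads ccc from its own trie instead):

    import com.ibm.icu.lang.UCharacter;

    final class BoundaryScan {
        /** Length of the leading run of non-starters (ccc != 0) in s. */
        static int leadingNonStarters(CharSequence s) {
            int i = 0;
            while (i < s.length()) {
                int c = Character.codePointAt(s, i);
                if (UCharacter.getCombiningClass(c) == 0) {
                    break;  // first starter found; only the prefix may need reordering
                }
                i += Character.charCount(c);
            }
            return i;
        }
    }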
@@ -1062,11 +1083,30 @@ public final class Normalizer2Impl {
return prevBoundary<<1; // "no"
}
}
public void composeAndAppend(CharSequence s, int src, int limit,
public void composeAndAppend(CharSequence s,
boolean doCompose,
boolean onlyContiguous,
ReorderingBuffer buffer) {
throw new UnsupportedOperationException(); // TODO
int src=0, limit=s.length();
if(!buffer.isEmpty()) {
int firstStarterInSrc=findNextCompBoundary(s, 0, limit);
if(0!=firstStarterInSrc) {
int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
buffer.length());
StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
firstStarterInSrc+16);
middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
buffer.removeSuffix(buffer.length()-lastStarterInDest);
middle.append(s, 0, firstStarterInSrc);
compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
src=firstStarterInSrc;
}
}
if(doCompose) {
compose(s, src, limit, onlyContiguous, true, buffer);
} else {
buffer.append(s, src, limit);
}
}
public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
// Note: In this function we use buffer->appendZeroCC() because we track
@@ -1195,14 +1235,65 @@ public final class Normalizer2Impl {
}
return src;
}
public void makeFCDAndAppend(CharSequence s, int src, int limit,
boolean doMakeFCD,
ReorderingBuffer buffer) {
throw new UnsupportedOperationException(); // TODO
public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) {
int src=0, limit=s.length();
if(!buffer.isEmpty()) {
int firstBoundaryInSrc=findNextFCDBoundary(s, 0, limit);
if(0!=firstBoundaryInSrc) {
int lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStringBuilder(),
buffer.length());
StringBuilder middle=new StringBuilder((buffer.length()-lastBoundaryInDest)+
firstBoundaryInSrc+16);
middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length());
buffer.removeSuffix(buffer.length()-lastBoundaryInDest);
middle.append(s, 0, firstBoundaryInSrc);
makeFCD(middle, 0, middle.length(), buffer);
src=firstBoundaryInSrc;
}
}
if(doMakeFCD) {
makeFCD(s, src, limit, buffer);
} else {
buffer.append(s, src, limit);
}
}

// Note: hasDecompBoundary() could be implemented as aliases to
// hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
// at the cost of building the FCD trie for a decomposition normalizer.
public boolean hasDecompBoundary(int c, boolean before) {
throw new UnsupportedOperationException(); // TODO
for(;;) {
if(c<minDecompNoCP) {
return true;
}
int norm16=getNorm16(c);
if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
return true;
} else if(norm16>MIN_NORMAL_MAYBE_YES) {
return false; // ccc!=0
} else if(isDecompNoAlgorithmic(norm16)) {
c=mapAlgorithmic(c, norm16);
} else {
// c decomposes, get everything from the variable-length extra data
int firstUnit=extraData.charAt(norm16++);
if((firstUnit&MAPPING_LENGTH_MASK)==0) {
return false;
}
if(!before) {
// decomp after-boundary: same as hasFCDBoundaryAfter(),
// fcd16<=1 || trailCC==0
if(firstUnit>0x1ff) {
return false; // trailCC>1
}
if(firstUnit<=0xff) {
return true; // trailCC==0
}
// if(trailCC==1) test leadCC==0, same as checking for before-boundary
}
// true if leadCC==0 (hasFCDBoundaryBefore())
return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16)&0xff00)==0;
}
}
}
public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); }

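The boundary predicates implemented above (hasDecompBoundary(), and hasCompBoundaryAfter() in the next hunk) are what the public Normalizer2 boundary API reports to callers. A small sketch of that public surface; the character choices are illustrative assumptions, not taken from this commit:

    import com.ibm.icu.text.Normalizer2;

    public class BoundaryDemo {
        public static void main(String[] args) {
            Normalizer2 nfc = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
            // U+4E00 neither decomposes nor combines: it is NFC-inert.
            System.out.println(nfc.isInert(0x4E00));           // true
            // U+0301 COMBINING ACUTE ACCENT has no boundary before it.
            System.out.println(nfc.hasBoundaryBefore(0x0301));  // false
            // 'e' can combine with a following accent, so no boundary after it under NFC.
            System.out.println(nfc.hasBoundaryAfter('e'));      // false
        }
    }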
@@ -1210,7 +1301,33 @@ public final class Normalizer2Impl {
return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));
}
public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous, boolean testInert) {
throw new UnsupportedOperationException(); // TODO
for(;;) {
int norm16=getNorm16(c);
if(isInert(norm16)) {
return true;
} else if(norm16<=minYesNo) {
// Hangul LVT (==minYesNo) has a boundary after it.
// Hangul LV and non-inert yesYes characters combine forward.
return isHangul(norm16) && !Hangul.isHangulWithoutJamoT((char)c);
} else if(norm16>= (testInert ? minNoNo : minMaybeYes)) {
return false;
} else if(isDecompNoAlgorithmic(norm16)) {
c=mapAlgorithmic(c, norm16);
} else {
// c decomposes, get everything from the variable-length extra data.
// If testInert, then c must be a yesNo character which has lccc=0,
// otherwise it could be a noNo.
int firstUnit=extraData.charAt(norm16);
// true if
// c is not deleted, and
// it and its decomposition do not combine forward, and it has a starter, and
// if FCC then trailCC<=1
return
(firstUnit&MAPPING_LENGTH_MASK)!=0 &&
(firstUnit&(MAPPING_PLUS_COMPOSITION_LIST|MAPPING_NO_COMP_BOUNDARY_AFTER))==0 &&
(!onlyContiguous || firstUnit<=0x1ff);
}
}
}

public boolean hasFCDBoundaryBefore(int c) { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; }
@@ -1322,7 +1439,6 @@ public final class Normalizer2Impl {
private void decomposeShort(CharSequence s, int src, int limit,
ReorderingBuffer buffer) {
while(src<limit) {
// TODO: use trie string iterator?? C++ uses UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
int c=Character.codePointAt(s, src);
src+=Character.charCount(c);
decompose(c, getNorm16(c), buffer);
@@ -1462,7 +1578,6 @@ public final class Normalizer2Impl {
c=sb.codePointAt(p);
p+=Character.charCount(c);
norm16=getNorm16(c);
// TODO: use trie string iterator?? C++ uses UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
cc=getCCFromYesOrMaybe(norm16);
if( // this character combines backward and
isMaybe(norm16) &&
@@ -1612,8 +1727,17 @@ public final class Normalizer2Impl {
}
}
}
private int findPreviousCompBoundary(CharSequence s, int start, int p) {
throw new UnsupportedOperationException(); // TODO
private int findPreviousCompBoundary(CharSequence s, int p) {
while(p>0) {
int c=Character.codePointBefore(s, p);
p-=Character.charCount(c);
if(hasCompBoundaryBefore(c)) {
break;
}
// We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
// but that's probably not worth the extra cost.
}
return p;
}
private int findNextCompBoundary(CharSequence s, int p, int limit) {
while(p<limit) {
@@ -1627,8 +1751,15 @@ public final class Normalizer2Impl {
return p;
}

private int findPreviousFCDBoundary(CharSequence s, int start, int p) {
throw new UnsupportedOperationException(); // TODO
private int findPreviousFCDBoundary(CharSequence s, int p) {
while(p>0) {
int c=Character.codePointBefore(s, p);
p-=Character.charCount(c);
if(fcdTrie.get(c)<=0xff) {
break;
}
}
return p;
}
private int findNextFCDBoundary(CharSequence s, int p, int limit) {
while(p<limit) {
@@ -1642,23 +1773,22 @@ public final class Normalizer2Impl {
return p;
}

VersionInfo dataVersion;
@SuppressWarnings("unused")
private VersionInfo dataVersion;

// Code point thresholds for quick check codes.
int minDecompNoCP;
int minCompNoMaybeCP;
private int minDecompNoCP;
private int minCompNoMaybeCP;

// Norm16 value thresholds for quick check combinations and types of extra data.
int minYesNo;
int minNoNo;
int limitNoNo;
int minMaybeYes;
private int minYesNo;
private int minNoNo;
private int limitNoNo;
private int minMaybeYes;

Trie2_16 normTrie;
String maybeYesCompositions;
String extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
private Trie2_16 normTrie;
private String maybeYesCompositions;
private String extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters

Trie2_16 fcdTrie;
private Trie2_16 fcdTrie;
}

// TODO: Copy parts of normalizer2impl.h starting with Normalizer2Factory??
@@ -154,11 +154,11 @@ public final class Normalizer implements Cloneable {
* @stable ICU 2.8
*/
public static class Mode {
protected Mode(Normalizer2 n2) {
private Mode(Normalizer2 n2) {
normalizer2 = n2;
uni32Normalizer2 = new FilteredNormalizer2(n2, UNI32_SET);
}
protected final Normalizer2 getNormalizer2(int options) {
private final Normalizer2 getNormalizer2(int options) {
return (options&UNICODE_3_2) != 0 ? uni32Normalizer2 : normalizer2;
}

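The reworked Mode wrapper maps the legacy UNICODE_3_2 option onto a FilteredNormalizer2 over UNI32_SET. A sketch of the same composition through the public API; the "[:age=3.2:]" pattern is my assumption about what UNI32_SET selects, and the class name is illustrative:

    import com.ibm.icu.text.FilteredNormalizer2;
    import com.ibm.icu.text.Normalizer2;
    import com.ibm.icu.text.UnicodeSet;

    public class Unicode32Demo {
        public static void main(String[] args) {
            Normalizer2 nfc = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
            // Restrict normalization to characters assigned in Unicode 3.2,
            // which is what the UNICODE_3_2 option is meant to do.
            UnicodeSet uni32 = new UnicodeSet("[:age=3.2:]").freeze();
            Normalizer2 nfcUni32 = new FilteredNormalizer2(nfc, uni32);
            System.out.println(nfcUni32.normalize("A\u0301"));  // "Á"
        }
    }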
@@ -191,14 +191,6 @@ public final class Normalizer implements Cloneable {
);
}

/**
* This method is used for method dispatch
* @stable ICU 2.6
*/
protected String normalize(String src, int options) {
return src;
}

/**
* This method is used for method dispatch
* @stable ICU 2.8
@@ -231,18 +223,6 @@ public final class Normalizer implements Cloneable {
return null;
}

/**
* This method is used for method dispatch
* @stable ICU 2.6
*/
protected QuickCheckResult quickCheck(char[] src,int start, int limit,
boolean allowMaybe,UnicodeSet nx) {
if(allowMaybe) {
return MAYBE;
}
return NO;
}

/**
* This method is used for method dispatch
* @stable ICU 2.8
@@ -281,10 +261,6 @@ public final class Normalizer implements Cloneable {
false, trailCC,nx);
}

protected String normalize( String src, int options) {
return decompose(src,false);
}

protected int getMinC() {
return NormalizerImpl.MIN_WITH_LEAD_CC;
}
@@ -301,21 +277,6 @@ public final class Normalizer implements Cloneable {
return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD);
}

protected QuickCheckResult quickCheck(char[] src,int start,
int limit,boolean allowMaybe,
UnicodeSet nx) {
return NormalizerImpl.quickCheck(
src, start,limit,
NormalizerImpl.getFromIndexesArr(
NormalizerImpl.INDEX_MIN_NFD_NO_MAYBE
),
NormalizerImpl.QC_NFD,
0,
allowMaybe,
nx
);
}

protected boolean isNFSkippable(int c) {
return NormalizerImpl.isNFSkippable(c,this,
(NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD)
@@ -343,10 +304,6 @@ public final class Normalizer implements Cloneable {
true, trailCC, nx);
}

protected String normalize( String src, int options) {
return decompose(src,true);
}

protected int getMinC() {
return NormalizerImpl.MIN_WITH_LEAD_CC;
}
@@ -363,21 +320,6 @@ public final class Normalizer implements Cloneable {
return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD);
}

protected QuickCheckResult quickCheck(char[] src,int start,
int limit,boolean allowMaybe,
UnicodeSet nx) {
return NormalizerImpl.quickCheck(
src,start,limit,
NormalizerImpl.getFromIndexesArr(
NormalizerImpl.INDEX_MIN_NFKD_NO_MAYBE
),
NormalizerImpl.QC_NFKD,
NormalizerImpl.OPTIONS_COMPAT,
allowMaybe,
nx
);
}

protected boolean isNFSkippable(int c) {
return NormalizerImpl.isNFSkippable(c, this,
(NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD)
@@ -403,10 +345,6 @@ public final class Normalizer implements Cloneable {
0, nx);
}

protected String normalize( String src, int options) {
return compose(src, false, options);
}

protected int getMinC() {
return NormalizerImpl.getFromIndexesArr(
NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
@@ -421,20 +359,6 @@ public final class Normalizer implements Cloneable {
protected int getMask() {
return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFC);
}
protected QuickCheckResult quickCheck(char[] src,int start,
int limit,boolean allowMaybe,
UnicodeSet nx) {
return NormalizerImpl.quickCheck(
src,start,limit,
NormalizerImpl.getFromIndexesArr(
NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
),
NormalizerImpl.QC_NFC,
0,
allowMaybe,
nx
);
}
protected boolean isNFSkippable(int c) {
return NormalizerImpl.isNFSkippable(c,this,
( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
@@ -468,9 +392,6 @@ public final class Normalizer implements Cloneable {
NormalizerImpl.OPTIONS_COMPAT, nx);
}

protected String normalize( String src, int options) {
return compose(src, true, options);
}
protected int getMinC() {
return NormalizerImpl.getFromIndexesArr(
NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
@@ -485,20 +406,6 @@ public final class Normalizer implements Cloneable {
protected int getMask() {
return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKC);
}
protected QuickCheckResult quickCheck(char[] src,int start,
int limit,boolean allowMaybe,
UnicodeSet nx) {
return NormalizerImpl.quickCheck(
src,start,limit,
NormalizerImpl.getFromIndexesArr(
NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
),
NormalizerImpl.QC_NFKC,
NormalizerImpl.OPTIONS_COMPAT,
allowMaybe,
nx
);
}
protected boolean isNFSkippable(int c) {
return NormalizerImpl.isNFSkippable(c, this,
( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
@@ -525,9 +432,6 @@ public final class Normalizer implements Cloneable {
return NormalizerImpl.makeFCD(src, srcStart,srcLimit,
dest, destStart,destLimit, nx);
}
protected String normalize( String src, int options) {
return makeFCD(src, options);
}
protected int getMinC() {
return NormalizerImpl.MIN_WITH_LEAD_CC;
}
@@ -540,11 +444,6 @@ public final class Normalizer implements Cloneable {
protected int getMask() {
return NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD;
}
protected QuickCheckResult quickCheck(char[] src,int start,
int limit,boolean allowMaybe,
UnicodeSet nx) {
return NormalizerImpl.checkFCD(src,start,limit,nx) ? YES : NO;
}
protected boolean isNFSkippable(int c) {
/* FCD: skippable if lead cc==0 and trail cc<=1 */
return (NormalizerImpl.getFCD16(c)>1);
@@ -1005,10 +904,6 @@ public final class Normalizer implements Cloneable {
return app.length();
}

private static String makeFCD(String src,int options) {
return Norm2AllModes.getFCDNormalizer2NoIOException().normalize(src);
}

/**
* Normalizes a <tt>String</tt> using the given normalization operation.
* <p>
@@ -1329,16 +1224,36 @@ public final class Normalizer implements Cloneable {
public static int compare(char[] s1, int s1Start, int s1Limit,
char[] s2, int s2Start, int s2Limit,
int options) {
return internalCompare(s1, s1Start, s1Limit,
s2, s2Start, s2Limit,
if( s1==null || s1Start<0 || s1Limit<0 ||
s2==null || s2Start<0 || s2Limit<0 ||
s1Limit<s1Start || s2Limit<s2Start
) {
throw new IllegalArgumentException();
}
return internalCompare(CharBuffer.wrap(s1, s1Start, s1Limit-s1Start),
CharBuffer.wrap(s2, s2Start, s2Limit-s2Start),
options);
}


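The compare() overloads now validate their arguments up front and funnel char[] ranges through java.nio.CharBuffer.wrap() into the CharSequence-based internalCompare(); the observable behavior for callers stays the same. A short usage sketch of the public API (strings and option values are illustrative):

    import com.ibm.icu.text.Normalizer;

    public class CompareDemo {
        public static void main(String[] args) {
            // "Ä" and "A" + COMBINING DIAERESIS are canonically equivalent.
            System.out.println(Normalizer.compare("\u00C4", "A\u0308", 0));  // 0
            // Case-insensitive canonical comparison in code point order.
            System.out.println(Normalizer.compare("\u00C4", "a\u0308",
                    Normalizer.COMPARE_IGNORE_CASE | Normalizer.COMPARE_CODE_POINT_ORDER));  // 0
        }
    }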
/**
* Compare two strings for canonical equivalence.
* Further options include case-insensitive comparison and
* code point order (as opposed to code unit order).
* Convenience method.
*
* Canonical equivalence between two strings is defined as their normalized
* forms (NFD or NFC) being identical.
* This function compares strings incrementally instead of normalizing
* (and optionally case-folding) both strings entirely,
* improving performance significantly.
*
* Bulk normalization is only necessary if the strings do not fulfill the
* FCD conditions. Only in this case, and only if the strings are relatively
* long, is memory allocated temporarily.
* For FCD strings and short non-FCD strings there is no memory allocation.
*
* Semantically, this is equivalent to
* strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
* where code point order and foldCase are all optional.
*
* @param s1 First source string.
* @param s2 Second source string.
@@ -1368,12 +1283,9 @@ public final class Normalizer implements Cloneable {
* @stable ICU 2.8
*/
public static int compare(String s1, String s2, int options) {

return compare(s1.toCharArray(),0,s1.length(),
s2.toCharArray(),0,s2.length(),
options);
return internalCompare(s1, s2, options);
}


/**
* Compare two strings for canonical equivalence.
* Further options include case-insensitive comparison and
@@ -1408,9 +1320,9 @@ public final class Normalizer implements Cloneable {
* @stable ICU 2.8
*/
public static int compare(char[] s1, char[] s2, int options) {
return compare(s1,0,s1.length,s2,0,s2.length,options);
}

return internalCompare(CharBuffer.wrap(s1), CharBuffer.wrap(s2), options);
}

/**
* Convenience method that can have faster implementation
* by not allocating buffers.
@@ -1419,12 +1331,10 @@ public final class Normalizer implements Cloneable {
* @param options A bit set of options
* @stable ICU 2.8
*/
// TODO: actually do the optimization when the guts of Normalizer are
// upgraded --has just dumb implementation for now
public static int compare(int char32a, int char32b,int options) {
return compare(UTF16.valueOf(char32a), UTF16.valueOf(char32b), options);
public static int compare(int char32a, int char32b, int options) {
return internalCompare(UTF16.valueOf(char32a), UTF16.valueOf(char32b), options);
}


/**
* Convenience method that can have faster implementation
* by not allocating buffers.
@@ -1433,12 +1343,10 @@ public final class Normalizer implements Cloneable {
* @param options A bit set of options
* @stable ICU 2.8
*/
// TODO: actually do the optimization when the guts of Normalizer are
// upgraded --has just dumb implementation for now
public static int compare(int char32a, String str2, int options) {
return compare(UTF16.valueOf(char32a), str2, options);
return internalCompare(UTF16.valueOf(char32a), str2, options);
}


/**
* Concatenate normalized strings, making sure that the result is normalized
* as well.
@@ -2618,27 +2526,10 @@ public final class Normalizer implements Cloneable {
}


private static int internalCompare(char[] s1, int s1Start,int s1Limit,
char[] s2, int s2Start,int s2Limit,
int options) {

char[] fcd1 = new char[300];
char[] fcd2 = new char[300];

Normalizer.Mode mode;
int result;

if( s1==null || s1Start<0 || s1Limit<0 ||
s2==null || s2Start<0 || s2Limit<0 ||
s1Limit<s1Start || s2Limit<s2Start
) {

throw new IllegalArgumentException();
}

UnicodeSet nx=NormalizerImpl.getNX(options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT);
// TODO: Consider proposing this function as public API.
private static int internalCompare(CharSequence s1, CharSequence s2, int options) {
int normOptions=options>>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
options|= NormalizerImpl.COMPARE_EQUIV;
result=0;

/*
* UAX #21 Case Mappings, as fixed for Unicode version 4
@@ -2661,20 +2552,18 @@ public final class Normalizer implements Cloneable {
* are first decomposed or not, so an FCD check - a check only for
* canonical order - is not sufficient.
*/
if((options& Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) >0 ) {
mode=Normalizer.NFD;
options&=~ Normalizer.INPUT_IS_FCD;
} else {
mode=Normalizer.FCD;
}
if((options& Normalizer.INPUT_IS_FCD)==0) {
char[] dest;
int fcdLen1, fcdLen2;
boolean isFCD1, isFCD2;

if((options&INPUT_IS_FCD)==0 || (options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
Normalizer2 n2;
if((options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
n2=NFD.getNormalizer2(normOptions);
} else {
n2=FCD.getNormalizer2(normOptions);
}

// check if s1 and/or s2 fulfill the FCD conditions
isFCD1= Normalizer.YES==mode.quickCheck(s1, s1Start, s1Limit, true, nx);
isFCD2= Normalizer.YES==mode.quickCheck(s2, s2Start, s2Limit, true, nx);
int spanQCYes1=n2.spanQuickCheckYes(s1);
int spanQCYes2=n2.spanQuickCheckYes(s2);

/*
* ICU 2.4 had a further optimization:
* If both strings were not in FCD, then they were both NFD'ed,
@@ -2684,49 +2573,30 @@ public final class Normalizer implements Cloneable {
* Therefore, ICU 2.6 removes that optimization.
*/

if(!isFCD1) {
fcdLen1=mode.normalize(s1, 0, s1.length,
fcd1, 0, fcd1.length,
nx);

if(fcdLen1>fcd1.length) {
dest=new char[fcdLen1];
fcdLen1=mode.normalize( s1, 0, s1.length,
dest, 0, dest.length,
nx);
s1=dest;
} else {
s1=fcd1;
}
s1Limit=fcdLen1;
s1Start=0;
if(spanQCYes1<s1.length()) {
StringBuilder fcd1=new StringBuilder(s1.length()+16).append(s1, 0, spanQCYes1);
s1=n2.normalizeSecondAndAppend(fcd1, s1.subSequence(spanQCYes1, s1.length()));
}

if(!isFCD2) {
fcdLen2=mode.normalize(s2,s2Start,s2Limit,
fcd2,0,fcd2.length,
nx);

if(fcdLen2>fcd2.length) {
dest=new char[fcdLen2];
fcdLen2=mode.normalize( s2,s2Start,s2Limit,
dest,0,dest.length,
nx);
s2=dest;
} else {
s2=fcd2;
}
s2Limit=fcdLen2;
s2Start=0;
if(spanQCYes2<s2.length()) {
StringBuilder fcd2=new StringBuilder(s2.length()+16).append(s2, 0, spanQCYes2);
s2=n2.normalizeSecondAndAppend(fcd2, s2.subSequence(spanQCYes2, s2.length()));
}

}

result=NormalizerImpl.cmpEquivFold(s1, s1Start, s1Limit,
s2, s2Start, s2Limit, options);
return result;

// TODO: Temporarily hideously slow. Convert internals to work on CharSequence.
int length1=s1.length();
char[] s1Array=new char[length1];
for(int i=0; i<length1; ++i) {
s1Array[i]=s1.charAt(i);
}
int length2=s2.length();
char[] s2Array=new char[length2];
for(int i=0; i<length2; ++i) {
s2Array[i]=s2.charAt(i);
}
return NormalizerImpl.cmpEquivFold(s1Array, 0, length1, s2Array, 0, length2, options);
}


/**
* Fetches the Unicode version burned into the Normalization data file
* @return VersionInfo version information of the normalizer
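The rewritten internalCompare() above only normalizes the tail of each input that fails spanQuickCheckYes(), instead of running the whole strings through fixed char[300] FCD buffers. The same incremental pattern is available on the public Normalizer2 API; a minimal sketch (the helper name is mine, not part of this commit):

    import com.ibm.icu.text.Normalizer2;

    final class IncrementalNormalize {
        /** Normalize s, reusing the already-normalized prefix found by spanQuickCheckYes(). */
        static String normalizeIfNeeded(Normalizer2 n2, String s) {
            int spanQCYes = n2.spanQuickCheckYes(s);
            if (spanQCYes == s.length()) {
                return s;  // already normalized, no allocation
            }
            StringBuilder sb = new StringBuilder(s.length() + 16).append(s, 0, spanQCYes);
            // Normalize only the suffix and append it, re-normalizing across the boundary.
            return n2.normalizeSecondAndAppend(sb, s.substring(spanQCYes)).toString();
        }
    }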