ICU-7273 change Normalizer.concatenate() to use new code and remove most of the old Normalizer implementation code
X-SVN-Rev: 27491
This commit is contained in:
parent
fc0acc5419
commit
a1a504d023
@ -12,7 +12,6 @@ import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
import java.nio.CharBuffer;
|
||||
import java.text.CharacterIterator;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
|
||||
/**
|
||||
* Unicode Normalization
|
||||
@ -115,13 +114,7 @@ import com.ibm.icu.impl.Utility;
|
||||
* For more usage examples, see the Unicode Standard Annex.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
|
||||
public final class Normalizer implements Cloneable {
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Private data
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
// The input text and our position in it
|
||||
private UCharacterIterator text;
|
||||
private Normalizer2 norm2;
|
||||
@ -166,12 +159,13 @@ public final class Normalizer implements Cloneable {
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for method dispatch
|
||||
* Obsolete method.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
protected int normalize(char[] src, int srcStart, int srcLimit,
|
||||
char[] dest,int destStart,int destLimit,
|
||||
UnicodeSet nx) {
|
||||
// TODO: deprecate or remove this method
|
||||
int srcLen = (srcLimit - srcStart);
|
||||
int destLen = (destLimit - destStart);
|
||||
if( srcLen > destLen ) {
|
||||
@ -182,62 +176,65 @@ public final class Normalizer implements Cloneable {
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for method dispatch
|
||||
* Obsolete method.
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
protected int normalize(char[] src, int srcStart, int srcLimit,
|
||||
char[] dest,int destStart,int destLimit,
|
||||
int options) {
|
||||
// TODO: deprecate or remove this method
|
||||
return normalize( src, srcStart, srcLimit,
|
||||
dest,destStart,destLimit,
|
||||
NormalizerImpl.getNX(options)
|
||||
);
|
||||
null);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for method dispatch
|
||||
* Obsolete method.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
protected int getMinC() {
|
||||
return -1;
|
||||
return -1; // TODO: deprecate or remove this method
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for method dispatch
|
||||
* Obsolete method.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
protected int getMask() {
|
||||
return -1;
|
||||
return -1; // TODO: deprecate or remove this method
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for method dispatch
|
||||
* Obsolete method.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
protected IsPrevBoundary getPrevBoundary() {
|
||||
return null;
|
||||
return null; // TODO: deprecate or remove this method
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for method dispatch
|
||||
* Obsolete method.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
protected IsNextBoundary getNextBoundary() {
|
||||
return null;
|
||||
return null; // TODO: deprecate or remove this method
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used for method dispatch
|
||||
* Obsolete method.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
protected boolean isNFSkippable(int c) {
|
||||
return true;
|
||||
return true; // TODO: deprecate or remove this method
|
||||
}
|
||||
private final Normalizer2 normalizer2;
|
||||
private final FilteredNormalizer2 uni32Normalizer2;
|
||||
private static final UnicodeSet UNI32_SET = new UnicodeSet("[:age=3.2:]").freeze();
|
||||
}
|
||||
|
||||
private interface IsPrevBoundary {} // TODO: remove when Mode.getPrevBoundary() is removed
|
||||
private interface IsNextBoundary {} // TODO: remove when Mode.getNextBoundary() is removed
|
||||
|
||||
/**
|
||||
* No decomposition/composition.
|
||||
* @stable ICU 2.8
|
||||
@ -248,128 +245,19 @@ public final class Normalizer implements Cloneable {
|
||||
* Canonical decomposition.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public static final Mode NFD = new NFDMode();
|
||||
|
||||
private static final class NFDMode extends Mode {
|
||||
private NFDMode() {
|
||||
super(Norm2AllModes.getNFCInstanceNoIOException().decomp);
|
||||
}
|
||||
|
||||
protected int normalize(char[] src, int srcStart, int srcLimit,
|
||||
char[] dest,int destStart,int destLimit,
|
||||
UnicodeSet nx) {
|
||||
int[] trailCC = new int[1];
|
||||
return NormalizerImpl.decompose(src, srcStart,srcLimit,
|
||||
dest, destStart,destLimit,
|
||||
false, trailCC,nx);
|
||||
}
|
||||
|
||||
protected int getMinC() {
|
||||
return NormalizerImpl.MIN_WITH_LEAD_CC;
|
||||
}
|
||||
|
||||
protected IsPrevBoundary getPrevBoundary() {
|
||||
return new IsPrevNFDSafe();
|
||||
}
|
||||
|
||||
protected IsNextBoundary getNextBoundary() {
|
||||
return new IsNextNFDSafe();
|
||||
}
|
||||
|
||||
protected int getMask() {
|
||||
return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD);
|
||||
}
|
||||
|
||||
protected boolean isNFSkippable(int c) {
|
||||
return NormalizerImpl.isNFSkippable(c,this,
|
||||
(NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD)
|
||||
);
|
||||
}
|
||||
}
|
||||
public static final Mode NFD = new Mode(Norm2AllModes.getNFCInstanceNoIOException().decomp);
|
||||
|
||||
/**
|
||||
* Compatibility decomposition.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public static final Mode NFKD = new NFKDMode();
|
||||
|
||||
private static final class NFKDMode extends Mode {
|
||||
private NFKDMode() {
|
||||
super(Norm2AllModes.getNFKCInstanceNoIOException().decomp);
|
||||
}
|
||||
|
||||
protected int normalize(char[] src, int srcStart, int srcLimit,
|
||||
char[] dest,int destStart,int destLimit,
|
||||
UnicodeSet nx) {
|
||||
int[] trailCC = new int[1];
|
||||
return NormalizerImpl.decompose(src, srcStart,srcLimit,
|
||||
dest, destStart,destLimit,
|
||||
true, trailCC, nx);
|
||||
}
|
||||
|
||||
protected int getMinC() {
|
||||
return NormalizerImpl.MIN_WITH_LEAD_CC;
|
||||
}
|
||||
|
||||
protected IsPrevBoundary getPrevBoundary() {
|
||||
return new IsPrevNFDSafe();
|
||||
}
|
||||
|
||||
protected IsNextBoundary getNextBoundary() {
|
||||
return new IsNextNFDSafe();
|
||||
}
|
||||
|
||||
protected int getMask() {
|
||||
return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD);
|
||||
}
|
||||
|
||||
protected boolean isNFSkippable(int c) {
|
||||
return NormalizerImpl.isNFSkippable(c, this,
|
||||
(NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKD)
|
||||
);
|
||||
}
|
||||
}
|
||||
public static final Mode NFKD = new Mode(Norm2AllModes.getNFKCInstanceNoIOException().decomp);
|
||||
|
||||
/**
|
||||
* Canonical decomposition followed by canonical composition.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public static final Mode NFC = new NFCMode();
|
||||
|
||||
private static final class NFCMode extends Mode{
|
||||
private NFCMode() {
|
||||
super(Norm2AllModes.getNFCInstanceNoIOException().comp);
|
||||
}
|
||||
protected int normalize(char[] src, int srcStart, int srcLimit,
|
||||
char[] dest,int destStart,int destLimit,
|
||||
UnicodeSet nx) {
|
||||
return NormalizerImpl.compose( src, srcStart, srcLimit,
|
||||
dest,destStart,destLimit,
|
||||
0, nx);
|
||||
}
|
||||
|
||||
protected int getMinC() {
|
||||
return NormalizerImpl.getFromIndexesArr(
|
||||
NormalizerImpl.INDEX_MIN_NFC_NO_MAYBE
|
||||
);
|
||||
}
|
||||
protected IsPrevBoundary getPrevBoundary() {
|
||||
return new IsPrevTrueStarter();
|
||||
}
|
||||
protected IsNextBoundary getNextBoundary() {
|
||||
return new IsNextTrueStarter();
|
||||
}
|
||||
protected int getMask() {
|
||||
return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFC);
|
||||
}
|
||||
protected boolean isNFSkippable(int c) {
|
||||
return NormalizerImpl.isNFSkippable(c,this,
|
||||
( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
|
||||
(NormalizerImpl.QC_NFC & NormalizerImpl.QC_ANY_NO)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
public static final Mode NFC = new Mode(Norm2AllModes.getNFCInstanceNoIOException().comp);
|
||||
|
||||
/**
|
||||
* Default normalization.
|
||||
@ -381,42 +269,7 @@ public final class Normalizer implements Cloneable {
|
||||
* Compatibility decomposition followed by canonical composition.
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public static final Mode NFKC =new NFKCMode();
|
||||
|
||||
private static final class NFKCMode extends Mode{
|
||||
private NFKCMode() {
|
||||
super(Norm2AllModes.getNFKCInstanceNoIOException().comp);
|
||||
}
|
||||
protected int normalize(char[] src, int srcStart, int srcLimit,
|
||||
char[] dest,int destStart,int destLimit,
|
||||
UnicodeSet nx) {
|
||||
return NormalizerImpl.compose(src, srcStart,srcLimit,
|
||||
dest, destStart,destLimit,
|
||||
NormalizerImpl.OPTIONS_COMPAT, nx);
|
||||
}
|
||||
|
||||
protected int getMinC() {
|
||||
return NormalizerImpl.getFromIndexesArr(
|
||||
NormalizerImpl.INDEX_MIN_NFKC_NO_MAYBE
|
||||
);
|
||||
}
|
||||
protected IsPrevBoundary getPrevBoundary() {
|
||||
return new IsPrevTrueStarter();
|
||||
}
|
||||
protected IsNextBoundary getNextBoundary() {
|
||||
return new IsNextTrueStarter();
|
||||
}
|
||||
protected int getMask() {
|
||||
return (NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFKC);
|
||||
}
|
||||
protected boolean isNFSkippable(int c) {
|
||||
return NormalizerImpl.isNFSkippable(c, this,
|
||||
( NormalizerImpl.CC_MASK|NormalizerImpl.COMBINES_ANY|
|
||||
(NormalizerImpl.QC_NFKC & NormalizerImpl.QC_ANY_NO)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
public static final Mode NFKC =new Mode(Norm2AllModes.getNFKCInstanceNoIOException().comp);
|
||||
|
||||
/**
|
||||
* "Fast C or D" form.
|
||||
@ -429,30 +282,7 @@ public final class Normalizer implements Cloneable {
|
||||
super(Norm2AllModes.getNFCInstanceNoIOException().fcd);
|
||||
Norm2AllModes.getNFCInstanceNoIOException().impl.getFCDTrie();
|
||||
}
|
||||
protected int normalize(char[] src, int srcStart, int srcLimit,
|
||||
char[] dest,int destStart,int destLimit,
|
||||
UnicodeSet nx) {
|
||||
return NormalizerImpl.makeFCD(src, srcStart,srcLimit,
|
||||
dest, destStart,destLimit, nx);
|
||||
}
|
||||
protected int getMinC() {
|
||||
return NormalizerImpl.MIN_WITH_LEAD_CC;
|
||||
}
|
||||
protected IsPrevBoundary getPrevBoundary() {
|
||||
return new IsPrevNFDSafe();
|
||||
}
|
||||
protected IsNextBoundary getNextBoundary() {
|
||||
return new IsNextNFDSafe();
|
||||
}
|
||||
protected int getMask() {
|
||||
return NormalizerImpl.CC_MASK|NormalizerImpl.QC_NFD;
|
||||
}
|
||||
protected boolean isNFSkippable(int c) {
|
||||
/* FCD: skippable if lead cc==0 and trail cc<=1 */
|
||||
return (NormalizerImpl.getFCD16(c)>1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Null operation for use with the {@link com.ibm.icu.text.Normalizer constructors}
|
||||
@ -645,7 +475,7 @@ public final class Normalizer implements Cloneable {
|
||||
public static final int COMPARE_NORM_OPTIONS_SHIFT = 20;
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Constructors
|
||||
// Iterator constructors
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
@ -835,8 +665,6 @@ public final class Normalizer implements Cloneable {
|
||||
return app.length();
|
||||
}
|
||||
|
||||
private static final int MAX_BUF_SIZE_DECOMPOSE = 3;
|
||||
|
||||
/**
|
||||
* Decompose a string.
|
||||
* The string will be decomposed according to the specified mode.
|
||||
@ -1356,6 +1184,7 @@ public final class Normalizer implements Cloneable {
|
||||
return internalCompare(UTF16.valueOf(char32a), str2, options);
|
||||
}
|
||||
|
||||
/* Concatenation of normalized strings --------------------------------- */
|
||||
/**
|
||||
* Concatenate normalized strings, making sure that the result is normalized
|
||||
* as well.
|
||||
@ -1399,18 +1228,10 @@ public final class Normalizer implements Cloneable {
|
||||
* required length
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
/* Concatenation of normalized strings --------------------------------- */
|
||||
|
||||
public static int concatenate(char[] left, int leftStart, int leftLimit,
|
||||
char[] right, int rightStart, int rightLimit,
|
||||
char[] dest, int destStart, int destLimit,
|
||||
Normalizer.Mode mode, int options) {
|
||||
|
||||
|
||||
UCharacterIterator iter;
|
||||
|
||||
int leftBoundary, rightBoundary, destLength;
|
||||
|
||||
if(dest == null) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
@ -1421,88 +1242,13 @@ public final class Normalizer implements Cloneable {
|
||||
}
|
||||
|
||||
/* allow left==dest */
|
||||
|
||||
/*
|
||||
* Input: left[0..leftLength[ + right[0..rightLength[
|
||||
*
|
||||
* Find normalization-safe boundaries leftBoundary and rightBoundary
|
||||
* and copy the end parts together:
|
||||
* buffer=left[leftBoundary..leftLength[ + right[0..rightBoundary[
|
||||
*
|
||||
* dest=left[0..leftBoundary[ +
|
||||
* normalize(buffer) +
|
||||
* right[rightBoundary..rightLength[
|
||||
*/
|
||||
|
||||
/*
|
||||
* find a normalization boundary at the end of the left string
|
||||
* and copy the end part into the buffer
|
||||
*/
|
||||
|
||||
iter = UCharacterIterator.getInstance(left, leftStart, leftLimit);
|
||||
|
||||
iter.setIndex(iter.getLength()); /* end of left string */
|
||||
char[] buffer=new char[100];
|
||||
int bufferLength;
|
||||
bufferLength=previous(iter, buffer,0,buffer.length,mode,false,null,options);
|
||||
|
||||
leftBoundary=iter.getIndex();
|
||||
|
||||
if(bufferLength>buffer.length) {
|
||||
char[] newBuf = new char[buffer.length*2];
|
||||
buffer = newBuf;
|
||||
newBuf = null; // null the reference for GC
|
||||
/* just copy from the left string: we know the boundary already */
|
||||
System.arraycopy(left,leftBoundary,buffer,0,bufferLength);
|
||||
}
|
||||
|
||||
/*
|
||||
* find a normalization boundary at the beginning of the right string
|
||||
* and concatenate the beginning part to the buffer
|
||||
*/
|
||||
|
||||
iter = UCharacterIterator.getInstance(right, rightStart, rightLimit);
|
||||
|
||||
rightBoundary=next(iter,buffer,bufferLength, buffer.length-bufferLength,
|
||||
mode, false,null, options);
|
||||
|
||||
if(bufferLength>buffer.length) {
|
||||
char[] newBuf = new char[buffer.length*2];
|
||||
buffer = newBuf;
|
||||
newBuf = null; // null the reference for GC
|
||||
/* just copy from the right string: we know the boundary already */
|
||||
System.arraycopy(right,rightBoundary,buffer,
|
||||
bufferLength,rightBoundary);
|
||||
}
|
||||
|
||||
bufferLength+=rightBoundary;
|
||||
|
||||
/* copy left[0..leftBoundary[ to dest */
|
||||
if(left!=dest && leftBoundary>0 && (destLimit)>0) {
|
||||
System.arraycopy(left,0,dest,0, Math.min(leftBoundary,destLimit));
|
||||
}
|
||||
destLength=leftBoundary;
|
||||
|
||||
/* concatenate the normalization of the buffer to dest */
|
||||
if(destLimit>destLength) {
|
||||
destLength+=Normalizer.normalize(buffer,0,bufferLength,dest,
|
||||
destLength,destLimit,mode,options);
|
||||
|
||||
} else {
|
||||
destLength+=Normalizer.normalize(buffer, 0, bufferLength,null,0,0,mode,options);
|
||||
}
|
||||
|
||||
/* concatenate right[rightBoundary..rightLength[ to dest */
|
||||
rightStart+=rightBoundary;
|
||||
int rightLength=(rightLimit-rightStart);
|
||||
if(rightLength>0 && destLimit>destLength) {
|
||||
System.arraycopy(right,rightStart,dest,destLength,
|
||||
Math.min(rightLength,destLimit - destLength)
|
||||
);
|
||||
}
|
||||
destLength+=rightLength;
|
||||
|
||||
StringBuilder destBuilder=new StringBuilder(leftLimit-leftStart+rightLimit-rightStart+16);
|
||||
destBuilder.append(left, leftStart, leftLimit-leftStart);
|
||||
CharBuffer rightBuffer=CharBuffer.wrap(right, rightStart, rightLimit-rightStart);
|
||||
mode.getNormalizer2(options).append(destBuilder, rightBuffer);
|
||||
int destLength=destBuilder.length();
|
||||
if(destLength<=(destLimit-destStart)) {
|
||||
destBuilder.getChars(0, destLength, dest, destStart);
|
||||
return destLength;
|
||||
} else {
|
||||
throw new IndexOutOfBoundsException(Integer.toString(destLength));
|
||||
@ -1537,19 +1283,8 @@ public final class Normalizer implements Cloneable {
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public static String concatenate(char[] left, char[] right,Mode mode, int options) {
|
||||
char[] result = new char[(left.length+right.length)* MAX_BUF_SIZE_DECOMPOSE];
|
||||
for(;;) {
|
||||
|
||||
int length = concatenate(left, 0, left.length,
|
||||
right, 0, right.length,
|
||||
result,0, result.length,
|
||||
mode, options);
|
||||
if(length<=result.length) {
|
||||
return new String(result,0,length);
|
||||
} else {
|
||||
result = new char[length];
|
||||
}
|
||||
}
|
||||
StringBuilder dest=new StringBuilder(left.length+right.length+16).append(left);
|
||||
return mode.getNormalizer2(options).append(dest, CharBuffer.wrap(right)).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1564,7 +1299,11 @@ public final class Normalizer implements Cloneable {
|
||||
* dest=normalize(left+right, mode)
|
||||
* </code>
|
||||
*
|
||||
* For details see concatenate
|
||||
* With the input strings already being normalized,
|
||||
* this function will use next() and previous()
|
||||
* to find the adjacent end pieces of the input strings.
|
||||
* Only the concatenation of these end pieces will be normalized and
|
||||
* then concatenated with the remaining parts of the input strings.
|
||||
*
|
||||
* @param left Left source string.
|
||||
* @param right Right source string.
|
||||
@ -1580,19 +1319,8 @@ public final class Normalizer implements Cloneable {
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public static String concatenate(String left, String right, Mode mode, int options) {
|
||||
char[] result = new char[(left.length()+right.length())* MAX_BUF_SIZE_DECOMPOSE];
|
||||
for(;;) {
|
||||
|
||||
int length = concatenate(left.toCharArray(), 0, left.length(),
|
||||
right.toCharArray(),0, right.length(),
|
||||
result, 0, result.length,
|
||||
mode, options);
|
||||
if(length<=result.length) {
|
||||
return new String(result,0,length);
|
||||
} else {
|
||||
result = new char[length];
|
||||
}
|
||||
}
|
||||
StringBuilder dest=new StringBuilder(left.length()+right.length()+16).append(left);
|
||||
return mode.getNormalizer2(options).append(dest, right).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1621,6 +1349,7 @@ public final class Normalizer implements Cloneable {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Iteration API
|
||||
//-------------------------------------------------------------------------
|
||||
@ -1820,7 +1549,7 @@ public final class Normalizer implements Cloneable {
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Property access methods
|
||||
// Iterator attributes
|
||||
//-------------------------------------------------------------------------
|
||||
/**
|
||||
* Set the normalization mode for this object.
|
||||
@ -2012,466 +1741,6 @@ public final class Normalizer implements Cloneable {
|
||||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Private utility methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
|
||||
/* backward iteration --------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* read backwards and get norm32
|
||||
* return 0 if the character is <minC
|
||||
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
|
||||
* surrogate but read second!)
|
||||
*/
|
||||
|
||||
private static long getPrevNorm32(UCharacterIterator src,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ mask,
|
||||
char[] chars) {
|
||||
long norm32;
|
||||
int ch=0;
|
||||
/* need src.hasPrevious() */
|
||||
if((ch=src.previous()) == UCharacterIterator.DONE) {
|
||||
return 0;
|
||||
}
|
||||
chars[0]=(char)ch;
|
||||
chars[1]=0;
|
||||
|
||||
/* check for a surrogate before getting norm32 to see if we need to
|
||||
* predecrement further */
|
||||
if(chars[0]<minC) {
|
||||
return 0;
|
||||
} else if(!UTF16.isSurrogate(chars[0])) {
|
||||
return NormalizerImpl.getNorm32(chars[0]);
|
||||
} else if(UTF16.isLeadSurrogate(chars[0]) || (src.getIndex()==0)) {
|
||||
/* unpaired surrogate */
|
||||
chars[1]=(char)src.current();
|
||||
return 0;
|
||||
} else if(UTF16.isLeadSurrogate(chars[1]=(char)src.previous())) {
|
||||
norm32=NormalizerImpl.getNorm32(chars[1]);
|
||||
if((norm32&mask)==0) {
|
||||
/* all surrogate pairs with this lead surrogate have irrelevant
|
||||
* data */
|
||||
return 0;
|
||||
} else {
|
||||
/* norm32 must be a surrogate special */
|
||||
return NormalizerImpl.getNorm32FromSurrogatePair(norm32,chars[0]);
|
||||
}
|
||||
} else {
|
||||
/* unpaired second surrogate, undo the c2=src.previous() movement */
|
||||
src.moveIndex( 1);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private interface IsPrevBoundary{
|
||||
public boolean isPrevBoundary(UCharacterIterator src,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ mask,
|
||||
char[] chars);
|
||||
}
|
||||
private static final class IsPrevNFDSafe implements IsPrevBoundary{
|
||||
/*
|
||||
* for NF*D:
|
||||
* read backwards and check if the lead combining class is 0
|
||||
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
|
||||
* surrogate but read second!)
|
||||
*/
|
||||
public boolean isPrevBoundary(UCharacterIterator src,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ ccOrQCMask,
|
||||
char[] chars) {
|
||||
|
||||
return NormalizerImpl.isNFDSafe(getPrevNorm32(src, minC,
|
||||
ccOrQCMask, chars),
|
||||
ccOrQCMask,
|
||||
ccOrQCMask& NormalizerImpl.QC_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class IsPrevTrueStarter implements IsPrevBoundary{
|
||||
/*
|
||||
* read backwards and check if the character is (or its decomposition
|
||||
* begins with) a "true starter" (cc==0 and NF*C_YES)
|
||||
* if c2!=0 then (c2, c) is a surrogate pair (reversed - c2 is first
|
||||
* surrogate but read second!)
|
||||
*/
|
||||
public boolean isPrevBoundary(UCharacterIterator src,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ ccOrQCMask,
|
||||
char[] chars) {
|
||||
long norm32;
|
||||
int/*unsigned*/ decompQCMask;
|
||||
|
||||
decompQCMask=(ccOrQCMask<<2)&0xf; /*decomposition quick check mask*/
|
||||
norm32=getPrevNorm32(src, minC, ccOrQCMask|decompQCMask, chars);
|
||||
return NormalizerImpl.isTrueStarter(norm32,ccOrQCMask,decompQCMask);
|
||||
}
|
||||
}
|
||||
|
||||
private static int findPreviousIterationBoundary(UCharacterIterator src,
|
||||
IsPrevBoundary obj,
|
||||
int/*unsigned*/ minC,
|
||||
int/*mask*/ mask,
|
||||
char[] buffer,
|
||||
int[] startIndex) {
|
||||
char[] chars=new char[2];
|
||||
boolean isBoundary;
|
||||
|
||||
/* fill the buffer from the end backwards */
|
||||
startIndex[0] = buffer.length;
|
||||
chars[0]=0;
|
||||
while(src.getIndex()>0 && chars[0]!=UCharacterIterator.DONE) {
|
||||
isBoundary=obj.isPrevBoundary(src, minC, mask, chars);
|
||||
|
||||
/* always write this character to the front of the buffer */
|
||||
/* make sure there is enough space in the buffer */
|
||||
if(startIndex[0] < (chars[1]==0 ? 1 : 2)) {
|
||||
|
||||
// grow the buffer
|
||||
char[] newBuf = new char[buffer.length*2];
|
||||
/* move the current buffer contents up */
|
||||
System.arraycopy(buffer,startIndex[0],newBuf,
|
||||
newBuf.length-(buffer.length-startIndex[0]),
|
||||
buffer.length-startIndex[0]);
|
||||
//adjust the startIndex
|
||||
startIndex[0]+=newBuf.length-buffer.length;
|
||||
|
||||
buffer=newBuf;
|
||||
newBuf=null;
|
||||
|
||||
}
|
||||
|
||||
buffer[--startIndex[0]]=chars[0];
|
||||
if(chars[1]!=0) {
|
||||
buffer[--startIndex[0]]=chars[1];
|
||||
}
|
||||
|
||||
/* stop if this just-copied character is a boundary */
|
||||
if(isBoundary) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* return the length of the buffer contents */
|
||||
return buffer.length-startIndex[0];
|
||||
}
|
||||
|
||||
private static int previous(UCharacterIterator src,
|
||||
char[] dest, int destStart, int destLimit,
|
||||
Mode mode,
|
||||
boolean doNormalize,
|
||||
boolean[] pNeededToNormalize,
|
||||
int options) {
|
||||
|
||||
IsPrevBoundary isPreviousBoundary;
|
||||
int destLength, bufferLength;
|
||||
int/*unsigned*/ mask;
|
||||
|
||||
int c,c2;
|
||||
|
||||
char minC;
|
||||
int destCapacity = destLimit-destStart;
|
||||
destLength=0;
|
||||
|
||||
|
||||
if(pNeededToNormalize!=null) {
|
||||
pNeededToNormalize[0]=false;
|
||||
}
|
||||
minC = (char)mode.getMinC();
|
||||
mask = mode.getMask();
|
||||
isPreviousBoundary = mode.getPrevBoundary();
|
||||
|
||||
if(isPreviousBoundary==null) {
|
||||
destLength=0;
|
||||
if((c=src.previous())>=0) {
|
||||
destLength=1;
|
||||
if(UTF16.isTrailSurrogate((char)c)) {
|
||||
c2= src.previous();
|
||||
if(c2!= UCharacterIterator.DONE) {
|
||||
if(UTF16.isLeadSurrogate((char)c2)) {
|
||||
if(destCapacity>=2) {
|
||||
dest[1]=(char)c; // trail surrogate
|
||||
destLength=2;
|
||||
}
|
||||
// lead surrogate to be written below
|
||||
c=c2;
|
||||
} else {
|
||||
src.moveIndex(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(destCapacity>0) {
|
||||
dest[0]=(char)c;
|
||||
}
|
||||
}
|
||||
return destLength;
|
||||
}
|
||||
|
||||
char[] buffer = new char[100];
|
||||
int[] startIndex= new int[1];
|
||||
bufferLength=findPreviousIterationBoundary(src,
|
||||
isPreviousBoundary,
|
||||
minC, mask,buffer,
|
||||
startIndex);
|
||||
if(bufferLength>0) {
|
||||
if(doNormalize) {
|
||||
destLength=Normalizer.normalize(buffer,startIndex[0],
|
||||
startIndex[0]+bufferLength,
|
||||
dest, destStart,destLimit,
|
||||
mode, options);
|
||||
|
||||
if(pNeededToNormalize!=null) {
|
||||
pNeededToNormalize[0]=(destLength!=bufferLength ||
|
||||
Utility.arrayRegionMatches(
|
||||
buffer,0,dest,
|
||||
destStart,destLimit
|
||||
));
|
||||
}
|
||||
} else {
|
||||
/* just copy the source characters */
|
||||
if(destCapacity>0) {
|
||||
System.arraycopy(buffer,startIndex[0],dest,0,
|
||||
(bufferLength<destCapacity) ?
|
||||
bufferLength : destCapacity);
|
||||
}
|
||||
destLength = bufferLength;
|
||||
}
|
||||
} else {
|
||||
destLength = bufferLength;
|
||||
}
|
||||
|
||||
|
||||
return destLength;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* forward iteration ---------------------------------------------------- */
|
||||
/*
|
||||
* read forward and check if the character is a next-iteration boundary
|
||||
* if c2!=0 then (c, c2) is a surrogate pair
|
||||
*/
|
||||
private interface IsNextBoundary{
|
||||
boolean isNextBoundary(UCharacterIterator src,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ mask,
|
||||
int[] chars);
|
||||
}
|
||||
/*
|
||||
* read forward and get norm32
|
||||
* return 0 if the character is <minC
|
||||
* if c2!=0 then (c, c2) is a surrogate pair
|
||||
* always reads complete characters
|
||||
*/
|
||||
private static long /*unsigned*/ getNextNorm32(UCharacterIterator src,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ mask,
|
||||
int[] chars) {
|
||||
long norm32;
|
||||
|
||||
/* need src.hasNext() to be true */
|
||||
chars[0]=src.next();
|
||||
chars[1]=0;
|
||||
|
||||
if(chars[0]<minC) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
norm32=NormalizerImpl.getNorm32((char)chars[0]);
|
||||
if(UTF16.isLeadSurrogate((char)chars[0])) {
|
||||
if(src.current()!=UCharacterIterator.DONE &&
|
||||
UTF16.isTrailSurrogate((char)(chars[1]=src.current()))) {
|
||||
src.moveIndex(1); /* skip the c2 surrogate */
|
||||
if((norm32&mask)==0) {
|
||||
/* irrelevant data */
|
||||
return 0;
|
||||
} else {
|
||||
/* norm32 must be a surrogate special */
|
||||
return NormalizerImpl.getNorm32FromSurrogatePair(norm32,(char)chars[1]);
|
||||
}
|
||||
} else {
|
||||
/* unmatched surrogate */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return norm32;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* for NF*D:
|
||||
* read forward and check if the lead combining class is 0
|
||||
* if c2!=0 then (c, c2) is a surrogate pair
|
||||
*/
|
||||
private static final class IsNextNFDSafe implements IsNextBoundary{
|
||||
public boolean isNextBoundary(UCharacterIterator src,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ ccOrQCMask,
|
||||
int[] chars) {
|
||||
return NormalizerImpl.isNFDSafe(getNextNorm32(src,minC,ccOrQCMask,chars),
|
||||
ccOrQCMask, ccOrQCMask&NormalizerImpl.QC_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* for NF*C:
|
||||
* read forward and check if the character is (or its decomposition begins
|
||||
* with) a "true starter" (cc==0 and NF*C_YES)
|
||||
* if c2!=0 then (c, c2) is a surrogate pair
|
||||
*/
|
||||
private static final class IsNextTrueStarter implements IsNextBoundary{
|
||||
public boolean isNextBoundary(UCharacterIterator src,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ ccOrQCMask,
|
||||
int[] chars) {
|
||||
long norm32;
|
||||
int/*unsigned*/ decompQCMask;
|
||||
|
||||
decompQCMask=(ccOrQCMask<<2)&0xf; /*decomposition quick check mask*/
|
||||
norm32=getNextNorm32(src, minC, ccOrQCMask|decompQCMask, chars);
|
||||
return NormalizerImpl.isTrueStarter(norm32, ccOrQCMask, decompQCMask);
|
||||
}
|
||||
}
|
||||
|
||||
private static int findNextIterationBoundary(UCharacterIterator src,
|
||||
IsNextBoundary obj,
|
||||
int/*unsigned*/ minC,
|
||||
int/*unsigned*/ mask,
|
||||
char[] buffer) {
|
||||
int[] chars = new int[2];
|
||||
int bufferIndex =0;
|
||||
|
||||
if(src.current()==UCharacterIterator.DONE) {
|
||||
return 0;
|
||||
}
|
||||
/* get one character and ignore its properties */
|
||||
chars[0]=src.next();
|
||||
buffer[0]=(char)chars[0];
|
||||
bufferIndex=1;
|
||||
|
||||
if(UTF16.isLeadSurrogate((char)chars[0])&&
|
||||
src.current()!=UCharacterIterator.DONE) {
|
||||
if(UTF16.isTrailSurrogate((char)(chars[1]=src.next()))) {
|
||||
buffer[bufferIndex++]=(char)chars[1];
|
||||
} else {
|
||||
src.moveIndex(-1); /* back out the non-trail-surrogate */
|
||||
}
|
||||
}
|
||||
|
||||
/* get all following characters until we see a boundary */
|
||||
/* checking hasNext() instead of c!=DONE on the off-chance that U+ffff
|
||||
* is part of the string */
|
||||
while( src.current()!=UCharacterIterator.DONE) {
|
||||
if(obj.isNextBoundary(src, minC, mask, chars)) {
|
||||
/* back out the latest movement to stop at the boundary */
|
||||
src.moveIndex(chars[1]==0 ? -1 : -2);
|
||||
break;
|
||||
} else {
|
||||
if(bufferIndex+(chars[1]==0 ? 1 : 2)<=buffer.length) {
|
||||
buffer[bufferIndex++]=(char)chars[0];
|
||||
if(chars[1]!=0) {
|
||||
buffer[bufferIndex++]=(char)chars[1];
|
||||
}
|
||||
} else {
|
||||
char[] newBuf = new char[buffer.length *2];
|
||||
System.arraycopy(buffer,0,newBuf,0,bufferIndex);
|
||||
buffer = newBuf;
|
||||
buffer[bufferIndex++]=(char)chars[0];
|
||||
if(chars[1]!=0) {
|
||||
buffer[bufferIndex++]=(char)chars[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* return the length of the buffer contents */
|
||||
return bufferIndex;
|
||||
}
|
||||
|
||||
private static int next(UCharacterIterator src,
|
||||
char[] dest, int destStart, int destLimit,
|
||||
Normalizer.Mode mode,
|
||||
boolean doNormalize,
|
||||
boolean[] pNeededToNormalize,
|
||||
int options) {
|
||||
|
||||
IsNextBoundary isNextBoundary;
|
||||
int /*unsigned*/ mask;
|
||||
int /*unsigned*/ bufferLength;
|
||||
int c,c2;
|
||||
char minC;
|
||||
int destCapacity = destLimit - destStart;
|
||||
int destLength = 0;
|
||||
|
||||
if(pNeededToNormalize!=null) {
|
||||
pNeededToNormalize[0]=false;
|
||||
}
|
||||
|
||||
minC = (char)mode.getMinC();
|
||||
mask = mode.getMask();
|
||||
isNextBoundary = mode.getNextBoundary();
|
||||
|
||||
if(isNextBoundary==null) {
|
||||
destLength=0;
|
||||
c=src.next();
|
||||
if(c!=UCharacterIterator.DONE) {
|
||||
destLength=1;
|
||||
if(UTF16.isLeadSurrogate((char)c)) {
|
||||
c2= src.next();
|
||||
if(c2!= UCharacterIterator.DONE) {
|
||||
if(UTF16.isTrailSurrogate((char)c2)) {
|
||||
if(destCapacity>=2) {
|
||||
dest[1]=(char)c2; // trail surrogate
|
||||
destLength=2;
|
||||
}
|
||||
// lead surrogate to be written below
|
||||
} else {
|
||||
src.moveIndex(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(destCapacity>0) {
|
||||
dest[0]=(char)c;
|
||||
}
|
||||
}
|
||||
return destLength;
|
||||
}
|
||||
|
||||
char[] buffer=new char[100];
|
||||
int[] startIndex = new int[1];
|
||||
|
||||
bufferLength=findNextIterationBoundary(src,isNextBoundary, minC, mask,
|
||||
buffer);
|
||||
if(bufferLength>0) {
|
||||
if(doNormalize) {
|
||||
destLength=mode.normalize(buffer,startIndex[0],bufferLength,
|
||||
dest,destStart,destLimit, options);
|
||||
|
||||
if(pNeededToNormalize!=null) {
|
||||
pNeededToNormalize[0]=(destLength!=bufferLength ||
|
||||
Utility.arrayRegionMatches(buffer,startIndex[0],
|
||||
dest,destStart,
|
||||
destLength));
|
||||
}
|
||||
} else {
|
||||
/* just copy the source characters */
|
||||
if(destCapacity>0) {
|
||||
System.arraycopy(buffer,0,dest,destStart,
|
||||
Math.min(bufferLength,destCapacity - destStart));
|
||||
}
|
||||
destLength = bufferLength;
|
||||
}
|
||||
} else {
|
||||
destLength = bufferLength;
|
||||
}
|
||||
return destLength;
|
||||
}
|
||||
|
||||
private void clearBuffer() {
|
||||
buffer.delete(0, 0x7fffffff);
|
||||
bufferPos=0;
|
||||
@ -2527,10 +1796,10 @@ public final class Normalizer implements Cloneable {
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
public static boolean isNFSkippable(int c, Mode mode) {
|
||||
return mode.isNFSkippable(c);
|
||||
return mode.normalizer2.isInert(c);
|
||||
}
|
||||
|
||||
// TODO: Consider proposing this function as public API.
|
||||
// TODO: Broaden the public compare(String, String, options) API like this. Ticket #7407
|
||||
private static int internalCompare(CharSequence s1, CharSequence s2, int options) {
|
||||
int normOptions=options>>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
|
||||
options|= NormalizerImpl.COMPARE_EQUIV;
|
||||
|
@ -1547,37 +1547,9 @@ public class BasicTest extends TestFmwk {
|
||||
/* ### TODO: add more interesting cases */
|
||||
{
|
||||
Normalizer.NFD,
|
||||
"\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958" +
|
||||
"\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC" +
|
||||
"\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E" +
|
||||
"\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69" +
|
||||
"\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2" +
|
||||
"\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79" +
|
||||
"\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB",
|
||||
|
||||
"\u1FE3\u1FEB\u1FEE\u1FEF\u1FF9\u1FFB\u1FFD\u2000" +
|
||||
"\u2001\u2126\u212A\u212B\u2329\u232A\uF900\uFA10" +
|
||||
"\uFA12\uFA15\uFA20\uFA22\uFA25\uFA26\uFA2A\uFB1F" +
|
||||
"\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E\uFB2F\uFB30\uFB31" +
|
||||
"\uFB32\uFB33\uFB34\uFB35\uFB36\uFB38\uFB39\uFB3A" +
|
||||
"\uFB3B\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46" +
|
||||
"\uFB47\uFB48\uFB49\uFB4A\uFB4B\uFB4C\uFB4D\uFB4E",
|
||||
|
||||
"\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958" +
|
||||
"\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC" +
|
||||
"\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E" +
|
||||
"\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69" +
|
||||
"\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2" +
|
||||
"\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79" +
|
||||
"\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u0399" +
|
||||
"\u0301\u03C5\u0308\u0301\u1FEB\u1FEE\u1FEF\u1FF9" +
|
||||
"\u1FFB\u1FFD\u2000\u2001\u2126\u212A\u212B\u2329" +
|
||||
"\u232A\uF900\uFA10\uFA12\uFA15\uFA20\uFA22\uFA25" +
|
||||
"\uFA26\uFA2A\uFB1F\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E" +
|
||||
"\uFB2F\uFB30\uFB31\uFB32\uFB33\uFB34\uFB35\uFB36" +
|
||||
"\uFB38\uFB39\uFB3A\uFB3B\uFB3C\uFB3E\uFB40\uFB41" +
|
||||
"\uFB43\uFB44\uFB46\uFB47\uFB48\uFB49\uFB4A\uFB4B" +
|
||||
"\uFB4C\uFB4D\uFB4E"
|
||||
"\u03B1\u0345",
|
||||
"\u0C4D\uD804\uDCBA\uD834\uDD69", // 0C4D 110BA 1D169
|
||||
"\u03B1\uD834\uDD69\uD804\uDCBA\u0C4D\u0345" // 03B1 1D169 110BA 0C4D 0345
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user