ICU-7273 remove old NormalizerImpl.getDecomposition()
X-SVN-Rev: 27504
This commit is contained in:
parent
016cd3ca66
commit
f90578937a
@ -14,6 +14,8 @@ import java.util.Hashtable;
|
||||
import java.util.Vector;
|
||||
|
||||
import com.ibm.icu.impl.IntTrieBuilder;
|
||||
import com.ibm.icu.impl.Norm2AllModes;
|
||||
import com.ibm.icu.impl.Normalizer2Impl;
|
||||
import com.ibm.icu.impl.NormalizerImpl;
|
||||
import com.ibm.icu.impl.TrieBuilder;
|
||||
import com.ibm.icu.impl.TrieIterator;
|
||||
@ -1326,7 +1328,7 @@ final class CollationParsedRuleBuilder {
|
||||
new WeightRange(), new WeightRange(), new WeightRange(),
|
||||
new WeightRange() };
|
||||
private WeightRange m_utilWeightRange_ = new WeightRange();
|
||||
private char m_utilCharBuffer_[] = new char[256];
|
||||
private Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstanceNoIOException().impl;
|
||||
private CanonicalIterator m_utilCanIter_ = new CanonicalIterator("");
|
||||
private StringBuilder m_utilStringBuffer_ = new StringBuilder("");
|
||||
// Flag indicating a combining marks table is required or not.
|
||||
@ -3856,12 +3858,9 @@ final class CollationParsedRuleBuilder {
|
||||
// if the range is assigned - we might omit more categories later
|
||||
|
||||
for (int u32 = start; u32 < limit; u32++) {
|
||||
int noOfDec = NormalizerImpl.getDecomposition(u32, false,
|
||||
m_utilCharBuffer_, 0, 256);
|
||||
if (noOfDec > 0) {
|
||||
// if we're positive, that means there is a decomposition
|
||||
String decomp = nfcImpl.getDecomposition(u32);
|
||||
if (decomp != null) {
|
||||
String comp = UCharacter.toString(u32);
|
||||
String decomp = new String(m_utilCharBuffer_, 0, noOfDec);
|
||||
if (!collator.equals(comp, decomp)) {
|
||||
m_utilElement_.m_cPoints_ = decomp;
|
||||
m_utilElement_.m_prefix_ = 0;
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
@ -1037,95 +1037,6 @@ public final class NormalizerImpl {
|
||||
//------------------------------------------------------
|
||||
// make NFD & NFKD
|
||||
//------------------------------------------------------
|
||||
public static int getDecomposition(int c /*UTF-32*/ ,
|
||||
boolean compat,
|
||||
char[] dest,
|
||||
int destStart,
|
||||
int destCapacity) {
|
||||
|
||||
if( (UNSIGNED_INT_MASK & c)<=0x10ffff) {
|
||||
long /*unsigned*/ norm32;
|
||||
int qcMask;
|
||||
int minNoMaybe;
|
||||
int length;
|
||||
|
||||
// initialize
|
||||
if(!compat) {
|
||||
minNoMaybe = indexes[INDEX_MIN_NFD_NO_MAYBE];
|
||||
qcMask = QC_NFD;
|
||||
} else {
|
||||
minNoMaybe = indexes[INDEX_MIN_NFKD_NO_MAYBE];
|
||||
qcMask = QC_NFKD;
|
||||
}
|
||||
|
||||
if(c<minNoMaybe) {
|
||||
// trivial case
|
||||
if(destCapacity>0) {
|
||||
dest[0]=(char)c;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* data lookup */
|
||||
norm32=getNorm32(c);
|
||||
if((norm32&qcMask)==0) {
|
||||
/* simple case: no decomposition */
|
||||
if(c<=0xffff) {
|
||||
if(destCapacity>0) {
|
||||
dest[0]=(char)c;
|
||||
}
|
||||
return -1;
|
||||
} else {
|
||||
if(destCapacity>=2) {
|
||||
dest[0]=UTF16.getLeadSurrogate(c);
|
||||
dest[1]=UTF16.getTrailSurrogate(c);
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
} else if(isNorm32HangulOrJamo(norm32)) {
|
||||
/* Hangul syllable: decompose algorithmically */
|
||||
char c2;
|
||||
|
||||
c-=HANGUL_BASE;
|
||||
|
||||
c2=(char)(c%JAMO_T_COUNT);
|
||||
c/=JAMO_T_COUNT;
|
||||
if(c2>0) {
|
||||
if(destCapacity>=3) {
|
||||
dest[2]=(char)(JAMO_T_BASE+c2);
|
||||
}
|
||||
length=3;
|
||||
} else {
|
||||
length=2;
|
||||
}
|
||||
|
||||
if(destCapacity>=2) {
|
||||
dest[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT);
|
||||
dest[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT);
|
||||
}
|
||||
return length;
|
||||
} else {
|
||||
/* c decomposes, get everything from the variable-length extra
|
||||
* data
|
||||
*/
|
||||
int p, limit;
|
||||
DecomposeArgs args = new DecomposeArgs();
|
||||
/* the index into extra data array*/
|
||||
p=decompose(norm32, qcMask, args);
|
||||
if(args.length<=destCapacity) {
|
||||
limit=p+args.length;
|
||||
do {
|
||||
dest[destStart++]=extraData[p++];
|
||||
} while(p<limit);
|
||||
}
|
||||
return args.length;
|
||||
}
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static int decompose(char[] src,int srcStart,int srcLimit,
|
||||
char[] dest,int destStart,int destLimit,
|
||||
boolean compat,int[] outTrailCC,
|
||||
|
@ -3731,7 +3731,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
return String.valueOf((char)ch);
|
||||
}
|
||||
|
||||
StringBuffer result = new StringBuffer();
|
||||
StringBuilder result = new StringBuilder();
|
||||
result.append(UTF16.getLeadSurrogate(ch));
|
||||
result.append(UTF16.getTrailSurrogate(ch));
|
||||
return result.toString();
|
||||
|
@ -1,13 +1,18 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
import com.ibm.icu.impl.NormalizerImpl;
|
||||
import com.ibm.icu.impl.Norm2AllModes;
|
||||
import com.ibm.icu.impl.Normalizer2Impl;
|
||||
|
||||
/**
|
||||
* This class has been deprecated since ICU 2.2.
|
||||
* One problem is that this class is not designed to return supplementary characters.
|
||||
* Use the Normalizer2 and UCharacter classes instead.
|
||||
* <p>
|
||||
* <tt>ComposedCharIter</tt> is an iterator class that returns all
|
||||
* of the precomposed characters defined in the Unicode standard, along
|
||||
* with their decomposed forms. This is often useful when building
|
||||
@ -50,7 +55,6 @@ import com.ibm.icu.impl.NormalizerImpl;
|
||||
*/
|
||||
///CLOVER:OFF
|
||||
public final class ComposedCharIter {
|
||||
|
||||
/**
|
||||
* Constant that indicates the iteration has completed.
|
||||
* {@link #next} returns this value when there are no more composed characters
|
||||
@ -58,7 +62,7 @@ public final class ComposedCharIter {
|
||||
* @deprecated ICU 2.2
|
||||
*/
|
||||
public static final char DONE = (char) Normalizer.DONE;
|
||||
|
||||
|
||||
/**
|
||||
* Construct a new <tt>ComposedCharIter</tt>. The iterator will return
|
||||
* all Unicode characters with canonical decompositions, including Korean
|
||||
@ -66,11 +70,9 @@ public final class ComposedCharIter {
|
||||
* @deprecated ICU 2.2
|
||||
*/
|
||||
public ComposedCharIter() {
|
||||
compat = false;
|
||||
//options =0;
|
||||
this(false, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
|
||||
* <p>
|
||||
@ -78,18 +80,17 @@ public final class ComposedCharIter {
|
||||
* <tt>true</tt> for both canonical and compatibility
|
||||
* decompositions.
|
||||
*
|
||||
* @param options Optional decomposition features. Currently, the only
|
||||
* supported option is {@link Normalizer#IGNORE_HANGUL}, which
|
||||
* causes this <tt>ComposedCharIter</tt> not to iterate
|
||||
* over the Hangul characters and their corresponding
|
||||
* Jamo decompositions.
|
||||
* @param options Optional decomposition features. None are supported, so this is ignored.
|
||||
* @deprecated ICU 2.2
|
||||
*/
|
||||
public ComposedCharIter(boolean compat, int options) {
|
||||
this.compat = compat;
|
||||
//this.options = options;
|
||||
if(compat) {
|
||||
n2impl = Norm2AllModes.getNFKCInstanceNoIOException().impl;
|
||||
} else {
|
||||
n2impl = Norm2AllModes.getNFCInstanceNoIOException().impl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines whether there are any precomposed Unicode characters not yet returned
|
||||
* by {@link #next}.
|
||||
@ -129,36 +130,35 @@ public final class ComposedCharIter {
|
||||
public String decomposition() {
|
||||
// the decomposition buffer contains the decomposition of
|
||||
// current char so just return it
|
||||
return new String(decompBuf,0, bufLen);
|
||||
if(decompBuf != null) {
|
||||
return decompBuf;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void findNextChar() {
|
||||
int c=curChar+1;
|
||||
for(;;){
|
||||
if(c < 0xFFFF){
|
||||
bufLen = NormalizerImpl.getDecomposition(c,compat,
|
||||
decompBuf,0,
|
||||
decompBuf.length);
|
||||
if(bufLen>0){
|
||||
decompBuf = null;
|
||||
for(;;) {
|
||||
if(c < 0xFFFF) {
|
||||
decompBuf = n2impl.getDecomposition(c);
|
||||
if(decompBuf != null) {
|
||||
// the curChar can be decomposed... so it is a composed char
|
||||
// cache the result
|
||||
break;
|
||||
}
|
||||
c++;
|
||||
}else{
|
||||
c=Normalizer.DONE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
c++;
|
||||
} else {
|
||||
c=Normalizer.DONE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
nextChar=c;
|
||||
}
|
||||
|
||||
//private int options;
|
||||
private boolean compat;
|
||||
private char[] decompBuf = new char[100];
|
||||
private int bufLen=0;
|
||||
|
||||
private final Normalizer2Impl n2impl;
|
||||
private String decompBuf;
|
||||
private int curChar = 0;
|
||||
private int nextChar = Normalizer.DONE;
|
||||
|
||||
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ package com.ibm.icu.text;
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.impl.Norm2AllModes;
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
|
||||
/**
|
||||
@ -63,7 +64,7 @@ public abstract class Normalizer2 {
|
||||
* @draft ICU 4.4
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
enum Mode {
|
||||
public enum Mode {
|
||||
/**
|
||||
* Decomposition followed by composition.
|
||||
* Same as standard NFC when using an "nfc" instance.
|
||||
@ -132,7 +133,14 @@ public abstract class Normalizer2 {
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Normalizer2 getInstance(InputStream data, String name, Mode mode) throws IOException {
|
||||
return null;
|
||||
Norm2AllModes all2Modes=Norm2AllModes.getInstance(data, name);
|
||||
switch(mode) {
|
||||
case COMPOSE: return all2Modes.comp;
|
||||
case DECOMPOSE: return all2Modes.decomp;
|
||||
case FCD: return all2Modes.fcd;
|
||||
case COMPOSE_CONTIGUOUS: return all2Modes.fcc;
|
||||
default: return null; // will not occur
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2670,7 +2670,7 @@ public final class UTF16 {
|
||||
return String.valueOf((char) ch);
|
||||
}
|
||||
|
||||
StringBuffer result = new StringBuffer();
|
||||
StringBuilder result = new StringBuilder();
|
||||
result.append(getLeadSurrogate(ch));
|
||||
result.append(getTrailSurrogate(ch));
|
||||
return result.toString();
|
||||
|
@ -1,6 +1,6 @@
|
||||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2009, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -8,6 +8,7 @@
|
||||
package com.ibm.icu.dev.test.lang;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
|
||||
@ -26,6 +27,7 @@ import com.ibm.icu.lang.UCharacterDirection;
|
||||
import com.ibm.icu.lang.UCharacterEnums;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
@ -2279,9 +2281,7 @@ public final class UCharacterTest extends TestFmwk
|
||||
}
|
||||
|
||||
/* various tests for consistency of UCD data and API behavior */
|
||||
public void TestConsistency() {
|
||||
char[] buffer16 = new char[300];
|
||||
char[] buffer = new char[300];
|
||||
public void TestConsistency() throws IOException {
|
||||
UnicodeSet set1, set2, set3, set4;
|
||||
|
||||
USerializedSet sset;
|
||||
@ -2366,6 +2366,7 @@ public final class UCharacterTest extends TestFmwk
|
||||
* In general, the set for the middle such character should be a subset
|
||||
* of the set for the first.
|
||||
*/
|
||||
Normalizer2 norm2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);
|
||||
set1=new UnicodeSet();
|
||||
set2=new UnicodeSet();
|
||||
sset = new USerializedSet();
|
||||
@ -2374,7 +2375,8 @@ public final class UCharacterTest extends TestFmwk
|
||||
|
||||
/* enumerate all characters that are plausible to be latin letters */
|
||||
for(start=0xa0; start<0x2000; ++start) {
|
||||
if(NormalizerImpl.getDecomposition(start, false, buffer16,0,buffer16.length) > 1 && buffer[0]==0x0049) {
|
||||
String decomp=norm2.normalize(UTF16.valueOf(start));
|
||||
if(decomp.length() > 1 && decomp.charAt(0)==0x49) {
|
||||
set2.add(start);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2007, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -146,10 +146,10 @@ public class TestDeprecatedNormalizerAPI extends TestFmwk
|
||||
|
||||
ComposedCharIter iter = new ComposedCharIter(false, options);
|
||||
while (iter.hasNext()) {
|
||||
char ch = iter.next();
|
||||
final char ch = iter.next();
|
||||
|
||||
String chStr = new StringBuffer().append(ch).toString();
|
||||
String decomp = Normalizer.decompose(chStr, compat);
|
||||
String chStr = String.valueOf(ch);
|
||||
String decomp = iter.decomposition();
|
||||
String comp = Normalizer.compose(decomp, compat);
|
||||
|
||||
if (NormalizerImpl.isFullCompositionExclusion(ch)) {
|
||||
@ -158,19 +158,14 @@ public class TestDeprecatedNormalizerAPI extends TestFmwk
|
||||
}
|
||||
|
||||
// Avoid disparaged characters
|
||||
if (getDecomposition(ch,compat).length() == 4) continue;
|
||||
if (decomp.length() == 4) continue;
|
||||
|
||||
if (!comp.equals(chStr)) {
|
||||
errln("ERROR: Round trip invalid: " + hex(chStr) + " --> " + hex(decomp)
|
||||
+ " --> " + hex(comp));
|
||||
|
||||
errln(" char decomp is '" + getDecomposition(ch,compat) + "'");
|
||||
errln(" char decomp is '" + decomp + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
private String getDecomposition(char ch, boolean compat){
|
||||
char[] dest = new char[10];
|
||||
int length = NormalizerImpl.getDecomposition(ch,compat,dest,0,dest.length);
|
||||
return new String(dest,0,length);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user