ICU-7273 remove old NormalizerImpl.getDecomposition()

X-SVN-Rev: 27504
This commit is contained in:
Markus Scherer 2010-02-05 22:36:34 +00:00
parent 016cd3ca66
commit f90578937a
8 changed files with 69 additions and 154 deletions

View File

@ -14,6 +14,8 @@ import java.util.Hashtable;
import java.util.Vector;
import com.ibm.icu.impl.IntTrieBuilder;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Normalizer2Impl;
import com.ibm.icu.impl.NormalizerImpl;
import com.ibm.icu.impl.TrieBuilder;
import com.ibm.icu.impl.TrieIterator;
@ -1326,7 +1328,7 @@ final class CollationParsedRuleBuilder {
new WeightRange(), new WeightRange(), new WeightRange(),
new WeightRange() };
private WeightRange m_utilWeightRange_ = new WeightRange();
private char m_utilCharBuffer_[] = new char[256];
private Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstanceNoIOException().impl;
private CanonicalIterator m_utilCanIter_ = new CanonicalIterator("");
private StringBuilder m_utilStringBuffer_ = new StringBuilder("");
// Flag indicating a combining marks table is required or not.
@ -3856,12 +3858,9 @@ final class CollationParsedRuleBuilder {
// if the range is assigned - we might omit more categories later
for (int u32 = start; u32 < limit; u32++) {
int noOfDec = NormalizerImpl.getDecomposition(u32, false,
m_utilCharBuffer_, 0, 256);
if (noOfDec > 0) {
// a positive length means that the code point does have a decomposition
String decomp = nfcImpl.getDecomposition(u32);
if (decomp != null) {
String comp = UCharacter.toString(u32);
String decomp = new String(m_utilCharBuffer_, 0, noOfDec);
if (!collator.equals(comp, decomp)) {
m_utilElement_.m_cPoints_ = decomp;
m_utilElement_.m_prefix_ = 0;

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
* Copyright (C) 1996-2009, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 1996-2010, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -1037,95 +1037,6 @@ public final class NormalizerImpl {
//------------------------------------------------------
// make NFD & NFKD
//------------------------------------------------------
/**
 * Looks up the decomposition of a single code point and copies it into
 * <code>dest</code>, using the NFD data when <code>compat</code> is false
 * and the NFKD data when it is true.
 * <p>
 * The sign of the result tells the caller whether a decomposition exists:
 * a positive value is the length of the decomposition, a negative value
 * means the code point does not decompose, and zero means the code point
 * is out of range.
 *
 * @param c            the code point to look up
 * @param compat       false selects QC_NFD / INDEX_MIN_NFD_NO_MAYBE,
 *                     true selects QC_NFKD / INDEX_MIN_NFKD_NO_MAYBE
 * @param dest         output buffer
 * @param destStart    write offset into <code>dest</code>.
 *                     NOTE(review): only the extra-data branch at the end
 *                     of this method uses it; every other branch writes
 *                     starting at index 0. All callers visible alongside
 *                     this method pass 0 - confirm before passing anything
 *                     else.
 * @param destCapacity number of chars that may be written; when it is too
 *                     small the output is skipped (or, for a three-unit
 *                     Hangul result with capacity 2, truncated) but the
 *                     length is still returned
 * @return 0 if <code>c</code> is above 0x10ffff; -1 or -2 if <code>c</code>
 *         has no decomposition (the negated number of UTF-16 code units of
 *         <code>c</code> itself, which is what gets written to
 *         <code>dest</code>); otherwise the positive length of the
 *         decomposition in UTF-16 code units
 */
public static int getDecomposition(int c /*UTF-32*/ ,
boolean compat,
char[] dest,
int destStart,
int destCapacity) {
// treat c as unsigned so that negative values are rejected as out of range
if( (UNSIGNED_INT_MASK & c)<=0x10ffff) {
long /*unsigned*/ norm32;
int qcMask;
int minNoMaybe;
int length;
// initialize: pick the quick-check mask and the lower bound that
// belong to the requested normalization form
if(!compat) {
minNoMaybe = indexes[INDEX_MIN_NFD_NO_MAYBE];
qcMask = QC_NFD;
} else {
minNoMaybe = indexes[INDEX_MIN_NFKD_NO_MAYBE];
qcMask = QC_NFKD;
}
if(c<minNoMaybe) {
// trivial case: below the threshold, so no data lookup is needed;
// the code point is copied through unchanged
if(destCapacity>0) {
dest[0]=(char)c;
}
return -1;
}
/* data lookup */
norm32=getNorm32(c);
if((norm32&qcMask)==0) {
/* simple case: no decomposition */
if(c<=0xffff) {
// BMP code point: a single UTF-16 code unit
if(destCapacity>0) {
dest[0]=(char)c;
}
return -1;
} else {
// supplementary code point: written as a surrogate pair
if(destCapacity>=2) {
dest[0]=UTF16.getLeadSurrogate(c);
dest[1]=UTF16.getTrailSurrogate(c);
}
return -2;
}
} else if(isNorm32HangulOrJamo(norm32)) {
/* Hangul syllable: decompose algorithmically */
char c2;
c-=HANGUL_BASE;
// c2 is the trailing-consonant index; 0 means there is none
c2=(char)(c%JAMO_T_COUNT);
c/=JAMO_T_COUNT;
if(c2>0) {
if(destCapacity>=3) {
dest[2]=(char)(JAMO_T_BASE+c2);
}
length=3;
} else {
length=2;
}
// after the division above, c holds the combined lead/vowel index
if(destCapacity>=2) {
dest[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT);
dest[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT);
}
return length;
} else {
/* c decomposes, get everything from the variable-length extra
* data
*/
int p, limit;
DecomposeArgs args = new DecomposeArgs();
/* the index into extra data array*/
p=decompose(norm32, qcMask, args);
// copy only if the whole decomposition fits; this is the one branch
// that honors destStart
if(args.length<=destCapacity) {
limit=p+args.length;
// NOTE(review): do-while copies at least one char, so this
// presumably relies on args.length >= 1 here - confirm
do {
dest[destStart++]=extraData[p++];
} while(p<limit);
}
return args.length;
}
} else {
// not a valid code point
return 0;
}
}
public static int decompose(char[] src,int srcStart,int srcLimit,
char[] dest,int destStart,int destLimit,
boolean compat,int[] outTrailCC,

View File

@ -3731,7 +3731,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
return String.valueOf((char)ch);
}
StringBuffer result = new StringBuffer();
StringBuilder result = new StringBuilder();
result.append(UTF16.getLeadSurrogate(ch));
result.append(UTF16.getTrailSurrogate(ch));
return result.toString();

View File

@ -1,13 +1,18 @@
/*
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.text;
import com.ibm.icu.impl.NormalizerImpl;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Normalizer2Impl;
/**
* This class has been deprecated since ICU 2.2.
* One problem is that this class is not designed to return supplementary characters.
* Use the Normalizer2 and UCharacter classes instead.
* <p>
* <tt>ComposedCharIter</tt> is an iterator class that returns all
* of the precomposed characters defined in the Unicode standard, along
* with their decomposed forms. This is often useful when building
@ -50,7 +55,6 @@ import com.ibm.icu.impl.NormalizerImpl;
*/
///CLOVER:OFF
public final class ComposedCharIter {
/**
* Constant that indicates the iteration has completed.
* {@link #next} returns this value when there are no more composed characters
@ -58,7 +62,7 @@ public final class ComposedCharIter {
* @deprecated ICU 2.2
*/
public static final char DONE = (char) Normalizer.DONE;
/**
* Construct a new <tt>ComposedCharIter</tt>. The iterator will return
* all Unicode characters with canonical decompositions, including Korean
@ -66,11 +70,9 @@ public final class ComposedCharIter {
* @deprecated ICU 2.2
*/
public ComposedCharIter() {
compat = false;
//options =0;
this(false, 0);
}
/**
* Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
* <p>
@ -78,18 +80,17 @@ public final class ComposedCharIter {
* <tt>true</tt> for both canonical and compatibility
* decompositions.
*
* @param options Optional decomposition features. Currently, the only
* supported option is {@link Normalizer#IGNORE_HANGUL}, which
* causes this <tt>ComposedCharIter</tt> not to iterate
* over the Hangul characters and their corresponding
* Jamo decompositions.
* @param options Optional decomposition features. None are supported, so this is ignored.
* @deprecated ICU 2.2
*/
public ComposedCharIter(boolean compat, int options) {
this.compat = compat;
//this.options = options;
if(compat) {
n2impl = Norm2AllModes.getNFKCInstanceNoIOException().impl;
} else {
n2impl = Norm2AllModes.getNFCInstanceNoIOException().impl;
}
}
/**
* Determines whether there are any precomposed Unicode characters not yet returned
* by {@link #next}.
@ -129,36 +130,35 @@ public final class ComposedCharIter {
public String decomposition() {
// the decomposition buffer contains the decomposition of
// current char so just return it
return new String(decompBuf,0, bufLen);
if(decompBuf != null) {
return decompBuf;
} else {
return "";
}
}
private void findNextChar() {
int c=curChar+1;
for(;;){
if(c < 0xFFFF){
bufLen = NormalizerImpl.getDecomposition(c,compat,
decompBuf,0,
decompBuf.length);
if(bufLen>0){
decompBuf = null;
for(;;) {
if(c < 0xFFFF) {
decompBuf = n2impl.getDecomposition(c);
if(decompBuf != null) {
// the curChar can be decomposed... so it is a composed char
// cache the result
break;
}
c++;
}else{
c=Normalizer.DONE;
break;
}
}
c++;
} else {
c=Normalizer.DONE;
break;
}
}
nextChar=c;
}
//private int options;
private boolean compat;
private char[] decompBuf = new char[100];
private int bufLen=0;
private final Normalizer2Impl n2impl;
private String decompBuf;
private int curChar = 0;
private int nextChar = Normalizer.DONE;
}

View File

@ -9,6 +9,7 @@ package com.ibm.icu.text;
import java.io.InputStream;
import java.io.IOException;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.text.Normalizer;
/**
@ -63,7 +64,7 @@ public abstract class Normalizer2 {
* @draft ICU 4.4
* @provisional This API might change or be removed in a future release.
*/
enum Mode {
public enum Mode {
/**
* Decomposition followed by composition.
* Same as standard NFC when using an "nfc" instance.
@ -132,7 +133,14 @@ public abstract class Normalizer2 {
* @provisional This API might change or be removed in a future release.
*/
public static Normalizer2 getInstance(InputStream data, String name, Mode mode) throws IOException {
return null;
Norm2AllModes all2Modes=Norm2AllModes.getInstance(data, name);
switch(mode) {
case COMPOSE: return all2Modes.comp;
case DECOMPOSE: return all2Modes.decomp;
case FCD: return all2Modes.fcd;
case COMPOSE_CONTIGUOUS: return all2Modes.fcc;
default: return null; // will not occur
}
}
/**

View File

@ -2670,7 +2670,7 @@ public final class UTF16 {
return String.valueOf((char) ch);
}
StringBuffer result = new StringBuffer();
StringBuilder result = new StringBuilder();
result.append(getLeadSurrogate(ch));
result.append(getTrailSurrogate(ch));
return result.toString();

View File

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2009, International Business Machines Corporation and *
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -8,6 +8,7 @@
package com.ibm.icu.dev.test.lang;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
@ -26,6 +27,7 @@ import com.ibm.icu.lang.UCharacterDirection;
import com.ibm.icu.lang.UCharacterEnums;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
@ -2279,9 +2281,7 @@ public final class UCharacterTest extends TestFmwk
}
/* various tests for consistency of UCD data and API behavior */
public void TestConsistency() {
char[] buffer16 = new char[300];
char[] buffer = new char[300];
public void TestConsistency() throws IOException {
UnicodeSet set1, set2, set3, set4;
USerializedSet sset;
@ -2366,6 +2366,7 @@ public final class UCharacterTest extends TestFmwk
* In general, the set for the middle such character should be a subset
* of the set for the first.
*/
Normalizer2 norm2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);
set1=new UnicodeSet();
set2=new UnicodeSet();
sset = new USerializedSet();
@ -2374,7 +2375,8 @@ public final class UCharacterTest extends TestFmwk
/* enumerate all characters that are plausible to be latin letters */
for(start=0xa0; start<0x2000; ++start) {
if(NormalizerImpl.getDecomposition(start, false, buffer16,0,buffer16.length) > 1 && buffer[0]==0x0049) {
String decomp=norm2.normalize(UTF16.valueOf(start));
if(decomp.length() > 1 && decomp.charAt(0)==0x49) {
set2.add(start);
}
}

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -146,10 +146,10 @@ public class TestDeprecatedNormalizerAPI extends TestFmwk
ComposedCharIter iter = new ComposedCharIter(false, options);
while (iter.hasNext()) {
char ch = iter.next();
final char ch = iter.next();
String chStr = new StringBuffer().append(ch).toString();
String decomp = Normalizer.decompose(chStr, compat);
String chStr = String.valueOf(ch);
String decomp = iter.decomposition();
String comp = Normalizer.compose(decomp, compat);
if (NormalizerImpl.isFullCompositionExclusion(ch)) {
@ -158,19 +158,14 @@ public class TestDeprecatedNormalizerAPI extends TestFmwk
}
// Avoid disparaged characters
if (getDecomposition(ch,compat).length() == 4) continue;
if (decomp.length() == 4) continue;
if (!comp.equals(chStr)) {
errln("ERROR: Round trip invalid: " + hex(chStr) + " --> " + hex(decomp)
+ " --> " + hex(comp));
errln(" char decomp is '" + getDecomposition(ch,compat) + "'");
errln(" char decomp is '" + decomp + "'");
}
}
}
private String getDecomposition(char ch, boolean compat){
char[] dest = new char[10];
int length = NormalizerImpl.getDecomposition(ch,compat,dest,0,dest.length);
return new String(dest,0,length);
}
}