ICU-2757 add APIs for NF*_QC properties
X-SVN-Rev: 14923
This commit is contained in:
parent
acf1906951
commit
72ce99314f
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2003, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -14,6 +14,7 @@ import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.NormalizerImpl;
|
||||
import com.ibm.icu.impl.USerializedSet;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.*;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UCharacterCategory;
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
@ -634,6 +635,89 @@ public class BasicTest extends TestFmwk {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static final int qcToInt(Normalizer.QuickCheckResult qc) {
|
||||
if(qc==Normalizer.NO) {
|
||||
return 0;
|
||||
} else if(qc==Normalizer.YES) {
|
||||
return 1;
|
||||
} else /* Normalizer.MAYBE */ {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
public void TestQuickCheckPerCP() {
|
||||
int c, lead, trail;
|
||||
String s, nfd;
|
||||
//int lccc1, lccc2, tccc1, tccc2;
|
||||
int qc1, qc2;
|
||||
|
||||
if(
|
||||
UCharacter.getIntPropertyMaxValue(UProperty.NFD_QUICK_CHECK)!=1 || // YES
|
||||
UCharacter.getIntPropertyMaxValue(UProperty.NFKD_QUICK_CHECK)!=1 ||
|
||||
UCharacter.getIntPropertyMaxValue(UProperty.NFC_QUICK_CHECK)!=2 || // MAYBE
|
||||
UCharacter.getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK)!=2/* ||
|
||||
UCharacter.getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
|
||||
UCharacter.getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)*/
|
||||
) {
|
||||
errln("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS");
|
||||
}
|
||||
|
||||
/*
|
||||
* compare the quick check property values for some code points
|
||||
* to the quick check results for checking same-code point strings
|
||||
*/
|
||||
c=0;
|
||||
while(c<0x110000) {
|
||||
s=UTF16.valueOf(c);
|
||||
|
||||
qc1=UCharacter.getIntPropertyValue(c, UProperty.NFC_QUICK_CHECK);
|
||||
qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFC));
|
||||
if(qc1!=qc2) {
|
||||
errln("getIntPropertyValue(NFC)="+qc1+" != "+qc2+"=quickCheck(NFC) for U+"+Integer.toHexString(c));
|
||||
}
|
||||
|
||||
qc1=UCharacter.getIntPropertyValue(c, UProperty.NFD_QUICK_CHECK);
|
||||
qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFD));
|
||||
if(qc1!=qc2) {
|
||||
errln("getIntPropertyValue(NFD)="+qc1+" != "+qc2+"=quickCheck(NFD) for U+"+Integer.toHexString(c));
|
||||
}
|
||||
|
||||
qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKC_QUICK_CHECK);
|
||||
qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKC));
|
||||
if(qc1!=qc2) {
|
||||
errln("getIntPropertyValue(NFKC)="+qc1+" != "+qc2+"=quickCheck(NFKC) for U+"+Integer.toHexString(c));
|
||||
}
|
||||
|
||||
qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKD_QUICK_CHECK);
|
||||
qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKD));
|
||||
if(qc1!=qc2) {
|
||||
errln("getIntPropertyValue(NFKD)="+qc1+" != "+qc2+"=quickCheck(NFKD) for U+"+Integer.toHexString(c));
|
||||
}
|
||||
/*
|
||||
length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
|
||||
U16_GET(nfd, 0, 0, length, lead);
|
||||
U16_GET(nfd, 0, length-1, length, trail);
|
||||
|
||||
lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
|
||||
lccc2=u_getCombiningClass(lead);
|
||||
tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
|
||||
tccc2=u_getCombiningClass(trail);
|
||||
|
||||
if(lccc1!=lccc2) {
|
||||
log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
|
||||
lccc1, lccc2, c);
|
||||
}
|
||||
if(tccc1!=tccc2) {
|
||||
log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
|
||||
tccc1, tccc2, c);
|
||||
}
|
||||
*/
|
||||
/* skip some code points */
|
||||
c=(20*c)/19+1;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Internal utilities
|
||||
//
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2003, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -2798,6 +2798,30 @@ public final class NormalizerImpl {
|
||||
set.add(HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
|
||||
return set; // for chaining
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal API, used in UCharacter.getIntPropertyValue().
|
||||
* @internal
|
||||
* @param c code point
|
||||
* @param modeValue numeric value compatible with Mode
|
||||
* @return numeric value compatible with QuickCheck
|
||||
*/
|
||||
public static final int quickCheck(int c, int modeValue) {
|
||||
final int qcMask[/*UNORM_MODE_COUNT*/]={
|
||||
0, 0, QC_NFD, QC_NFKD, QC_NFC, QC_NFKC
|
||||
};
|
||||
|
||||
int norm32=(int)getNorm32(c)&qcMask[modeValue];
|
||||
|
||||
if(norm32==0) {
|
||||
return 1; // YES
|
||||
} else if((norm32&QC_ANY_NO)!=0) {
|
||||
return 0; // NO
|
||||
} else /* _NORM_QC_ANY_MAYBE */ {
|
||||
return 2; // MAYBE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal API, used by collation code.
|
||||
* Get access to the internal FCD trie table to be able to perform
|
||||
|
@ -4328,7 +4328,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
return ch%NormalizerImpl.JAMO_T_COUNT==0 ? HangulSyllableType.LV_SYLLABLE : HangulSyllableType.LVT_SYLLABLE;
|
||||
}
|
||||
return 0; /* NA */
|
||||
|
||||
|
||||
case UProperty.NFD_QUICK_CHECK:
|
||||
case UProperty.NFKD_QUICK_CHECK:
|
||||
case UProperty.NFC_QUICK_CHECK:
|
||||
case UProperty.NFKC_QUICK_CHECK:
|
||||
return NormalizerImpl.quickCheck(ch, (type-UProperty.NFD_QUICK_CHECK)+2); // 2=UNORM_NFD
|
||||
default:
|
||||
|
||||
return 0; /* undefined */
|
||||
@ -4436,6 +4441,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
return (max!= 0)? max : UScript.CODE_LIMIT - 1;
|
||||
case UProperty.HANGUL_SYLLABLE_TYPE:
|
||||
return HangulSyllableType.COUNT-1;
|
||||
case UProperty.NFD_QUICK_CHECK:
|
||||
case UProperty.NFKD_QUICK_CHECK:
|
||||
return 1; // YES -- these are never "maybe", only "no" or "yes"
|
||||
case UProperty.NFC_QUICK_CHECK:
|
||||
case UProperty.NFKC_QUICK_CHECK:
|
||||
return 2; // MAYBE
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -378,12 +378,36 @@ public interface UProperty
|
||||
* @draft ICU 2.6
|
||||
* */
|
||||
public static final int HANGUL_SYLLABLE_TYPE = 0x100B;
|
||||
/**
|
||||
* Enumerated property NFD_Quick_Check.
|
||||
* Returns numeric values compatible with Normalizer.QuickCheckResult.
|
||||
* @draft ICU 3.0
|
||||
*/
|
||||
public static final int NFD_QUICK_CHECK = 0x100C;
|
||||
/**
|
||||
* Enumerated property NFKD_Quick_Check.
|
||||
* Returns numeric values compatible with Normalizer.QuickCheckResult.
|
||||
* @draft ICU 3.0
|
||||
*/
|
||||
public static final int NFKD_QUICK_CHECK = 0x100D;
|
||||
/**
|
||||
* Enumerated property NFC_Quick_Check.
|
||||
* Returns numeric values compatible with Normalizer.QuickCheckResult.
|
||||
* @draft ICU 3.0
|
||||
*/
|
||||
public static final int NFC_QUICK_CHECK = 0x100E;
|
||||
/**
|
||||
* Enumerated property NFKC_Quick_Check.
|
||||
* Returns numeric values compatible with Normalizer.QuickCheckResult.
|
||||
* @draft ICU 3.0
|
||||
*/
|
||||
public static final int NFKC_QUICK_CHECK = 0x100F;
|
||||
/**
|
||||
* One more than the last constant for enumerated/integer Unicode
|
||||
* properties.
|
||||
* @draft ICU 2.4
|
||||
*/
|
||||
public static final int INT_LIMIT = 0x100C;
|
||||
public static final int INT_LIMIT = 0x1010;
|
||||
|
||||
/**
|
||||
* Bitmask property General_Category_Mask.
|
||||
|
Loading…
Reference in New Issue
Block a user