ICU-2757 add APIs for NF*_QC properties

X-SVN-Rev: 14923
This commit is contained in:
Markus Scherer 2004-04-09 21:37:21 +00:00
parent acf1906951
commit 72ce99314f
4 changed files with 147 additions and 4 deletions

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1996-2003, International Business Machines Corporation and *
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -14,6 +14,7 @@ import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.NormalizerImpl;
import com.ibm.icu.impl.USerializedSet;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.*;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.text.Normalizer;
@ -634,6 +635,89 @@ public class BasicTest extends TestFmwk {
}
}
}
/**
 * Converts a Normalizer quick check result into the integer that the
 * NF*_QUICK_CHECK property values are compared against in this test.
 * @param qc quick check result to convert
 * @return 0 for Normalizer.NO, 1 for Normalizer.YES, 2 for anything else (MAYBE)
 */
static final int qcToInt(Normalizer.QuickCheckResult qc) {
    if(qc==Normalizer.YES) {
        return 1;
    }
    // whatever is neither YES nor NO is reported as MAYBE
    return (qc==Normalizer.NO) ? 0 : 2;
}
/**
 * Checks the NF*_QUICK_CHECK integer properties.
 * First verifies the maximum property values (1 for NFD/NFKD, 2 for NFC/NFKC),
 * then, for a sample of code points, compares
 * UCharacter.getIntPropertyValue() with the result of Normalizer.quickCheck()
 * on the string that consists of just that code point.
 */
public void TestQuickCheckPerCP() {
    int c;
    String s;
    //int lccc1, lccc2, tccc1, tccc2;
    int qc1, qc2;

    if(
        UCharacter.getIntPropertyMaxValue(UProperty.NFD_QUICK_CHECK)!=1 || // YES
        UCharacter.getIntPropertyMaxValue(UProperty.NFKD_QUICK_CHECK)!=1 ||
        UCharacter.getIntPropertyMaxValue(UProperty.NFC_QUICK_CHECK)!=2 || // MAYBE
        UCharacter.getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK)!=2/* ||
        UCharacter.getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
        UCharacter.getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)*/
    ) {
        // only the quick check maxima are tested above; the combining class checks are commented out
        errln("wrong result from one of the UCharacter.getIntPropertyMaxValue(UProperty.NF*_QUICK_CHECK)");
    }

    /*
     * compare the quick check property values for some code points
     * to the quick check results for checking same-code point strings
     *
     * The loop step skips some code points: c grows by about 5% plus one
     * per iteration, so only a sample of the code point range is visited.
     */
    for(c=0; c<0x110000; c=(20*c)/19+1) {
        s=UTF16.valueOf(c);

        qc1=UCharacter.getIntPropertyValue(c, UProperty.NFC_QUICK_CHECK);
        qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFC));
        if(qc1!=qc2) {
            errln("getIntPropertyValue(NFC)="+qc1+" != "+qc2+"=quickCheck(NFC) for U+"+Integer.toHexString(c));
        }

        qc1=UCharacter.getIntPropertyValue(c, UProperty.NFD_QUICK_CHECK);
        qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFD));
        if(qc1!=qc2) {
            errln("getIntPropertyValue(NFD)="+qc1+" != "+qc2+"=quickCheck(NFD) for U+"+Integer.toHexString(c));
        }

        qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKC_QUICK_CHECK);
        qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKC));
        if(qc1!=qc2) {
            errln("getIntPropertyValue(NFKC)="+qc1+" != "+qc2+"=quickCheck(NFKC) for U+"+Integer.toHexString(c));
        }

        qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKD_QUICK_CHECK);
        qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKD));
        if(qc1!=qc2) {
            errln("getIntPropertyValue(NFKD)="+qc1+" != "+qc2+"=quickCheck(NFKD) for U+"+Integer.toHexString(c));
        }

        /*
         * Lead/trail canonical combining class checks: C reference code,
         * kept here as a comment and not executed by this test.
        length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
        U16_GET(nfd, 0, 0, length, lead);
        U16_GET(nfd, 0, length-1, length, trail);
        lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
        lccc2=u_getCombiningClass(lead);
        tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
        tccc2=u_getCombiningClass(trail);
        if(lccc1!=lccc2) {
        log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
        lccc1, lccc2, c);
        }
        if(tccc1!=tccc2) {
        log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
        tccc1, tccc2, c);
        }
        */
    }
}
//------------------------------------------------------------------------
// Internal utilities
//

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1996-2003, International Business Machines Corporation and *
* Copyright (C) 1996-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -2798,6 +2798,30 @@ public final class NormalizerImpl {
set.add(HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
return set; // for chaining
}
/**
 * Quick check bit masks, indexed by the numeric normalization mode value
 * (2=NFD, 3=NFKD, 4=NFC, 5=NFKC; indexes 0 and 1 carry no quick check bits).
 * Held in a constant so that quickCheck(int, int) does not build a new
 * array on every call.
 */
private static final int[] QC_MASK_BY_MODE/*UNORM_MODE_COUNT*/={
    0, 0, QC_NFD, QC_NFKD, QC_NFC, QC_NFKC
};

/**
 * Internal API, used in UCharacter.getIntPropertyValue().
 * Looks up the quick check bits of a single code point for one
 * normalization mode.
 * @internal
 * @param c code point
 * @param modeValue numeric value compatible with Mode; used as an index
 *                  into the mask table, so it must be in the range 0..5
 * @return numeric value compatible with QuickCheck:
 *         1 (YES) if none of the mode's quick check bits are set,
 *         0 (NO) if any of the "no" bits is set,
 *         2 (MAYBE) otherwise
 * @throws ArrayIndexOutOfBoundsException if modeValue is outside 0..5
 */
public static final int quickCheck(int c, int modeValue) {
    // keep only the quick check bits that belong to the requested mode
    int norm32=(int)getNorm32(c)&QC_MASK_BY_MODE[modeValue];

    if(norm32==0) {
        return 1; // YES
    } else if((norm32&QC_ANY_NO)!=0) {
        return 0; // NO
    } else /* only "maybe" bits are left */ {
        return 2; // MAYBE
    }
}
/**
* Internal API, used by collation code.
* Get access to the internal FCD trie table to be able to perform

View File

@ -4328,7 +4328,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
return ch%NormalizerImpl.JAMO_T_COUNT==0 ? HangulSyllableType.LV_SYLLABLE : HangulSyllableType.LVT_SYLLABLE;
}
return 0; /* NA */
case UProperty.NFD_QUICK_CHECK:
case UProperty.NFKD_QUICK_CHECK:
case UProperty.NFC_QUICK_CHECK:
case UProperty.NFKC_QUICK_CHECK:
return NormalizerImpl.quickCheck(ch, (type-UProperty.NFD_QUICK_CHECK)+2); // 2=UNORM_NFD
default:
return 0; /* undefined */
@ -4436,6 +4441,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
return (max!= 0)? max : UScript.CODE_LIMIT - 1;
case UProperty.HANGUL_SYLLABLE_TYPE:
return HangulSyllableType.COUNT-1;
case UProperty.NFD_QUICK_CHECK:
case UProperty.NFKD_QUICK_CHECK:
return 1; // YES -- these are never "maybe", only "no" or "yes"
case UProperty.NFC_QUICK_CHECK:
case UProperty.NFKC_QUICK_CHECK:
return 2; // MAYBE
}
}

View File

@ -378,12 +378,36 @@ public interface UProperty
* @draft ICU 2.6
* */
public static final int HANGUL_SYLLABLE_TYPE = 0x100B;
/**
 * Enumerated property NFD_Quick_Check.
 * Returns numeric values compatible with Normalizer.QuickCheckResult:
 * 0 for NO and 1 for YES. This property is never "maybe",
 * so its maximum value is 1.
 * @draft ICU 3.0
 */
public static final int NFD_QUICK_CHECK = 0x100C;
/**
 * Enumerated property NFKD_Quick_Check.
 * Returns numeric values compatible with Normalizer.QuickCheckResult:
 * 0 for NO and 1 for YES. This property is never "maybe",
 * so its maximum value is 1.
 * @draft ICU 3.0
 */
public static final int NFKD_QUICK_CHECK = 0x100D;
/**
 * Enumerated property NFC_Quick_Check.
 * Returns numeric values compatible with Normalizer.QuickCheckResult:
 * 0 for NO, 1 for YES and 2 for MAYBE (the maximum value).
 * @draft ICU 3.0
 */
public static final int NFC_QUICK_CHECK = 0x100E;
/**
 * Enumerated property NFKC_Quick_Check.
 * Returns numeric values compatible with Normalizer.QuickCheckResult:
 * 0 for NO, 1 for YES and 2 for MAYBE (the maximum value).
 * @draft ICU 3.0
 */
public static final int NFKC_QUICK_CHECK = 0x100F;
/**
* One more than the last constant for enumerated/integer Unicode
* properties.
* @draft ICU 2.4
*/
public static final int INT_LIMIT = 0x100C;
public static final int INT_LIMIT = 0x1010;
/**
* Bitmask property General_Category_Mask.