ICU-2758 add APIs for FCD properties lccc & tccc

X-SVN-Rev: 14894
This commit is contained in:
Markus Scherer 2004-04-07 02:57:06 +00:00
parent ef9f2f2fbc
commit f65ee799e6
8 changed files with 1347 additions and 1221 deletions

View File

@ -351,6 +351,20 @@ typedef enum UProperty {
/** Enumerated property NFKC_Quick_Check.
Returns UNormalizationCheckResult values. @draft ICU 3.0 */
UCHAR_NFKC_QUICK_CHECK,
/** Enumerated property Lead_Canonical_Combining_Class.
ICU-specific property for the ccc of the first code point
of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
Useful for checking for canonically ordered text;
see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */
UCHAR_LEAD_CANONICAL_COMBINING_CLASS,
/** Enumerated property Trail_Canonical_Combining_Class.
ICU-specific property for the ccc of the last code point
of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
Useful for checking for canonically ordered text;
see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */
UCHAR_TRAIL_CANONICAL_COMBINING_CLASS,
/** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
UCHAR_INT_LIMIT,

View File

@ -1189,6 +1189,20 @@ unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
}
}
/*
 * Look up the 16-bit FCD value for a single code point in the FCD trie.
 * Yields 0 when _haveData() reports that the data is not usable;
 * otherwise yields whatever UTRIE_GET16() reads from fcdTrie for c.
 */
U_CAPI uint16_t U_EXPORT2
unorm_getFCD16FromCodePoint(UChar32 c) {
    UErrorCode status=U_ZERO_ERROR;
    uint16_t fcd16=0;

    /* only touch the trie once the data check has succeeded */
    if(_haveData(status)) {
        UTRIE_GET16(&fcdTrie, c, fcd16);
    }
    return fcd16;
}
/* reorder UTF-16 in-place -------------------------------------------------- */
/*

View File

@ -265,6 +265,14 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
#if !UCONFIG_NO_NORMALIZATION
/**
* Internal API to get the 16-bit FCD value (lccc + tccc) for c,
* for u_getIntPropertyValue().
* u_getIntPropertyValue() takes lccc from the upper 8 bits (value>>8)
* and tccc from the lower 8 bits (value&0xff) of the result.
*
* @param c the code point to look up
* @return the 16-bit FCD value for c, or 0 if the implementation's
*         data check fails
* @internal
*/
U_CAPI uint16_t U_EXPORT2
unorm_getFCD16FromCodePoint(UChar32 c);
/**
* Internal API, used by collation code.
* Get access to the internal FCD trie table to be able to perform

View File

@ -354,6 +354,10 @@ u_getIntPropertyValue(UChar32 c, UProperty which) {
case UCHAR_NFC_QUICK_CHECK:
case UCHAR_NFKC_QUICK_CHECK:
return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK)+UNORM_NFD);
case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
return unorm_getFCD16FromCodePoint(c)>>8;
case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
return unorm_getFCD16FromCodePoint(c)&0xff;
default:
return 0; /* undefined */
}
@ -387,6 +391,8 @@ u_getIntPropertyMaxValue(UProperty which) {
max=(uprv_getMaxValues(0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT;
return max!=0 ? max : (int32_t)UBLOCK_COUNT-1;
case UCHAR_CANONICAL_COMBINING_CLASS:
case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
return 0xff; /* TODO do we need to be more precise, getting the actual maximum? */
case UCHAR_DECOMPOSITION_TYPE:
max=uprv_getMaxValues(2)&UPROPS_DT_MASK;

View File

@ -1388,18 +1388,20 @@ TestFCNFKCClosure(void) {
static void
TestQuickCheckPerCP() {
UErrorCode errorCode;
UChar32 c;
UChar s[U16_MAX_LENGTH];
int32_t length;
UChar32 c, lead, trail;
UChar s[U16_MAX_LENGTH], nfd[16];
int32_t length, lccc1, lccc2, tccc1, tccc2;
UNormalizationCheckResult qc1, qc2;
if(
u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE
u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
) {
log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK)\n");
log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
}
/*
@ -1436,6 +1438,24 @@ TestQuickCheckPerCP() {
log_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x\n", qc1, qc2, c);
}
length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
U16_GET(nfd, 0, 0, length, lead);
U16_GET(nfd, 0, length-1, length, trail);
lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
lccc2=u_getCombiningClass(lead);
tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
tccc2=u_getCombiningClass(trail);
if(lccc1!=lccc2) {
log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
lccc1, lccc2, c);
}
if(tccc1!=tccc2) {
log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
tccc1, tccc2, c);
}
/* skip some code points */
c=(20*c)/19+1;
}

View File

@ -1,5 +1,5 @@
######################################################################
# Copyright (c) 2003, International Business Machines
# Copyright (c) 2004, International Business Machines
# Corporation and others. All Rights Reserved.
######################################################################
# Author: Alan Liu
@ -21,6 +21,11 @@
# Enumerated Non-Binary Properties
# ================================================
# lccc(c)=ccc(NFD(c)[0])
# tccc(c)=ccc(NFD(c)[last])
lccc; Lead_Canonical_Combining_Class
tccc; Trail_Canonical_Combining_Class
# ================================================
# Bitmask Properties
# ================================================

File diff suppressed because it is too large Load Diff

View File

@ -688,11 +688,14 @@ sub merge_PropertyValueAliases {
}
# Merge the combining class values in manually
# Add the same values to the synthetic lccc and tccc properties
die "Error: No ccc data"
unless exists $va->{'ccc'};
for my $ccc (keys %{$va->{'ccc'}}) {
die "Error: Can't overwrite ccc $ccc"
if (exists $h->{'ccc'}->{$ccc});
$h->{'lccc'}->{$ccc} =
$h->{'tccc'}->{$ccc} =
$h->{'ccc'}->{$ccc} = $va->{'ccc'}->{$ccc};
}
delete $va->{'ccc'};