ICU-2758 add APIs for FCD properties lccc & tccc
X-SVN-Rev: 14894
This commit is contained in:
parent
ef9f2f2fbc
commit
f65ee799e6
@ -351,6 +351,20 @@ typedef enum UProperty {
|
||||
/** Enumerated property NFKC_Quick_Check.
|
||||
Returns UNormalizationCheckResult values. @draft ICU 3.0 */
|
||||
UCHAR_NFKC_QUICK_CHECK,
|
||||
/** Enumerated property Lead_Canonical_Combining_Class.
|
||||
ICU-specific property for the ccc of the first code point
|
||||
of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
|
||||
Useful for checking for canonically ordered text;
|
||||
see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
|
||||
Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */
|
||||
UCHAR_LEAD_CANONICAL_COMBINING_CLASS,
|
||||
/** Enumerated property Trail_Canonical_Combining_Class.
|
||||
ICU-specific property for the ccc of the last code point
|
||||
of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
|
||||
Useful for checking for canonically ordered text;
|
||||
see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
|
||||
Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @draft ICU 3.0 */
|
||||
UCHAR_TRAIL_CANONICAL_COMBINING_CLASS,
|
||||
/** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
|
||||
UCHAR_INT_LIMIT,
|
||||
|
||||
|
@ -1189,6 +1189,20 @@ unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI uint16_t U_EXPORT2
|
||||
unorm_getFCD16FromCodePoint(UChar32 c) {
|
||||
UErrorCode errorCode;
|
||||
uint16_t fcd;
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(!_haveData(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
UTRIE_GET16(&fcdTrie, c, fcd);
|
||||
return fcd;
|
||||
}
|
||||
|
||||
/* reorder UTF-16 in-place -------------------------------------------------- */
|
||||
|
||||
/*
|
||||
|
@ -265,6 +265,14 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1,
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
/**
 * Internal API to get the 16-bit FCD value for c,
 * for u_getIntPropertyValue().
 * The value packs lccc into the upper 8 bits and tccc into the lower 8 bits.
 * @internal
 */
|
||||
U_CAPI uint16_t U_EXPORT2
|
||||
unorm_getFCD16FromCodePoint(UChar32 c);
|
||||
|
||||
/**
|
||||
* Internal API, used by collation code.
|
||||
* Get access to the internal FCD trie table to be able to perform
|
||||
|
@ -354,6 +354,10 @@ u_getIntPropertyValue(UChar32 c, UProperty which) {
|
||||
case UCHAR_NFC_QUICK_CHECK:
|
||||
case UCHAR_NFKC_QUICK_CHECK:
|
||||
return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK)+UNORM_NFD);
|
||||
case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
|
||||
return unorm_getFCD16FromCodePoint(c)>>8;
|
||||
case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
|
||||
return unorm_getFCD16FromCodePoint(c)&0xff;
|
||||
default:
|
||||
return 0; /* undefined */
|
||||
}
|
||||
@ -387,6 +391,8 @@ u_getIntPropertyMaxValue(UProperty which) {
|
||||
max=(uprv_getMaxValues(0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT;
|
||||
return max!=0 ? max : (int32_t)UBLOCK_COUNT-1;
|
||||
case UCHAR_CANONICAL_COMBINING_CLASS:
|
||||
case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
|
||||
case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
|
||||
return 0xff; /* TODO do we need to be more precise, getting the actual maximum? */
|
||||
case UCHAR_DECOMPOSITION_TYPE:
|
||||
max=uprv_getMaxValues(2)&UPROPS_DT_MASK;
|
||||
|
@ -1388,18 +1388,20 @@ TestFCNFKCClosure(void) {
|
||||
static void
|
||||
TestQuickCheckPerCP() {
|
||||
UErrorCode errorCode;
|
||||
UChar32 c;
|
||||
UChar s[U16_MAX_LENGTH];
|
||||
int32_t length;
|
||||
UChar32 c, lead, trail;
|
||||
UChar s[U16_MAX_LENGTH], nfd[16];
|
||||
int32_t length, lccc1, lccc2, tccc1, tccc2;
|
||||
UNormalizationCheckResult qc1, qc2;
|
||||
|
||||
if(
|
||||
u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
|
||||
u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
|
||||
u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
|
||||
u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE
|
||||
u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
|
||||
u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
|
||||
u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
|
||||
) {
|
||||
log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK)\n");
|
||||
log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1436,6 +1438,24 @@ TestQuickCheckPerCP() {
|
||||
log_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x\n", qc1, qc2, c);
|
||||
}
|
||||
|
||||
length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
|
||||
U16_GET(nfd, 0, 0, length, lead);
|
||||
U16_GET(nfd, 0, length-1, length, trail);
|
||||
|
||||
lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
|
||||
lccc2=u_getCombiningClass(lead);
|
||||
tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
|
||||
tccc2=u_getCombiningClass(trail);
|
||||
|
||||
if(lccc1!=lccc2) {
|
||||
log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
|
||||
lccc1, lccc2, c);
|
||||
}
|
||||
if(tccc1!=tccc2) {
|
||||
log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
|
||||
tccc1, tccc2, c);
|
||||
}
|
||||
|
||||
/* skip some code points */
|
||||
c=(20*c)/19+1;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
######################################################################
|
||||
# Copyright (c) 2003, International Business Machines
|
||||
# Copyright (c) 2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
######################################################################
|
||||
# Author: Alan Liu
|
||||
@ -21,6 +21,11 @@
|
||||
# Enumerated Non-Binary Properties
|
||||
# ================================================
|
||||
|
||||
# lccc(c)=ccc(NFD(c)[0])
|
||||
# tccc(c)=ccc(NFD(c)[last])
|
||||
lccc; Lead_Canonical_Combining_Class
|
||||
tccc; Trail_Canonical_Combining_Class
|
||||
|
||||
# ================================================
|
||||
# Bitmask Properties
|
||||
# ================================================
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -688,11 +688,14 @@ sub merge_PropertyValueAliases {
|
||||
}
|
||||
|
||||
# Merge the combining class values in manually
|
||||
# Add the same values to the synthetic lccc and tccc properties
|
||||
die "Error: No ccc data"
|
||||
unless exists $va->{'ccc'};
|
||||
for my $ccc (keys %{$va->{'ccc'}}) {
|
||||
die "Error: Can't overwrite ccc $ccc"
|
||||
if (exists $h->{'ccc'}->{$ccc});
|
||||
$h->{'lccc'}->{$ccc} =
|
||||
$h->{'tccc'}->{$ccc} =
|
||||
$h->{'ccc'}->{$ccc} = $va->{'ccc'}->{$ccc};
|
||||
}
|
||||
delete $va->{'ccc'};
|
||||
|
Loading…
Reference in New Issue
Block a user