ICU-8606 add Normalizer2.getCombiningClass(c)
X-SVN-Rev: 30263
This commit is contained in:
parent
05cff4e761
commit
4abbf7161a
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2010, International Business Machines
|
||||
* Copyright (C) 2009-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -153,6 +153,11 @@ FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) c
|
||||
return set.contains(c) && norm2.getDecomposition(c, decomposition);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
FilteredNormalizer2::getCombiningClass(UChar32 c) const {
|
||||
return set.contains(c) ? norm2.getCombiningClass(c) : 0;
|
||||
}
|
||||
|
||||
UBool
|
||||
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
uprv_checkCanGetBuffer(s, errorCode);
|
||||
|
@ -189,6 +189,11 @@ public:
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const {
|
||||
return impl.getCC(impl.getNorm16(c));
|
||||
}
|
||||
|
||||
// quick checks
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
@ -632,6 +637,11 @@ Normalizer2::getInstance(const char *packageName,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uint8_t
|
||||
Normalizer2::getCombiningClass(UChar32 /*c*/) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
|
||||
|
||||
U_NAMESPACE_END
|
||||
@ -782,6 +792,11 @@ unorm2_getDecomposition(const UNormalizer2 *norm2,
|
||||
}
|
||||
}
|
||||
|
||||
// C API entry point: reinterprets the opaque UNormalizer2 handle as the
// C++ Normalizer2 object and forwards the query to its getCombiningClass().
// norm2 is dereferenced without a NULL check.
U_DRAFT uint8_t U_EXPORT2
unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
    const Normalizer2 *normalizer=reinterpret_cast<const Normalizer2 *>(norm2);
    return normalizer->getCombiningClass(c);
}
|
||||
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
unorm2_isNormalized(const UNormalizer2 *norm2,
|
||||
const UChar *s, int32_t length,
|
||||
@ -847,9 +862,9 @@ unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
|
||||
U_CAPI uint8_t U_EXPORT2
|
||||
u_getCombiningClass(UChar32 c) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
|
||||
const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
return impl->getCC(impl->getNorm16(c));
|
||||
return nfd->getCombiningClass(c);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
@ -190,6 +190,17 @@ public:
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
|
||||
|
||||
/**
|
||||
* Gets the combining class of c.
|
||||
* The default implementation returns 0
|
||||
* but all standard implementations return the Unicode Canonical_Combining_Class value.
|
||||
* @param c code point
|
||||
* @return c's combining class
|
||||
* @draft ICU 49
|
||||
*/
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const;
|
||||
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* Internally, in cases where the quickCheck() method would return "maybe"
|
||||
@ -394,6 +405,17 @@ public:
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const;
|
||||
|
||||
/**
|
||||
* Gets the combining class of c.
|
||||
* The default implementation returns 0
|
||||
* but all standard implementations return the Unicode Canonical_Combining_Class value.
|
||||
* @param c code point
|
||||
* @return c's combining class
|
||||
* @draft ICU 49
|
||||
*/
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const;
|
||||
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* For details see the Normalizer2 base class documentation.
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009-2010, International Business Machines
|
||||
* Copyright (C) 2009-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -280,6 +280,18 @@ unorm2_getDecomposition(const UNormalizer2 *norm2,
|
||||
UChar32 c, UChar *decomposition, int32_t capacity,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Gets the combining class of c.
|
||||
* The default implementation returns 0
|
||||
* but all standard implementations return the Unicode Canonical_Combining_Class value.
|
||||
* @param norm2 UNormalizer2 instance
|
||||
* @param c code point
|
||||
* @return c's combining class
|
||||
* @draft ICU 49
|
||||
*/
|
||||
U_DRAFT uint8_t U_EXPORT2
|
||||
unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
|
||||
|
||||
/**
|
||||
* Tests if the string is normalized.
|
||||
* Internally, in cases where the quickCheck() method would return "maybe"
|
||||
|
@ -1120,7 +1120,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
|
||||
UChar32 c;
|
||||
int32_t j=i;
|
||||
U16_PREV_UNSAFE(label, j, c);
|
||||
if(u_getCombiningClass(c)==9) {
|
||||
if(uts46Norm2.getCombiningClass(c)==9) {
|
||||
continue;
|
||||
}
|
||||
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
|
||||
@ -1163,7 +1163,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
|
||||
UChar32 c;
|
||||
int32_t j=i;
|
||||
U16_PREV_UNSAFE(label, j, c);
|
||||
if(u_getCombiningClass(c)!=9) {
|
||||
if(uts46Norm2.getCombiningClass(c)!=9) {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -925,6 +925,14 @@ static void TestIdentifier()
|
||||
}
|
||||
|
||||
/* for each line of UnicodeData.txt, check some of the properties */
|
||||
typedef struct UnicodeDataContext {
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
const void *dummy;
|
||||
#else
|
||||
const UNormalizer2 *nfkc;
|
||||
#endif
|
||||
} UnicodeDataContext;
|
||||
|
||||
/*
|
||||
* ### TODO
|
||||
* This test fails incorrectly if the First or Last code point of a repetitive area
|
||||
@ -950,6 +958,10 @@ unicodeDataLineFn(void *context,
|
||||
int32_t i;
|
||||
int8_t type;
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
const UNormalizer2 *nfkc;
|
||||
#endif
|
||||
|
||||
/* get the character code, field 0 */
|
||||
c=strtoul(fields[0][0], &end, 16);
|
||||
if(end<=fields[0][0] || end!=fields[0][1]) {
|
||||
@ -985,6 +997,10 @@ unicodeDataLineFn(void *context,
|
||||
if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
|
||||
log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
|
||||
}
|
||||
nfkc=((UnicodeDataContext *)context)->nfkc;
|
||||
if(value!=unorm2_getCombiningClass(nfkc, c)) {
|
||||
log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* get BiDi category, field 4 */
|
||||
@ -1191,6 +1207,8 @@ static void TestUnicodeData()
|
||||
UChar32 c;
|
||||
int8_t type;
|
||||
|
||||
UnicodeDataContext context;
|
||||
|
||||
u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
|
||||
u_getUnicodeVersion(versionArray);
|
||||
if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
|
||||
@ -1212,7 +1230,14 @@ static void TestUnicodeData()
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode);
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
context.nfkc=unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_data_err("error: unable to open an NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return; /* if we couldn't parse UnicodeData.txt, we should return */
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
@ -1458,9 +1458,7 @@ BasicNormalizerTest::TestFilteredNormalizer2Coverage() {
|
||||
dataerrln("Normalizer2Factory::getNFCInstance() call failed - %s", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff]"), errorCode);
|
||||
UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]");
|
||||
UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]");
|
||||
UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]"), errorCode);
|
||||
FilteredNormalizer2 fn2(*nfcNorm2, filter);
|
||||
|
||||
UChar32 char32 = 0x0054;
|
||||
@ -1473,6 +1471,20 @@ BasicNormalizerTest::TestFilteredNormalizer2Coverage() {
|
||||
errln("FilteredNormalizer2.hasBoundaryAfter() failed.");
|
||||
}
|
||||
|
||||
UChar32 c;
|
||||
for(c=0; c<=0x3ff; ++c) {
|
||||
uint8_t expectedCC= filter.contains(c) ? nfcNorm2->getCombiningClass(c) : 0;
|
||||
uint8_t cc=fn2.getCombiningClass(c);
|
||||
if(cc!=expectedCC) {
|
||||
errln(
|
||||
UnicodeString("FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+")+
|
||||
hex(c)+
|
||||
")==filtered NFC.getCC()");
|
||||
}
|
||||
}
|
||||
|
||||
UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]");
|
||||
UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]");
|
||||
fn2.append(newString1, newString2, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errln("FilteredNormalizer2.append() failed.");
|
||||
|
Loading…
Reference in New Issue
Block a user