ICU-2021 fix unorm_compare() for case-insensitive comparison - see Jitterbug 2021 for details
X-SVN-Rev: 11107
This commit is contained in:
parent
794e3d9fdc
commit
5625465b67
@@ -4244,7 +4244,7 @@ unorm_compare(const UChar *s1, int32_t length1,
|
|||||||
const UChar *s2, int32_t length2,
|
const UChar *s2, int32_t length2,
|
||||||
uint32_t options,
|
uint32_t options,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
UChar fold1[300], fold2[300], fcd1[300], fcd2[300];
|
UChar fcd1[300], fcd2[300];
|
||||||
UChar *f1, *f2, *d1, *d2;
|
UChar *f1, *f2, *d1, *d2;
|
||||||
const UnicodeSet *dx;
|
const UnicodeSet *dx;
|
||||||
int32_t result;
|
int32_t result;
|
||||||
@@ -4274,6 +4274,22 @@ unorm_compare(const UChar *s1, int32_t length1,
|
|||||||
options|=_COMPARE_EQUIV;
|
options|=_COMPARE_EQUIV;
|
||||||
result=0;
|
result=0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UAX #21 Case Mappings, as fixed for Unicode version 4
|
||||||
|
* (see Jitterbug 2021), defines a canonical caseless match as
|
||||||
|
*
|
||||||
|
* A string X is a canonical caseless match
|
||||||
|
* for a string Y if and only if
|
||||||
|
* NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
|
||||||
|
*
|
||||||
|
* For better performance, we check for FCD (or let the caller tell us that
|
||||||
|
* both strings are in FCD) for the inner normalization.
|
||||||
|
* BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
|
||||||
|
* case-folding preserves the FCD-ness of a string.
|
||||||
|
* The outer normalization is then only performed by unorm_cmpEquivFold()
|
||||||
|
* when there is a difference.
|
||||||
|
*/
|
||||||
|
|
||||||
if(!(options&UNORM_INPUT_IS_FCD)) {
|
if(!(options&UNORM_INPUT_IS_FCD)) {
|
||||||
int32_t _len1, _len2;
|
int32_t _len1, _len2;
|
||||||
UBool isFCD1, isFCD2;
|
UBool isFCD1, isFCD2;
|
||||||
@@ -4282,66 +4298,6 @@ unorm_compare(const UChar *s1, int32_t length1,
|
|||||||
isFCD1=unorm_checkFCD(s1, length1, dx);
|
isFCD1=unorm_checkFCD(s1, length1, dx);
|
||||||
isFCD2=unorm_checkFCD(s2, length2, dx);
|
isFCD2=unorm_checkFCD(s2, length2, dx);
|
||||||
|
|
||||||
if((options&U_COMPARE_IGNORE_CASE)!=0 && !(isFCD1 && isFCD2)) {
|
|
||||||
// case-fold first to keep the order of operations as in UAX 21 2.5
|
|
||||||
_len1=u_strFoldCase(fold1, sizeof(fold1)/U_SIZEOF_UCHAR,
|
|
||||||
s1, length1,
|
|
||||||
options,
|
|
||||||
pErrorCode);
|
|
||||||
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
s1=fold1;
|
|
||||||
} else {
|
|
||||||
f1=(UChar *)uprv_malloc(_len1*U_SIZEOF_UCHAR);
|
|
||||||
if(f1==0) {
|
|
||||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
_len1=u_strFoldCase(f1, _len1,
|
|
||||||
s1, length1,
|
|
||||||
options,
|
|
||||||
pErrorCode);
|
|
||||||
if(U_FAILURE(*pErrorCode)) {
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
s1=f1;
|
|
||||||
}
|
|
||||||
length1=_len1;
|
|
||||||
|
|
||||||
_len2=u_strFoldCase(fold2, sizeof(fold2)/U_SIZEOF_UCHAR,
|
|
||||||
s2, length2,
|
|
||||||
options,
|
|
||||||
pErrorCode);
|
|
||||||
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
s2=fold2;
|
|
||||||
} else {
|
|
||||||
f2=(UChar *)uprv_malloc(_len2*U_SIZEOF_UCHAR);
|
|
||||||
if(f2==0) {
|
|
||||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
_len2=u_strFoldCase(f2, _len2,
|
|
||||||
s2, length2,
|
|
||||||
options,
|
|
||||||
pErrorCode);
|
|
||||||
if(U_FAILURE(*pErrorCode)) {
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
s2=f2;
|
|
||||||
}
|
|
||||||
length2=_len2;
|
|
||||||
|
|
||||||
// turn off U_COMPARE_IGNORE_CASE and re-check FCD
|
|
||||||
options&=~U_COMPARE_IGNORE_CASE;
|
|
||||||
isFCD1=unorm_checkFCD(s1, length1, dx);
|
|
||||||
isFCD2=unorm_checkFCD(s2, length2, dx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(!isFCD1 && !isFCD2) {
|
if(!isFCD1 && !isFCD2) {
|
||||||
// if both strings need normalization then make them NFD right away and
|
// if both strings need normalization then make them NFD right away and
|
||||||
// turn off normalization in the comparison function
|
// turn off normalization in the comparison function
|
||||||
|
Loading…
Reference in New Issue
Block a user