ICU-2021 replace Turkic-casefolding prototype with explanation
X-SVN-Rev: 11278
This commit is contained in:
parent
a32165212d
commit
25d1629bb7
@ -1924,53 +1924,20 @@ u_foldCase(UChar32 c, uint32_t options) {
|
||||
return c; /* no mapping - return c itself */
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* ### TODO Turkic-i case folding prototype, j2021 */
|
||||
/*
 * Maximum number of UChars in a single Turkic folding stored in fold_T[];
 * each fold_T row is 2+FOLD_T_LENGTH UChars wide.
 */
enum {
    FOLD_T_LENGTH=3
};
|
||||
|
||||
/*
 * Turkic full case foldings.
 * First UChar is the source, second the default mapping,
 * then the Turkic mapping.
 *
 * Issue for canonical caseless match (UAX #21):
 * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
 * canonical equivalence, unlike default-option casefolding.
 * For example, I-grave and I + grave fold to strings that are not canonically
 * equivalent.
 * For more details, see the comment in unorm_compare() in unorm.cpp
 * and the intermediate prototype changes for Jitterbug 2021.
 * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
 *
 * This did not get fixed because it appears that it is not possible to fix
 * it for uppercase and lowercase characters (I-grave vs. i-grave)
 * together in a way that they still fold to common result strings.
 */
|
||||
/*
 * Turkic full case folding table (prototype for Jitterbug 2021).
 *
 * Row layout:
 *   [0]   source code unit
 *   [1]   default (non-Turkic) mapping; 0 where the default folding is not
 *         a single code unit
 *   [2..] Turkic mapping, at most FOLD_T_LENGTH code units
 *
 * Slots that are not spelled out are implicitly zero-initialized; a zero
 * terminates a Turkic mapping that is shorter than FOLD_T_LENGTH.
 *
 * Rows must stay sorted in ascending order of the source code unit: the
 * lookup code first compares c against the source of the last row and then
 * scans forward only while c is not less than the current row's source.
 */
static const UChar fold_T[][2+FOLD_T_LENGTH]={
    { 0x0049, 0x0069, 0x131 },                  /* I */
    { 0x0069, 0x0069, 0x131, 0x307 },           /* i */
    { 0x00cc, 0x00ec, 0x131, 0x300 },           /* I-grave */
    { 0x00cd, 0x00ed, 0x131, 0x301 },           /* I-acute */
    { 0x00ce, 0x00ee, 0x131, 0x302 },           /* I-circumflex */
    { 0x00cf, 0x00ef, 0x131, 0x308 },           /* I-diaeresis */
    { 0x00ec, 0x00ec, 0x131, 0x300 },           /* i-grave */
    { 0x00ed, 0x00ed, 0x131, 0x301 },           /* i-acute */
    { 0x00ee, 0x00ee, 0x131, 0x302 },           /* i-circumflex */
    { 0x00ef, 0x00ef, 0x131, 0x308 },           /* i-diaeresis */
    { 0x0128, 0x0129, 0x131, 0x303 },           /* I-tilde */
    { 0x0129, 0x0129, 0x131, 0x303 },           /* i-tilde */
    { 0x012a, 0x012b, 0x131, 0x304 },           /* I-macron */
    { 0x012b, 0x012b, 0x131, 0x304 },           /* i-macron */
    { 0x012c, 0x012d, 0x131, 0x306 },           /* I-breve */
    { 0x012d, 0x012d, 0x131, 0x306 },           /* i-breve */
    { 0x012e, 0x012f, 0x131, 0x328 },           /* I-ogonek */
    { 0x012f, 0x012f, 0x131, 0x328 },           /* i-ogonek */
    { 0x0130, 0,      0x131, 0x307 },           /* I-dot above; normal mapping is 0069 0307 */
    { 0x01cf, 0x01d0, 0x131, 0x30c },           /* I-caron */
    { 0x01d0, 0x01d0, 0x131, 0x30c },           /* i-caron */
    { 0x0208, 0x0209, 0x131, 0x30f },           /* I-double grave */
    { 0x0209, 0x0209, 0x131, 0x30f },           /* i-double grave */
    { 0x020a, 0x020b, 0x131, 0x311 },           /* I-inverted breve */
    { 0x020b, 0x020b, 0x131, 0x311 },           /* i-inverted breve */
    { 0x1e2c, 0x1e2d, 0x131, 0x330 },           /* I-tilde below */
    { 0x1e2d, 0x1e2d, 0x131, 0x330 },           /* i-tilde below */
    { 0x1e2e, 0x1e2f, 0x131, 0x308, 0x301 },    /* I-diaeresis-acute */
    { 0x1e2f, 0x1e2f, 0x131, 0x308, 0x301 },    /* i-diaeresis-acute */
    { 0x1ec8, 0x1ec9, 0x131, 0x309 },           /* I-hook above */
    { 0x1ec9, 0x1ec9, 0x131, 0x309 },           /* i-hook above */
    { 0x1eca, 0x1ecb, 0x131, 0x323 },           /* I-dot below */
    { 0x1ecb, 0x1ecb, 0x131, 0x323 }            /* i-dot below */
};
|
||||
#endif
|
||||
|
||||
/* internal, see ustr_imp.h */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
@ -2026,37 +1993,9 @@ u_internalFoldCase(UChar32 c,
|
||||
dest[1]=0x307;
|
||||
}
|
||||
return 2;
|
||||
#if 0
|
||||
/* ### TODO Turkic-i case folding prototype, j2021 */
|
||||
} else if(c<=fold_T[LENGTHOF(fold_T)-1][0]) {
|
||||
for(i=0; i<LENGTHOF(fold_T) && c>=fold_T[i][0]; ++i) {
|
||||
if(c==fold_T[i][0]) {
|
||||
result=fold_T[i][1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
/* Turkic mappings */
|
||||
#if 0
|
||||
/* ### TODO Turkic-i case folding prototype, j2021 */
|
||||
if(c<=fold_T[LENGTHOF(fold_T)-1][0]) {
|
||||
for(i=0; i<LENGTHOF(fold_T) && c>=fold_T[i][0]; ++i) {
|
||||
if(c==fold_T[i][0]) {
|
||||
const UChar *p=&(fold_T[i][2]);
|
||||
length=0;
|
||||
while(length<FOLD_T_LENGTH && *p!=0) {
|
||||
if(length<destCapacity) {
|
||||
dest[length]=*p++;
|
||||
}
|
||||
++length;
|
||||
}
|
||||
return length;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if(c==0x49) {
|
||||
/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
|
||||
result=0x131;
|
||||
@ -2064,7 +2003,6 @@ u_internalFoldCase(UChar32 c,
|
||||
/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
|
||||
result=0x69;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* return c itself because there is no special mapping for it */
|
||||
/* goto single; */
|
||||
|
Loading…
Reference in New Issue
Block a user