ICU-12410 add C++ CaseMap, make UCaseMap an alias for that, move new functions with Edits into CaseMap, make case properties functions ignore UCaseProps pointer
X-SVN-Rev: 39554
This commit is contained in:
parent
3b23b78b95
commit
fbd1e089fd
@ -64,13 +64,13 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, ui
|
||||
}
|
||||
|
||||
U_CFUNC void U_EXPORT2
|
||||
ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
ucase_addPropertyStarts(const UCaseProps * /* unused csp */, const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* add the start code point of each same-value range of the trie */
|
||||
utrie2_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa);
|
||||
utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
|
||||
|
||||
/* add code points with hardcoded properties, plus the ones following them */
|
||||
|
||||
@ -133,14 +133,14 @@ static const uint8_t flagsOffset[256]={
|
||||
/* simple case mappings ----------------------------------------------------- */
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucase_tolower(const UCaseProps *csp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
ucase_tolower(const UCaseProps * /* unused csp */, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
|
||||
c+=UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(csp, props);
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
|
||||
uint16_t excWord=*pe++;
|
||||
if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
|
||||
GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
|
||||
@ -150,14 +150,14 @@ ucase_tolower(const UCaseProps *csp, UChar32 c) {
|
||||
}
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucase_toupper(const UCaseProps *csp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
ucase_toupper(const UCaseProps * /* unused csp */, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
|
||||
c+=UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(csp, props);
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
|
||||
uint16_t excWord=*pe++;
|
||||
if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
|
||||
GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
|
||||
@ -167,14 +167,14 @@ ucase_toupper(const UCaseProps *csp, UChar32 c) {
|
||||
}
|
||||
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucase_totitle(const UCaseProps *csp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
ucase_totitle(const UCaseProps * /* unused csp */, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
|
||||
c+=UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(csp, props);
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
|
||||
uint16_t excWord=*pe++;
|
||||
int32_t idx;
|
||||
if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
|
||||
@ -198,7 +198,7 @@ static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
|
||||
|
||||
|
||||
U_CFUNC void U_EXPORT2
|
||||
ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
|
||||
ucase_addCaseClosure(const UCaseProps * /* unused csp */, UChar32 c, const USetAdder *sa) {
|
||||
uint16_t props;
|
||||
|
||||
/*
|
||||
@ -229,7 +229,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
|
||||
break;
|
||||
}
|
||||
|
||||
props=UTRIE2_GET16(&csp->trie, c);
|
||||
props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
|
||||
/* add the one simple case mapping, no matter what type it is */
|
||||
@ -243,7 +243,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
|
||||
* c has exceptions, so there may be multiple simple and/or
|
||||
* full case mappings. Add them all.
|
||||
*/
|
||||
const uint16_t *pe0, *pe=GET_EXCEPTIONS(csp, props);
|
||||
const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
|
||||
const UChar *closure;
|
||||
uint16_t excWord=*pe++;
|
||||
int32_t idx, closureLength, fullLength, length;
|
||||
@ -338,10 +338,10 @@ strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
|
||||
}
|
||||
|
||||
U_CFUNC UBool U_EXPORT2
|
||||
ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) {
|
||||
ucase_addStringCaseClosure(const UCaseProps * /* unused csp */, const UChar *s, int32_t length, const USetAdder *sa) {
|
||||
int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
|
||||
|
||||
if(csp->unfold==NULL || s==NULL) {
|
||||
if(ucase_props_singleton.unfold==NULL || s==NULL) {
|
||||
return FALSE; /* no reverse case folding data, or no string */
|
||||
}
|
||||
if(length<=1) {
|
||||
@ -355,7 +355,7 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
const uint16_t *unfold=csp->unfold;
|
||||
const uint16_t *unfold=ucase_props_singleton.unfold;
|
||||
unfoldRows=unfold[UCASE_UNFOLD_ROWS];
|
||||
unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
|
||||
unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
|
||||
@ -381,7 +381,7 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length
|
||||
for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
|
||||
U16_NEXT_UNSAFE(p, i, c);
|
||||
sa->add(sa->set, c);
|
||||
ucase_addCaseClosure(csp, c, sa);
|
||||
ucase_addCaseClosure(&ucase_props_singleton, c, sa);
|
||||
}
|
||||
return TRUE;
|
||||
} else if(result<0) {
|
||||
@ -430,38 +430,38 @@ U_NAMESPACE_END
|
||||
|
||||
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_getType(const UCaseProps *csp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
ucase_getType(const UCaseProps * /* unused csp */, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
return UCASE_GET_TYPE(props);
|
||||
}
|
||||
|
||||
/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
ucase_getTypeOrIgnorable(const UCaseProps * /* unused csp */, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
return UCASE_GET_TYPE_AND_IGNORABLE(props);
|
||||
}
|
||||
|
||||
/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
|
||||
static inline int32_t
|
||||
getDotType(const UCaseProps *csp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
getDotType(const UCaseProps * /* unused csp */, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
return props&UCASE_DOT_MASK;
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(csp, props);
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
|
||||
return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ucase_isSoftDotted(const UCaseProps *csp, UChar32 c) {
|
||||
return (UBool)(getDotType(csp, c)==UCASE_SOFT_DOTTED);
|
||||
ucase_isSoftDotted(const UCaseProps * /* unused csp */, UChar32 c) {
|
||||
return (UBool)(getDotType(&ucase_props_singleton, c)==UCASE_SOFT_DOTTED);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
ucase_isCaseSensitive(const UCaseProps * /* unused csp */, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
return (UBool)((props&UCASE_SENSITIVE)!=0);
|
||||
}
|
||||
|
||||
@ -662,7 +662,7 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) {
|
||||
* it is also cased or not.
|
||||
*/
|
||||
static UBool
|
||||
isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void *context, int8_t dir) {
|
||||
isFollowedByCasedLetter(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context, int8_t dir) {
|
||||
UChar32 c;
|
||||
|
||||
if(iter==NULL) {
|
||||
@ -670,7 +670,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void
|
||||
}
|
||||
|
||||
for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
|
||||
int32_t type=ucase_getTypeOrIgnorable(csp, c);
|
||||
int32_t type=ucase_getTypeOrIgnorable(&ucase_props_singleton, c);
|
||||
if(type&4) {
|
||||
/* case-ignorable, continue with the loop */
|
||||
} else if(type!=UCASE_NONE) {
|
||||
@ -685,7 +685,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void
|
||||
|
||||
/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
|
||||
static UBool
|
||||
isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *context) {
|
||||
isPrecededBySoftDotted(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context) {
|
||||
UChar32 c;
|
||||
int32_t dotType;
|
||||
int8_t dir;
|
||||
@ -695,7 +695,7 @@ isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *
|
||||
}
|
||||
|
||||
for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
|
||||
dotType=getDotType(csp, c);
|
||||
dotType=getDotType(&ucase_props_singleton, c);
|
||||
if(dotType==UCASE_SOFT_DOTTED) {
|
||||
return TRUE; /* preceded by TYPE_i */
|
||||
} else if(dotType!=UCASE_OTHER_ACCENT) {
|
||||
@ -742,7 +742,7 @@ isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *
|
||||
|
||||
/* Is preceded by base character 'I' with no intervening cc=230 ? */
|
||||
static UBool
|
||||
isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) {
|
||||
isPrecededBy_I(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context) {
|
||||
UChar32 c;
|
||||
int32_t dotType;
|
||||
int8_t dir;
|
||||
@ -755,7 +755,7 @@ isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context)
|
||||
if(c==0x49) {
|
||||
return TRUE; /* preceded by I */
|
||||
}
|
||||
dotType=getDotType(csp, c);
|
||||
dotType=getDotType(&ucase_props_singleton, c);
|
||||
if(dotType!=UCASE_OTHER_ACCENT) {
|
||||
return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
|
||||
}
|
||||
@ -766,7 +766,7 @@ isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context)
|
||||
|
||||
/* Is followed by one or more cc==230 ? */
|
||||
static UBool
|
||||
isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) {
|
||||
isFollowedByMoreAbove(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context) {
|
||||
UChar32 c;
|
||||
int32_t dotType;
|
||||
int8_t dir;
|
||||
@ -776,7 +776,7 @@ isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *c
|
||||
}
|
||||
|
||||
for(dir=1; (c=iter(context, dir))>=0; dir=0) {
|
||||
dotType=getDotType(csp, c);
|
||||
dotType=getDotType(&ucase_props_singleton, c);
|
||||
if(dotType==UCASE_ABOVE) {
|
||||
return TRUE; /* at least one cc==230 following */
|
||||
} else if(dotType!=UCASE_OTHER_ACCENT) {
|
||||
@ -789,7 +789,7 @@ isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *c
|
||||
|
||||
/* Is followed by a dot above (without cc==230 in between) ? */
|
||||
static UBool
|
||||
isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) {
|
||||
isFollowedByDotAbove(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context) {
|
||||
UChar32 c;
|
||||
int32_t dotType;
|
||||
int8_t dir;
|
||||
@ -802,7 +802,7 @@ isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *co
|
||||
if(c==0x307) {
|
||||
return TRUE;
|
||||
}
|
||||
dotType=getDotType(csp, c);
|
||||
dotType=getDotType(&ucase_props_singleton, c);
|
||||
if(dotType!=UCASE_OTHER_ACCENT) {
|
||||
return FALSE; /* next base character or cc==230 in between */
|
||||
}
|
||||
@ -812,20 +812,20 @@ isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *co
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_toFullLower(const UCaseProps *csp, UChar32 c,
|
||||
ucase_toFullLower(const UCaseProps * /* unused csp */, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache) {
|
||||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
|
||||
result=c+UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
|
||||
uint16_t excWord=*pe++;
|
||||
int32_t full;
|
||||
|
||||
@ -844,7 +844,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
|
||||
if( loc==UCASE_LOC_LITHUANIAN &&
|
||||
/* base characters, find accents above */
|
||||
(((c==0x49 || c==0x4a || c==0x12e) &&
|
||||
isFollowedByMoreAbove(csp, iter, context)) ||
|
||||
isFollowedByMoreAbove(&ucase_props_singleton, iter, context)) ||
|
||||
/* precomposed with accent above, no need to find one */
|
||||
(c==0xcc || c==0xcd || c==0x128))
|
||||
) {
|
||||
@ -896,7 +896,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
|
||||
0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
*/
|
||||
return 0x69;
|
||||
} else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) {
|
||||
} else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(&ucase_props_singleton, iter, context)) {
|
||||
/*
|
||||
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
|
||||
# This matches the behavior of the canonically equivalent I-dot_above
|
||||
@ -905,7 +905,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
|
||||
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
|
||||
*/
|
||||
return 0; /* remove the dot (continue without output) */
|
||||
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) {
|
||||
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(&ucase_props_singleton, iter, context)) {
|
||||
/*
|
||||
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
|
||||
|
||||
@ -922,8 +922,8 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
|
||||
*pString=iDot;
|
||||
return 2;
|
||||
} else if( c==0x3a3 &&
|
||||
!isFollowedByCasedLetter(csp, iter, context, 1) &&
|
||||
isFollowedByCasedLetter(csp, iter, context, -1) /* -1=preceded */
|
||||
!isFollowedByCasedLetter(&ucase_props_singleton, iter, context, 1) &&
|
||||
isFollowedByCasedLetter(&ucase_props_singleton, iter, context, -1) /* -1=preceded */
|
||||
) {
|
||||
/* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
|
||||
/*
|
||||
@ -957,7 +957,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c,
|
||||
|
||||
/* internal */
|
||||
static int32_t
|
||||
toUpperOrTitle(const UCaseProps *csp, UChar32 c,
|
||||
toUpperOrTitle(const UCaseProps * /* unused csp */, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache,
|
||||
@ -965,13 +965,13 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c,
|
||||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
|
||||
result=c+UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
|
||||
uint16_t excWord=*pe++;
|
||||
int32_t full, idx;
|
||||
|
||||
@ -994,7 +994,7 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c,
|
||||
0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
|
||||
*/
|
||||
return 0x130;
|
||||
} else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) {
|
||||
} else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(&ucase_props_singleton, iter, context)) {
|
||||
/*
|
||||
# Lithuanian
|
||||
|
||||
@ -1052,19 +1052,19 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c,
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_toFullUpper(const UCaseProps *csp, UChar32 c,
|
||||
ucase_toFullUpper(const UCaseProps * /* unused csp */, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache) {
|
||||
return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, TRUE);
|
||||
return toUpperOrTitle(&ucase_props_singleton, c, iter, context, pString, locale, locCache, TRUE);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_toFullTitle(const UCaseProps *csp, UChar32 c,
|
||||
ucase_toFullTitle(const UCaseProps * /* unused csp */, UChar32 c,
|
||||
UCaseContextIterator *iter, void *context,
|
||||
const UChar **pString,
|
||||
const char *locale, int32_t *locCache) {
|
||||
return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, FALSE);
|
||||
return toUpperOrTitle(&ucase_props_singleton, c, iter, context, pString, locale, locCache, FALSE);
|
||||
}
|
||||
|
||||
/* case folding ------------------------------------------------------------- */
|
||||
@ -1110,14 +1110,14 @@ ucase_toFullTitle(const UCaseProps *csp, UChar32 c,
|
||||
|
||||
/* return the simple case folding mapping for c */
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) {
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
ucase_fold(const UCaseProps * /* unused csp */, UChar32 c, uint32_t options) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
|
||||
c+=UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(csp, props);
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
|
||||
uint16_t excWord=*pe++;
|
||||
int32_t idx;
|
||||
if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
|
||||
@ -1170,19 +1170,19 @@ ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) {
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
|
||||
ucase_toFullFolding(const UCaseProps * /* unused csp */, UChar32 c,
|
||||
const UChar **pString,
|
||||
uint32_t options) {
|
||||
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
|
||||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&csp->trie, c);
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
|
||||
result=c+UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2;
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
|
||||
uint16_t excWord=*pe++;
|
||||
int32_t full, idx;
|
||||
|
||||
@ -1287,23 +1287,19 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) {
|
||||
/* case mapping properties */
|
||||
const UChar *resultString;
|
||||
int32_t locCache;
|
||||
const UCaseProps *csp=GET_CASE_PROPS();
|
||||
if(csp==NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
switch(which) {
|
||||
case UCHAR_LOWERCASE:
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(&ucase_props_singleton, c));
|
||||
case UCHAR_UPPERCASE:
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(&ucase_props_singleton, c));
|
||||
case UCHAR_SOFT_DOTTED:
|
||||
return ucase_isSoftDotted(csp, c);
|
||||
return ucase_isSoftDotted(&ucase_props_singleton, c);
|
||||
case UCHAR_CASE_SENSITIVE:
|
||||
return ucase_isCaseSensitive(csp, c);
|
||||
return ucase_isCaseSensitive(&ucase_props_singleton, c);
|
||||
case UCHAR_CASED:
|
||||
return (UBool)(UCASE_NONE!=ucase_getType(csp, c));
|
||||
return (UBool)(UCASE_NONE!=ucase_getType(&ucase_props_singleton, c));
|
||||
case UCHAR_CASE_IGNORABLE:
|
||||
return (UBool)(ucase_getTypeOrIgnorable(csp, c)>>2);
|
||||
return (UBool)(ucase_getTypeOrIgnorable(&ucase_props_singleton, c)>>2);
|
||||
/*
|
||||
* Note: The following Changes_When_Xyz are defined as testing whether
|
||||
* the NFD form of the input changes when Xyz-case-mapped.
|
||||
@ -1318,20 +1314,20 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) {
|
||||
*/
|
||||
case UCHAR_CHANGES_WHEN_LOWERCASED:
|
||||
locCache=UCASE_LOC_ROOT;
|
||||
return (UBool)(ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
|
||||
return (UBool)(ucase_toFullLower(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0);
|
||||
case UCHAR_CHANGES_WHEN_UPPERCASED:
|
||||
locCache=UCASE_LOC_ROOT;
|
||||
return (UBool)(ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
|
||||
return (UBool)(ucase_toFullUpper(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0);
|
||||
case UCHAR_CHANGES_WHEN_TITLECASED:
|
||||
locCache=UCASE_LOC_ROOT;
|
||||
return (UBool)(ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
|
||||
return (UBool)(ucase_toFullTitle(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0);
|
||||
/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
|
||||
case UCHAR_CHANGES_WHEN_CASEMAPPED:
|
||||
locCache=UCASE_LOC_ROOT;
|
||||
return (UBool)(
|
||||
ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
|
||||
ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
|
||||
ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
|
||||
ucase_toFullLower(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
|
||||
ucase_toFullUpper(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
|
||||
ucase_toFullTitle(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0);
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -37,82 +37,57 @@
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
using icu::internal::CaseMapFriend;
|
||||
|
||||
/* UCaseMap service object -------------------------------------------------- */
|
||||
|
||||
U_CAPI UCaseMap * U_EXPORT2
|
||||
ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
|
||||
UCaseMap *csm;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap));
|
||||
CaseMap *csm = new CaseMap(locale, options, *pErrorCode);
|
||||
if(csm==NULL) {
|
||||
*pErrorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
} else if (U_FAILURE(*pErrorCode)) {
|
||||
delete csm;
|
||||
return NULL;
|
||||
}
|
||||
uprv_memset(csm, 0, sizeof(UCaseMap));
|
||||
|
||||
csm->csp=ucase_getSingleton();
|
||||
ucasemap_setLocale(csm, locale, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
uprv_free(csm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
csm->options=options;
|
||||
return csm;
|
||||
return CaseMapFriend::toUCaseMap(*csm);
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_close(UCaseMap *csm) {
|
||||
if(csm!=NULL) {
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
// Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
|
||||
delete reinterpret_cast<BreakIterator *>(csm->iter);
|
||||
#endif
|
||||
uprv_free(csm);
|
||||
delete CaseMapFriend::fromUCaseMap(csm);
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ucasemap_getLocale(const UCaseMap *csm) {
|
||||
return csm->locale;
|
||||
return CaseMapFriend::localeID(*CaseMapFriend::fromUCaseMap(csm));
|
||||
}
|
||||
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucasemap_getOptions(const UCaseMap *csm) {
|
||||
return csm->options;
|
||||
return CaseMapFriend::options(*CaseMapFriend::fromUCaseMap(csm));
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
|
||||
int32_t length;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
/* we only really need the language code for case mappings */
|
||||
length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
|
||||
}
|
||||
if(length==sizeof(csm->locale)) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
csm->locCache=0;
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
ucase_getCaseLocale(csm->locale, &csm->locCache);
|
||||
} else {
|
||||
csm->locale[0]=0;
|
||||
}
|
||||
CaseMapFriend::setLocale(*CaseMapFriend::fromUCaseMap(csm), locale, *pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) {
|
||||
csm->options=options;
|
||||
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
CaseMapFriend::setOptions(*CaseMapFriend::fromUCaseMap(csm), options);
|
||||
}
|
||||
|
||||
/* UTF-8 string case mappings ----------------------------------------------- */
|
||||
@ -258,7 +233,7 @@ utf8_caseContextIterator(void *context, int8_t dir) {
|
||||
* context [0..srcLength[ into account.
|
||||
*/
|
||||
static int32_t
|
||||
_caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
_caseMap(const CaseMap &csm, UCaseMapFull *map,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, UCaseContext *csc,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
@ -268,7 +243,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
int32_t srcIndex, destIndex;
|
||||
int32_t locCache;
|
||||
|
||||
locCache=csm->locCache;
|
||||
locCache = CaseMapFriend::caseLocale(csm);
|
||||
|
||||
/* case mapping loop */
|
||||
srcIndex=srcStart;
|
||||
@ -286,7 +261,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
}
|
||||
continue;
|
||||
}
|
||||
c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache);
|
||||
c=map(NULL, c, utf8_caseContextIterator, csc, &s, NULL, &locCache);
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
|
||||
/* fast path version of appendResult() for ASCII results */
|
||||
dest[destIndex++]=(uint8_t)c2;
|
||||
@ -308,7 +283,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
ucasemap_internalUTF8ToTitle(const CaseMap &csm, BreakIterator *iter,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
@ -321,12 +296,8 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Use the C++ abstract base class to minimize dependencies.
|
||||
// TODO: Change UCaseMap.iter to store a BreakIterator directly.
|
||||
BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
|
||||
|
||||
/* set up local variables */
|
||||
int32_t locCache=csm->locCache;
|
||||
int32_t locCache=CaseMapFriend::caseLocale(csm);
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
@ -339,9 +310,9 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
/* find next index where to titlecase */
|
||||
if(isFirstIndex) {
|
||||
isFirstIndex=FALSE;
|
||||
idx=bi->first();
|
||||
idx=iter->first();
|
||||
} else {
|
||||
idx=bi->next();
|
||||
idx=iter->next();
|
||||
}
|
||||
if(idx==UBRK_DONE || idx>srcLength) {
|
||||
idx=srcLength;
|
||||
@ -364,7 +335,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
/* find and copy uncased characters [prev..titleStart[ */
|
||||
titleStart=titleLimit=prev;
|
||||
U8_NEXT(src, titleLimit, idx, c);
|
||||
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
|
||||
if((CaseMapFriend::options(csm)&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) {
|
||||
/* Adjust the titlecasing index (titleStart) to the next cased character. */
|
||||
for(;;) {
|
||||
titleStart=titleLimit;
|
||||
@ -376,7 +347,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
break;
|
||||
}
|
||||
U8_NEXT(src, titleLimit, idx, c);
|
||||
if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
|
||||
if(UCASE_NONE!=ucase_getType(NULL, c)) {
|
||||
break; /* cased letter at [titleStart..titleLimit[ */
|
||||
}
|
||||
}
|
||||
@ -392,7 +363,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
if(c>=0) {
|
||||
csc.cpStart=titleStart;
|
||||
csc.cpLimit=titleLimit;
|
||||
c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
|
||||
c=ucase_toFullTitle(NULL, c, utf8_caseContextIterator, &csc, &s, NULL, &locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
|
||||
} else {
|
||||
// Malformed UTF-8.
|
||||
@ -405,7 +376,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
|
||||
/* Special case Dutch IJ titlecasing */
|
||||
if (titleStart+1 < idx &&
|
||||
ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH &&
|
||||
locCache == UCASE_LOC_DUTCH &&
|
||||
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
|
||||
(src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
|
||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
|
||||
@ -413,7 +384,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
}
|
||||
/* lowercase [titleLimit..index[ */
|
||||
if(titleLimit<idx) {
|
||||
if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
if((CaseMapFriend::options(csm)&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
@ -471,7 +442,7 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i
|
||||
}
|
||||
|
||||
// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
|
||||
int32_t toUpper(const UCaseMap *csm,
|
||||
int32_t toUpper(const CaseMap & /* unused csm */,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
@ -483,7 +454,7 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
UChar32 c;
|
||||
U8_NEXT(src, nextIndex, srcLength, c);
|
||||
uint32_t nextState = 0;
|
||||
int32_t type = ucase_getTypeOrIgnorable(csm->csp, c);
|
||||
int32_t type = ucase_getTypeOrIgnorable(NULL, c);
|
||||
if ((type & UCASE_IGNORABLE) != 0) {
|
||||
// c is case-ignorable
|
||||
nextState |= (state & AFTER_CASED);
|
||||
@ -533,7 +504,7 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
(data & HAS_ACCENT) != 0 &&
|
||||
numYpogegrammeni == 0 &&
|
||||
(state & AFTER_CASED) == 0 &&
|
||||
!isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) {
|
||||
!isFollowedByCasedLetter(NULL, src, nextIndex, srcLength)) {
|
||||
// Keep disjunctive "or" with (only) a tonos.
|
||||
// We use the same "word boundary" conditions as for the Final_Sigma test.
|
||||
if (i == nextIndex) {
|
||||
@ -569,7 +540,7 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
} else if(c>=0) {
|
||||
const UChar *s;
|
||||
UChar32 c2 = 0;
|
||||
c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
|
||||
c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &locCache);
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
|
||||
/* fast path version of appendResult() for ASCII results */
|
||||
dest[destIndex++]=(uint8_t)c2;
|
||||
@ -602,7 +573,7 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
U_NAMESPACE_END
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToLower(const UCaseMap *csm,
|
||||
ucasemap_internalUTF8ToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
@ -617,12 +588,12 @@ ucasemap_internalUTF8ToLower(const UCaseMap *csm,
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
|
||||
ucasemap_internalUTF8ToUpper(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t locCache = csm->locCache;
|
||||
if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
|
||||
int32_t locCache = CaseMapFriend::caseLocale(csm);
|
||||
if (locCache == UCASE_LOC_GREEK) {
|
||||
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
|
||||
}
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
@ -635,12 +606,11 @@ ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
static int32_t
|
||||
utf8_foldCase(const UCaseProps *csp,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
static int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8Fold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t srcIndex, destIndex;
|
||||
|
||||
const UChar *s;
|
||||
@ -661,7 +631,7 @@ utf8_foldCase(const UCaseProps *csp,
|
||||
}
|
||||
continue;
|
||||
}
|
||||
c=ucase_toFullFolding(csp, c, &s, options);
|
||||
c=ucase_toFullFolding(NULL, c, &s, CaseMapFriend::options(csm));
|
||||
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
|
||||
/* fast path version of appendResult() for ASCII results */
|
||||
dest[destIndex++]=(uint8_t)c2;
|
||||
@ -680,16 +650,8 @@ utf8_foldCase(const UCaseProps *csp,
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8Fold(const UCaseMap *csm,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ucasemap_mapUTF8(const UCaseMap *csm,
|
||||
ucasemap_mapUTF8(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UTF8CaseMapper *stringCaseMapper,
|
||||
@ -723,7 +685,8 @@ ucasemap_mapUTF8(const UCaseMap *csm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
|
||||
destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
|
||||
dest, destCapacity, src, srcLength, pErrorCode);
|
||||
return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
|
||||
}
|
||||
|
||||
@ -734,10 +697,11 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucasemap_mapUTF8(csm,
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToLower, pErrorCode);
|
||||
return ucasemap_mapUTF8(
|
||||
*CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToLower, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
@ -745,10 +709,11 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucasemap_mapUTF8(csm,
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToUpper, pErrorCode);
|
||||
return ucasemap_mapUTF8(
|
||||
*CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToUpper, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
@ -756,8 +721,9 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucasemap_mapUTF8(csm,
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8Fold, pErrorCode);
|
||||
return ucasemap_mapUTF8(
|
||||
*CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8Fold, pErrorCode);
|
||||
}
|
||||
|
@ -30,35 +30,47 @@
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
using icu::internal::CaseMapFriend;
|
||||
|
||||
U_CAPI const UBreakIterator * U_EXPORT2
|
||||
ucasemap_getBreakIterator(const UCaseMap *csm) {
|
||||
return reinterpret_cast<UBreakIterator *>(csm->iter);
|
||||
return reinterpret_cast<const UBreakIterator *>(
|
||||
CaseMapFriend::iter(*CaseMapFriend::fromUCaseMap(csm)));
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
|
||||
delete csm->iter;
|
||||
csm->iter=reinterpret_cast<BreakIterator *>(iterToAdopt);
|
||||
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
CaseMapFriend::adoptIter(*CaseMapFriend::fromUCaseMap(csm),
|
||||
reinterpret_cast<BreakIterator *>(iterToAdopt));
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_utf8ToTitle(UCaseMap *csm,
|
||||
ucasemap_utf8ToTitle(UCaseMap *ucsm,
|
||||
char *dest, int32_t destCapacity,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
UText utext=UTEXT_INITIALIZER;
|
||||
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
|
||||
if(csm->iter==NULL) {
|
||||
csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
csm->iter->setText(&utext, *pErrorCode);
|
||||
CaseMap &csm = *CaseMapFriend::fromUCaseMap(ucsm);
|
||||
UText utext=UTEXT_INITIALIZER;
|
||||
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
|
||||
if (CaseMapFriend::iter(csm) == NULL) {
|
||||
CaseMapFriend::adoptIter(
|
||||
csm, BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode));
|
||||
}
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
CaseMapFriend::mutableIter(csm)->setText(&utext, *pErrorCode);
|
||||
int32_t length=ucasemap_mapUTF8(csm,
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToTitle, pErrorCode);
|
||||
CaseMapFriend::mutableIter(csm),
|
||||
(uint8_t *)dest, destCapacity,
|
||||
(const uint8_t *)src, srcLength,
|
||||
ucasemap_internalUTF8ToTitle, pErrorCode);
|
||||
utext_close(&utext);
|
||||
return length;
|
||||
}
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "unicode/localpointer.h"
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
@ -101,7 +102,9 @@ class BreakIterator;
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
// TODO: move to new C++ unicode/casemap.h
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Records lengths of string edits but not replacement text.
|
||||
@ -111,13 +114,13 @@ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
|
||||
* An Edits object tracks a separate UErrorCode, but ICU case mapping functions
|
||||
* merge any such errors into their API's UErrorCode.
|
||||
*
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
class Edits final : public UMemory {
|
||||
class U_COMMON_API Edits final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty object.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Edits() :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
|
||||
@ -126,7 +129,7 @@ public:
|
||||
|
||||
/**
|
||||
* Resets the data but may not release memory.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void reset();
|
||||
|
||||
@ -137,7 +140,7 @@ public:
|
||||
* to the original string.
|
||||
* @see omitUnchanged
|
||||
* @see writeUnchanged
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Edits &setWriteUnchanged(UBool write) {
|
||||
omit = !write;
|
||||
@ -146,26 +149,26 @@ public:
|
||||
/**
|
||||
* @return TRUE if the case mapping function is to omit characters that do not change.
|
||||
* @see setWriteUnchanged
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool omitUnchanged() const { return omit; }
|
||||
/**
|
||||
* @return TRUE if the case mapping function is to write characters that do not change.
|
||||
* @see setWriteUnchanged
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool writeUnchanged() const { return !omit; }
|
||||
|
||||
/**
|
||||
* Adds a record for an unchanged segment of text.
|
||||
* Normally called from inside ICU case mapping functions, not user code.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void addUnchanged(int32_t unchangedLength);
|
||||
/**
|
||||
* Adds a record for a text replacement/insertion/deletion.
|
||||
* Normally called from inside ICU case mapping functions, not user code.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
void addReplace(int32_t oldLength, int32_t newLength);
|
||||
/**
|
||||
@ -173,19 +176,19 @@ public:
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
* Normally called from inside ICU case mapping functions, not user code.
|
||||
* @return TRUE if U_FAILURE(outErrorCode)
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool copyErrorTo(UErrorCode &outErrorCode);
|
||||
|
||||
/**
|
||||
* How much longer is the new text compared with the old text?
|
||||
* @return new length minus old length
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t lengthDelta() const { return delta; }
|
||||
/**
|
||||
* @return TRUE if there are any change edits
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool hasChanges() const;
|
||||
|
||||
@ -193,13 +196,13 @@ public:
|
||||
* Access to the list of edits.
|
||||
* @see getCoarseIterator
|
||||
* @see getFineIterator
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
struct Iterator final : public UMemory {
|
||||
/**
|
||||
* Advances to the next edit.
|
||||
* @return TRUE if there is another edit
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode);
|
||||
|
||||
@ -217,42 +220,42 @@ public:
|
||||
*
|
||||
* @param i source index
|
||||
* @return TRUE if the edit for the source index was found
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
|
||||
* FALSE if oldLength units remain unchanged.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
UBool hasChange() const { return changed; }
|
||||
/**
|
||||
* @return the number of units in the original string which are replaced or remain unchanged.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t oldLength() const { return oldLength_; }
|
||||
/**
|
||||
* @return the number of units in the modified string, if hasChange() is TRUE.
|
||||
* Same as oldLength if hasChange() is FALSE.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t newLength() const { return newLength_; }
|
||||
|
||||
/**
|
||||
* @return the current index into the source string
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t sourceIndex() const { return srcIndex; }
|
||||
/**
|
||||
* @return the current index into the replacement-characters-only string,
|
||||
* not counting unchanged spans
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t replacementIndex() const { return replIndex; }
|
||||
/**
|
||||
* @return the current index into the full destination string
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t destinationIndex() const { return destIndex; }
|
||||
|
||||
@ -279,7 +282,7 @@ public:
|
||||
* Returns an Iterator for coarse-grained changes for simple string updates.
|
||||
* Skips non-changes.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getCoarseChangesIterator() const {
|
||||
return Iterator(array, length, TRUE, TRUE);
|
||||
@ -288,7 +291,7 @@ public:
|
||||
/**
|
||||
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getCoarseIterator() const {
|
||||
return Iterator(array, length, FALSE, TRUE);
|
||||
@ -298,7 +301,7 @@ public:
|
||||
* Returns an Iterator for fine-grained changes for modifying styled text.
|
||||
* Skips non-changes.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getFineChangesIterator() const {
|
||||
return Iterator(array, length, TRUE, FALSE);
|
||||
@ -307,7 +310,7 @@ public:
|
||||
/**
|
||||
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @internal ICU 59 technology preview
|
||||
* @draft ICU 59
|
||||
*/
|
||||
Iterator getFineIterator() const {
|
||||
return Iterator(array, length, FALSE, FALSE);
|
||||
@ -334,30 +337,209 @@ private:
|
||||
uint16_t stackArray[STACK_CAPACITY];
|
||||
};
|
||||
|
||||
/**
|
||||
* Omit unchanged text when case-mapping with Edits.
|
||||
*
|
||||
* TODO: revisit which bit to use; currently:
|
||||
* - 31..20: old normalization options (only deprecated Unicode 3.2)
|
||||
* shifted up for unorm_compare()
|
||||
* - 19..16: more options specific to unorm_compare() (currently bits 19, 17, 16)
|
||||
* - 15..12: more string compare options (currently bits 15 & 12)
|
||||
* - 11.. 8: titlecase mapping options (currently bits 9..8)
|
||||
* - 7.. 0: case folding options, but only bit 0 currently used
|
||||
*
|
||||
* could overlay any normalization and string *comparison* option bits
|
||||
* with case *mapping* option bits
|
||||
* *unless* we start using UCaseMap for string comparison functions
|
||||
*
|
||||
* future: German sharp s may need locale variant or option bit
|
||||
*
|
||||
* @internal ICU 59 technology preview
|
||||
*/
|
||||
// TODO: does not work well as an option because we would need to set/reset it on UCaseMaps
|
||||
// that are often const, replaced for now by Edits.setWriteUnchanged(UBool)
|
||||
// #define UCASEMAP_OMIT_UNCHANGED 0x4000
|
||||
namespace internal {
|
||||
/** @internal ICU implementation detail */
|
||||
class CaseMapFriend;
|
||||
} // namespace internal
|
||||
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
class U_COMMON_API CaseMap final : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructor for the root locale and options.
|
||||
* Explicitly construct with Locale::getDefault() for the default locale.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
inline CaseMap(uint32_t options, UErrorCode &errorCode);
|
||||
/**
|
||||
* Constructor for locale and options.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
CaseMap(const Locale &locale, uint32_t options, UErrorCode &errorCode);
|
||||
/**
|
||||
* Constructor for locale ID and options.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
CaseMap(const char *localeID, uint32_t options, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
*/
|
||||
~CaseMap();
|
||||
|
||||
// TODO: reverse src & dest? C vs. C++ conventions
|
||||
|
||||
/**
|
||||
* Lowercases the characters in a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t toLower(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Uppercases the characters in a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToUpper
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t toUpper(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with ucasemap_setOptions().)
|
||||
*
|
||||
* The titlecase break iterator can be provided to customize for arbitrary
|
||||
* styles, using rules and dictionaries beyond the standard iterators.
|
||||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string and used one or more times for iteration.
|
||||
* If NULL, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t toTitle(BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Case-folds the characters in a UTF-16 string and optionally records edits.
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @see ucasemap_setOptions
|
||||
* @see U_FOLD_CASE_DEFAULT
|
||||
* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @draft ICU 59
|
||||
*/
|
||||
int32_t foldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
private:
|
||||
friend class internal::CaseMapFriend;
|
||||
|
||||
CaseMap(const CaseMap &other) = delete;
|
||||
CaseMap &operator=(const CaseMap &other) = delete;
|
||||
|
||||
CaseMap(const Locale &loc, int32_t caseLoc, uint32_t opts, UErrorCode &errorCode);
|
||||
|
||||
void setCaseLocale(const char *localeID);
|
||||
void setLocale(const char *localeID, UErrorCode &errorCode);
|
||||
|
||||
int32_t caseLocale;
|
||||
uint32_t options;
|
||||
Locale locale;
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
BreakIterator *iter; // owned; only set by old C-style API
|
||||
#endif
|
||||
};
|
||||
|
||||
CaseMap::CaseMap(uint32_t opts, UErrorCode & /*errorCode*/) :
|
||||
caseLocale(/* UCASE_LOC_ROOT = */ 1), options(opts), locale(Locale::getRoot())
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
, iter(NULL)
|
||||
#endif
|
||||
{}
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
@ -450,168 +632,6 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
|
||||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
|
||||
/**
|
||||
* Lowercases the characters in a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @internal ICU 59 technology preview
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toLowerWithEdits(const UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Uppercases the characters in a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @internal ICU 59 technology preview
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toUpperWithEdits(const UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Titlecases a UTF-16 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* Titlecasing uses a break iterator to find the first characters of words
|
||||
* that are to be titlecased. It titlecases those characters and lowercases
|
||||
* all others. (This can be modified with ucasemap_setOptions().)
|
||||
*
|
||||
* The titlecase break iterator can be provided to customize for arbitrary
|
||||
* styles, using rules and dictionaries beyond the standard iterators.
|
||||
* The standard titlecase iterator for the root locale implements the
|
||||
* algorithm of Unicode TR 21.
|
||||
*
|
||||
* This function uses only the setText(), first() and next() methods of the
|
||||
* provided break iterator.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param iter A break iterator to find the first characters of words that are to be titlecased.
|
||||
* It is set to the source string and used one or more times for iteration.
|
||||
* If NULL, then a clone of ucasemap_getBreakIterator() is used.
|
||||
* If that is NULL too, then a word break iterator for the locale is used
|
||||
* (or something equivalent).
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents is undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of bytes). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strToTitle
|
||||
* @internal ICU 59 technology preview
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
* Case-folds the characters in a UTF-16 string and optionally records edits.
|
||||
*
|
||||
* Case-folding is locale-independent and not context-sensitive,
|
||||
* but there is an option for whether to include or exclude mappings for dotted I
|
||||
* and dotless i that are marked with 'T' in CaseFolding.txt.
|
||||
*
|
||||
* The result may be longer or shorter than the original.
|
||||
* The source string and the destination buffer must not overlap.
|
||||
*
|
||||
* @param csm UCaseMap service object.
|
||||
* @param dest A buffer for the result string. The result will be NUL-terminated if
|
||||
* the buffer is large enough.
|
||||
* The contents are undefined in case of failure.
|
||||
* @param destCapacity The size of the buffer (number of UChars). If it is 0, then
|
||||
* dest may be NULL and the function will only return the length of the result
|
||||
* without writing any of the result string.
|
||||
* @param src The original string.
|
||||
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
|
||||
* @param edits Records edits for index mapping, working with styled text,
|
||||
* and getting only changes (if any). Can be NULL.
|
||||
* @param errorCode Reference to an in/out error code value
|
||||
* which must not indicate a failure before the function call.
|
||||
* @return The length of the result string, if successful - or in case of a buffer overflow,
|
||||
* in which case it will be greater than destCapacity.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @see ucasemap_setOptions
|
||||
* @see U_FOLD_CASE_DEFAULT
|
||||
* @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
|
||||
* @internal ICU 59 technology preview
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_foldCaseWithEdits(const UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/**
|
||||
@ -849,25 +869,4 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
|
||||
const char *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
|
||||
/**
|
||||
* Internal string case mapping function type.
|
||||
* All error checking must be done.
|
||||
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
|
||||
* src and dest must not overlap.
|
||||
* @internal
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UStringCaseMapper(const UCaseMap *csm,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter,
|
||||
#endif
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#endif // U_SHOW_CPLUSPLUS_API
|
||||
#endif
|
||||
|
@ -33,7 +33,6 @@
|
||||
#include "unicode/std_string.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
|
||||
struct UConverter; // unicode/ucnv.h
|
||||
|
||||
@ -60,6 +59,30 @@ U_NAMESPACE_BEGIN
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
class BreakIterator; // unicode/brkiter.h
|
||||
#endif
|
||||
class CaseMap;
|
||||
class Edits;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
|
||||
/**
|
||||
* Internal string case mapping function type.
|
||||
* All error checking must be done.
|
||||
* src and dest must not overlap.
|
||||
* @internal
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UStringCaseMapper(const icu::CaseMap &csm,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter,
|
||||
#endif
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class Locale; // unicode/locid.h
|
||||
class StringCharacterIterator;
|
||||
class UnicodeStringAppendable; // unicode/appendable.h
|
||||
@ -3573,7 +3596,7 @@ private:
|
||||
* as in ustr_imp.h for ustrcase_map().
|
||||
*/
|
||||
UnicodeString &
|
||||
caseMap(const UCaseMap *csm,
|
||||
caseMap(const CaseMap &csm,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
BreakIterator *iter,
|
||||
#endif
|
||||
|
@ -88,7 +88,7 @@ UnicodeString::doCaseCompare(int32_t start,
|
||||
//========================================
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::caseMap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UStringCaseMapper *stringCaseMapper) {
|
||||
if(isEmpty() || !isWritable()) {
|
||||
// nothing to do
|
||||
@ -194,10 +194,9 @@ UnicodeString::caseMap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::foldCase(uint32_t options) {
|
||||
UCaseMap csm=UCASEMAP_INITIALIZER;
|
||||
csm.csp=ucase_getSingleton();
|
||||
csm.options=options;
|
||||
return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
CaseMap csm(options, errorCode);
|
||||
return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -19,8 +19,8 @@
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
@ -29,22 +29,6 @@ U_NAMESPACE_BEGIN
|
||||
// Write implementation
|
||||
//========================================
|
||||
|
||||
/*
|
||||
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
|
||||
* Do this fast because it is called with every function call.
|
||||
*/
|
||||
static inline void
|
||||
setTempCaseMap(UCaseMap *csm, const char *locale) {
|
||||
if(csm->csp==NULL) {
|
||||
csm->csp=ucase_getSingleton();
|
||||
}
|
||||
if(locale!=NULL && locale[0]==0) {
|
||||
csm->locale[0]=0;
|
||||
} else {
|
||||
ustrcase_setTempCaseMapLocale(csm, locale);
|
||||
}
|
||||
}
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::toLower() {
|
||||
return toLower(Locale::getDefault());
|
||||
@ -52,9 +36,9 @@ UnicodeString::toLower() {
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::toLower(const Locale &locale) {
|
||||
UCaseMap csm=UCASEMAP_INITIALIZER;
|
||||
setTempCaseMap(&csm, locale.getName());
|
||||
return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
CaseMap csm(locale, 0, errorCode);
|
||||
return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
|
||||
}
|
||||
|
||||
UnicodeString &
|
||||
@ -64,9 +48,9 @@ UnicodeString::toUpper() {
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::toUpper(const Locale &locale) {
|
||||
UCaseMap csm=UCASEMAP_INITIALIZER;
|
||||
setTempCaseMap(&csm, locale.getName());
|
||||
return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
CaseMap csm(locale, 0, errorCode);
|
||||
return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -22,27 +22,11 @@
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
/*
|
||||
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
|
||||
* Do this fast because it is called with every function call.
|
||||
*/
|
||||
static inline void
|
||||
setTempCaseMap(UCaseMap *csm, const char *locale) {
|
||||
if(csm->csp==NULL) {
|
||||
csm->csp=ucase_getSingleton();
|
||||
}
|
||||
if(locale!=NULL && locale[0]==0) {
|
||||
csm->locale[0]=0;
|
||||
} else {
|
||||
ustrcase_setTempCaseMapLocale(csm, locale);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UnicodeString &
|
||||
@ -57,12 +41,10 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
|
||||
|
||||
UnicodeString &
|
||||
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
|
||||
UCaseMap csm=UCASEMAP_INITIALIZER;
|
||||
csm.options=options;
|
||||
setTempCaseMap(&csm, locale.getName());
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
CaseMap csm(locale, options, errorCode);
|
||||
BreakIterator *bi=titleIter;
|
||||
if(bi==NULL) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
bi=BreakIterator::createWordInstance(locale, errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
setToBogus();
|
||||
@ -70,7 +52,7 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t
|
||||
}
|
||||
}
|
||||
bi->setText(*this);
|
||||
caseMap(&csm, bi, ustrcase_internalToTitle);
|
||||
caseMap(csm, bi, ustrcase_internalToTitle);
|
||||
if(titleIter==NULL) {
|
||||
delete bi;
|
||||
}
|
||||
|
@ -104,21 +104,48 @@ uprv_loadPropsData(UErrorCode *errorCode);*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
// TODO: Consider moving these case mapping definitions
|
||||
// into a new internal header like ucasemap_imp.h.
|
||||
// into a new internal header like casemap_imp.h.
|
||||
|
||||
#include "unicode/unistr.h" // for UStringCaseMapper
|
||||
|
||||
/*
|
||||
* Internal string casing functions implementing
|
||||
* ustring.h/ustrcase.c and UnicodeString case mapping functions.
|
||||
*/
|
||||
|
||||
struct UCaseMap {
|
||||
const UCaseProps *csp;
|
||||
/** Avoid public @internal CaseMap methods. Define only one CaseMap friend. */
|
||||
class icu::internal::CaseMapFriend final /* all static */ {
|
||||
public:
|
||||
static UCaseMap *toUCaseMap(icu::CaseMap &csm) {
|
||||
return reinterpret_cast<UCaseMap *>(&csm);
|
||||
}
|
||||
|
||||
static const icu::CaseMap *fromUCaseMap(const UCaseMap *csm) {
|
||||
return reinterpret_cast<const icu::CaseMap *>(csm);
|
||||
}
|
||||
static icu::CaseMap *fromUCaseMap(UCaseMap *csm) {
|
||||
return reinterpret_cast<icu::CaseMap *>(csm);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
|
||||
static const icu::BreakIterator *iter(const icu::CaseMap &csm) { return csm.iter; }
|
||||
static icu::BreakIterator *mutableIter(icu::CaseMap &csm) { return csm.iter; }
|
||||
static void adoptIter(icu::CaseMap &csm, icu::BreakIterator *iter);
|
||||
#endif
|
||||
char locale[32];
|
||||
int32_t locCache;
|
||||
uint32_t options;
|
||||
|
||||
static const icu::Locale &locale(const icu::CaseMap &csm) { return csm.locale; }
|
||||
static const char *localeID(const icu::CaseMap &csm) { return csm.locale.getName(); }
|
||||
static void setLocale(icu::CaseMap &csm, const char *localeID, UErrorCode &errorCode) {
|
||||
csm.setLocale(localeID, errorCode);
|
||||
}
|
||||
|
||||
static int32_t caseLocale(const icu::CaseMap &csm) { return csm.caseLocale; }
|
||||
|
||||
static uint32_t options(const icu::CaseMap &csm) { return csm.options; }
|
||||
static void setOptions(icu::CaseMap &csm, uint32_t options) { csm.options = options; }
|
||||
|
||||
private:
|
||||
CaseMapFriend() = delete;
|
||||
};
|
||||
|
||||
#if UCONFIG_NO_BREAK_ITERATION
|
||||
@ -135,12 +162,9 @@ struct UCaseMap {
|
||||
# define UCASEMAP_BREAK_ITERATOR_NULL NULL,
|
||||
#endif
|
||||
|
||||
U_CFUNC void
|
||||
ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_internalToLower(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
@ -148,7 +172,7 @@ ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_internalToUpper(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
@ -158,7 +182,7 @@ ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToTitle(const UCaseMap *csm,
|
||||
ustrcase_internalToTitle(const icu::CaseMap &csm,
|
||||
icu::BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
@ -169,7 +193,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
|
||||
|
||||
/** Implements UStringCaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_internalFold(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
@ -180,7 +204,7 @@ ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
* Implements argument checking.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_map(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
@ -193,7 +217,7 @@ ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
* Implements argument checking and internally works with an intermediate buffer if necessary.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_mapWithOverlap(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
@ -207,24 +231,32 @@ ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
* src and dest must not overlap.
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
UTF8CaseMapper(const UCaseMap *csm,
|
||||
UTF8CaseMapper(const icu::CaseMap &csm,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
icu::BreakIterator *iter,
|
||||
#endif
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
/** Implements UTF8CaseMapper. */
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
ucasemap_internalUTF8ToTitle(const icu::CaseMap &csm,
|
||||
icu::BreakIterator *iter,
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Implements argument checking and buffer handling
|
||||
* for UTF-8 string case mapping as a common function.
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
ucasemap_mapUTF8(const UCaseMap *csm,
|
||||
ucasemap_mapUTF8(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
uint8_t *dest, int32_t destCapacity,
|
||||
const uint8_t *src, int32_t srcLength,
|
||||
UTF8CaseMapper *stringCaseMapper,
|
||||
|
@ -29,25 +29,19 @@
|
||||
#include "ucase.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
/* functions available in the common library (for unistr_case.cpp) */
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/*
|
||||
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
|
||||
* Do this fast because it is called with every function call.
|
||||
* Duplicate of the same function in ustrcase.cpp, to keep it inline.
|
||||
*/
|
||||
static inline void
|
||||
setTempCaseMap(UCaseMap *csm, const char *locale) {
|
||||
if(csm->csp==NULL) {
|
||||
csm->csp=ucase_getSingleton();
|
||||
}
|
||||
if(locale!=NULL && locale[0]==0) {
|
||||
csm->locale[0]=0;
|
||||
} else {
|
||||
ustrcase_setTempCaseMapLocale(csm, locale);
|
||||
}
|
||||
using icu::internal::CaseMapFriend;
|
||||
|
||||
// TODO: create casemap.cpp
|
||||
|
||||
void icu::internal::CaseMapFriend::adoptIter(CaseMap &csm, BreakIterator *iter) {
|
||||
delete csm.iter;
|
||||
csm.iter = iter;
|
||||
}
|
||||
|
||||
/* functions available in the common library (for unistr_case.cpp) */
|
||||
|
||||
/* public API functions */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
@ -56,70 +50,76 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
|
||||
UBreakIterator *titleIter,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseMap csm=UCASEMAP_INITIALIZER;
|
||||
setTempCaseMap(&csm, locale);
|
||||
icu::LocalPointer<icu::BreakIterator> ownedIter;
|
||||
icu::BreakIterator *iter;
|
||||
CaseMap csm(locale, 0, *pErrorCode);
|
||||
BreakIterator *iter;
|
||||
if(titleIter!=NULL) {
|
||||
iter=reinterpret_cast<icu::BreakIterator *>(titleIter);
|
||||
iter=reinterpret_cast<BreakIterator *>(titleIter);
|
||||
} else {
|
||||
iter=icu::BreakIterator::createWordInstance(icu::Locale(csm.locale), *pErrorCode);
|
||||
ownedIter.adoptInstead(iter);
|
||||
iter=BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode);
|
||||
CaseMapFriend::adoptIter(csm, iter);
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
icu::UnicodeString s(srcLength<0, src, srcLength);
|
||||
UnicodeString s(srcLength<0, src, srcLength);
|
||||
iter->setText(s);
|
||||
return ustrcase_mapWithOverlap(
|
||||
&csm, iter,
|
||||
csm, iter,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToTitle, *pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
icu::LocalPointer<icu::BreakIterator> ownedIter;
|
||||
if(iter==NULL) {
|
||||
if(csm->iter!=NULL) {
|
||||
iter=csm->iter->clone();
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
int32_t CaseMap::toTitle(BreakIterator *it,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
UErrorCode &errorCode) const {
|
||||
LocalPointer<BreakIterator> ownedIter;
|
||||
if(it==NULL) {
|
||||
if(iter!=NULL) {
|
||||
it=iter->clone();
|
||||
} else {
|
||||
iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), errorCode);
|
||||
it=BreakIterator::createWordInstance(locale, errorCode);
|
||||
}
|
||||
ownedIter.adoptInsteadAndCheckErrorCode(iter, errorCode);
|
||||
ownedIter.adoptInsteadAndCheckErrorCode(it, errorCode);
|
||||
}
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
icu::UnicodeString s(srcLength<0, src, srcLength);
|
||||
iter->setText(s);
|
||||
UnicodeString s(srcLength<0, src, srcLength);
|
||||
it->setText(s);
|
||||
return ustrcase_map(
|
||||
csm, iter,
|
||||
*this, it,
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToTitle, edits, errorCode);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toTitle(UCaseMap *csm,
|
||||
ucasemap_toTitle(UCaseMap *ucsm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UErrorCode *pErrorCode) {
|
||||
if(csm->iter==NULL) {
|
||||
csm->iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), *pErrorCode);
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
icu::UnicodeString s(srcLength<0, src, srcLength);
|
||||
csm->iter->setText(s);
|
||||
CaseMap &csm = *CaseMapFriend::fromUCaseMap(ucsm);
|
||||
if (CaseMapFriend::iter(csm) == NULL) {
|
||||
CaseMapFriend::adoptIter(
|
||||
csm, BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode));
|
||||
}
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
UnicodeString s(srcLength<0, src, srcLength);
|
||||
CaseMapFriend::mutableIter(csm)->setText(s);
|
||||
return ustrcase_map(
|
||||
csm, csm->iter,
|
||||
csm, CaseMapFriend::mutableIter(csm),
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToTitle, NULL, *pErrorCode);
|
||||
|
@ -32,6 +32,10 @@
|
||||
#include "ustr_imp.h"
|
||||
#include "uassert.h"
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
using icu::internal::CaseMapFriend;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
@ -538,13 +542,13 @@ utf16_caseContextIterator(void *context, int8_t dir) {
|
||||
* context [0..srcLength[ into account.
|
||||
*/
|
||||
static int32_t
|
||||
_caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
_caseMap(const CaseMap &csm, UCaseMapFull *map,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
int32_t locCache=csm->locCache;
|
||||
int32_t locCache = CaseMapFriend::caseLocale(csm);
|
||||
|
||||
/* case mapping loop */
|
||||
int32_t srcIndex=srcStart;
|
||||
@ -556,7 +560,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
U16_NEXT(src, srcIndex, srcLimit, c);
|
||||
csc->cpLimit=srcIndex;
|
||||
const UChar *s;
|
||||
c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
|
||||
c=map(NULL, c, utf16_caseContextIterator, csc, &s, NULL, &locCache);
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
srcIndex - cpStart, edits);
|
||||
if (destIndex < 0) {
|
||||
@ -571,7 +575,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
|
||||
ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
@ -581,7 +585,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
|
||||
}
|
||||
|
||||
/* set up local variables */
|
||||
int32_t locCache=csm->locCache;
|
||||
int32_t locCache=CaseMapFriend::caseLocale(csm);
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
@ -622,7 +626,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
|
||||
int32_t titleLimit=prev;
|
||||
UChar32 c;
|
||||
U16_NEXT(src, titleLimit, idx, c);
|
||||
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
|
||||
if((CaseMapFriend::options(csm)&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) {
|
||||
/* Adjust the titlecasing index (titleStart) to the next cased character. */
|
||||
for(;;) {
|
||||
titleStart=titleLimit;
|
||||
@ -634,7 +638,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
|
||||
break;
|
||||
}
|
||||
U16_NEXT(src, titleLimit, idx, c);
|
||||
if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
|
||||
if(UCASE_NONE!=ucase_getType(NULL, c)) {
|
||||
break; /* cased letter at [titleStart..titleLimit[ */
|
||||
}
|
||||
}
|
||||
@ -651,8 +655,8 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
|
||||
csc.cpStart=titleStart;
|
||||
csc.cpLimit=titleLimit;
|
||||
const UChar *s;
|
||||
c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s,
|
||||
csm->locale, &locCache);
|
||||
c=ucase_toFullTitle(NULL, c, utf16_caseContextIterator, &csc, &s,
|
||||
NULL, &locCache);
|
||||
destIndex=appendResult(dest, destIndex, destCapacity, c, s,
|
||||
titleLimit-titleStart, edits);
|
||||
if(destIndex<0) {
|
||||
@ -662,7 +666,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
|
||||
|
||||
/* Special case Dutch IJ titlecasing */
|
||||
if (titleStart+1 < idx &&
|
||||
ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH &&
|
||||
locCache == UCASE_LOC_DUTCH &&
|
||||
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
|
||||
(src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
|
||||
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
|
||||
@ -678,7 +682,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
|
||||
|
||||
/* lowercase [titleLimit..index[ */
|
||||
if(titleLimit<idx) {
|
||||
if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
if((CaseMapFriend::options(csm)&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
@ -1193,7 +1197,7 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i,
|
||||
* for each character.
|
||||
* TODO: Try to re-consolidate one way or another with the non-Greek function.
|
||||
*/
|
||||
int32_t toUpper(const UCaseMap *csm,
|
||||
int32_t toUpper(const CaseMap & /* unused csm */,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
Edits *edits,
|
||||
@ -1206,7 +1210,7 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
UChar32 c;
|
||||
U16_NEXT(src, nextIndex, srcLength, c);
|
||||
uint32_t nextState = 0;
|
||||
int32_t type = ucase_getTypeOrIgnorable(csm->csp, c);
|
||||
int32_t type = ucase_getTypeOrIgnorable(NULL, c);
|
||||
if ((type & UCASE_IGNORABLE) != 0) {
|
||||
// c is case-ignorable
|
||||
nextState |= (state & AFTER_CASED);
|
||||
@ -1253,7 +1257,7 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
(data & HAS_ACCENT) != 0 &&
|
||||
numYpogegrammeni == 0 &&
|
||||
(state & AFTER_CASED) == 0 &&
|
||||
!isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) {
|
||||
!isFollowedByCasedLetter(NULL, src, nextIndex, srcLength)) {
|
||||
// Keep disjunctive "or" with (only) a tonos.
|
||||
// We use the same "word boundary" conditions as for the Final_Sigma test.
|
||||
if (i == nextIndex) {
|
||||
@ -1322,7 +1326,7 @@ int32_t toUpper(const UCaseMap *csm,
|
||||
}
|
||||
} else {
|
||||
const UChar *s;
|
||||
c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
|
||||
c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &locCache);
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
nextIndex - i, edits);
|
||||
if (destIndex < 0) {
|
||||
@ -1343,7 +1347,7 @@ U_NAMESPACE_END
|
||||
/* functions available in the common library (for unistr_case.cpp) */
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ustrcase_internalToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
@ -1360,13 +1364,13 @@ ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
}
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ustrcase_internalToUpper(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
int32_t locCache = csm->locCache;
|
||||
if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
|
||||
int32_t locCache = CaseMapFriend::caseLocale(csm);
|
||||
if (locCache == UCASE_LOC_GREEK) {
|
||||
return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, errorCode);
|
||||
}
|
||||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
@ -1381,7 +1385,7 @@ ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
}
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
ustrcase_internalFold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
@ -1394,7 +1398,7 @@ ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
UChar32 c;
|
||||
U16_NEXT(src, srcIndex, srcLength, c);
|
||||
const UChar *s;
|
||||
c = ucase_toFullFolding(csm->csp, c, &s, csm->options);
|
||||
c = ucase_toFullFolding(NULL, c, &s, CaseMapFriend::options(csm));
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
srcIndex - cpStart, edits);
|
||||
if (destIndex < 0) {
|
||||
@ -1407,7 +1411,7 @@ ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_map(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
@ -1448,7 +1452,7 @@ ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
ustrcase_mapWithOverlap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UStringCaseMapper *stringCaseMapper,
|
||||
@ -1519,55 +1523,50 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseMap csm=UCASEMAP_INITIALIZER;
|
||||
csm.csp=ucase_getSingleton();
|
||||
csm.options=options;
|
||||
return ustrcase_mapWithOverlap(
|
||||
&csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
CaseMap(options, *pErrorCode), UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalFold, *pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toLowerWithEdits(const UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
int32_t CaseMap::toLower(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) const {
|
||||
return ustrcase_map(
|
||||
csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
*this, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToLower, edits, errorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_toUpperWithEdits(const UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
int32_t CaseMap::toUpper(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) const {
|
||||
return ustrcase_map(
|
||||
csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
*this, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToUpper, edits, errorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucasemap_foldCaseWithEdits(const UCaseMap *csm,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
// Maps src (srcLength units) into dest (capacity destCapacity) by delegating
// to ustrcase_map() with the ustrcase_internalFold mapper.
// This object (*this) is handed to ustrcase_map() as the case map, and no
// break iterator is supplied (UCASEMAP_BREAK_ITERATOR_NULL).
// edits and errorCode are forwarded unchanged; the return value is whatever
// ustrcase_map() returns (presumably the output length -- confirm against
// ustrcase_map()'s declaration).
int32_t CaseMap::foldCase(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) const {
|
||||
return ustrcase_map(
|
||||
csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
*this, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalFold, edits, errorCode);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* case-insensitive string comparisons -------------------------------------- */
|
||||
|
||||
/*
|
||||
|
@ -18,69 +18,71 @@
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "uassert.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "ucase.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_CFUNC void
|
||||
ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale) {
|
||||
/*
|
||||
* We could call ucasemap_setLocale(), but here we really only care about
|
||||
* the initial language subtag, we need not return the real string via
|
||||
* ucasemap_getLocale(), and we don't care about only getting "x" from
|
||||
* "x-some-thing" etc.
|
||||
*
|
||||
* We ignore locales with a longer-than-3 initial subtag.
|
||||
*
|
||||
* We also do not fill in the locCache because it is rarely used,
|
||||
* and not worth setting unless we reuse it for many case mapping operations.
|
||||
* (That's why UCaseMap was created.)
|
||||
*/
|
||||
int i;
|
||||
char c;
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/* the internal functions require locale!=NULL */
|
||||
if(locale==NULL) {
|
||||
// Do not call uprv_getDefaultLocaleID() because that does not see
|
||||
// changes to the default locale via uloc_setDefault().
|
||||
// It would also be inefficient if used frequently because uprv_getDefaultLocaleID()
|
||||
// does not cache the locale ID.
|
||||
//
|
||||
// Unfortunately, uloc_getDefault() has many dependencies.
|
||||
// We only care about a small set of language subtags,
|
||||
// and we do not need the locale ID to be canonicalized.
|
||||
//
|
||||
// Best is to not call case mapping functions with a NULL locale ID.
|
||||
locale=uloc_getDefault();
|
||||
}
|
||||
for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {
|
||||
csm->locale[i]=c;
|
||||
}
|
||||
if(i<=3) {
|
||||
csm->locale[i]=0; /* Up to 3 non-separator characters. */
|
||||
} else {
|
||||
csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */
|
||||
// TODO: new casemap_locale.cpp
|
||||
|
||||
// Primary constructor: stores the Locale, the case-locale code and the options.
//   loc      - copied into the locale member.
//   caseLoc  - stored as caseLocale; 0 means "unknown, compute it here".
//   opts     - stored as options.
//   (4th arg) - the UErrorCode is accepted but not used by this constructor.
// When break iteration is compiled in, iter starts out as NULL.
CaseMap::CaseMap(const Locale &loc, int32_t caseLoc, uint32_t opts, UErrorCode & /*errorCode*/) :
|
||||
caseLocale(caseLoc), options(opts), locale(loc)
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
, iter(NULL)
|
||||
#endif
|
||||
{
|
||||
// No precomputed case locale was supplied: derive it from the stored locale's base name.
if (caseLoc == 0) { // UCASE_LOC_UNKNOWN
|
||||
setCaseLocale(locale.getBaseName());
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
|
||||
* Do this fast because it is called with every function call.
|
||||
*/
|
||||
static inline void
|
||||
setTempCaseMap(UCaseMap *csm, const char *locale) {
|
||||
if(csm->csp==NULL) {
|
||||
csm->csp=ucase_getSingleton();
|
||||
}
|
||||
if(locale!=NULL && locale[0]==0) {
|
||||
csm->locale[0]=0;
|
||||
} else {
|
||||
ustrcase_setTempCaseMapLocale(csm, locale);
|
||||
// Convenience constructor taking only a Locale and options.
// Delegates to the four-argument constructor with a case-locale code of 0
// (UCASE_LOC_UNKNOWN), passing locale, options and errorCode through.
CaseMap::CaseMap(const Locale &locale, uint32_t options, UErrorCode &errorCode) :
|
||||
CaseMap(locale, /* UCASE_LOC_UNKNOWN = */ 0, options, errorCode) {}
|
||||
|
||||
// Constructs a CaseMap from a locale ID string.
// The object is first initialized as the root locale (Locale::getRoot() with
// case-locale code 1 = UCASE_LOC_ROOT). If localeID is the empty string,
// nothing more is done; if it is NULL or non-empty, setLocale() is called to
// replace the root settings (setLocale() may set errorCode).
// small optimization for localeID=="", a little slower otherwise
|
||||
CaseMap::CaseMap(const char *localeID, uint32_t options, UErrorCode &errorCode) :
|
||||
CaseMap(Locale::getRoot(), /* UCASE_LOC_ROOT = */ 1, options, errorCode) {
|
||||
if (localeID == NULL || *localeID != 0) {
|
||||
setLocale(localeID, errorCode); // not root
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor: deletes the iter member when break iteration is compiled in
// (UCONFIG_NO_BREAK_ITERATION is 0); otherwise the body is empty.
CaseMap::~CaseMap() {
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
delete iter;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Recomputes the caseLocale member from a locale ID string.
// localeID must not be NULL (checked with U_ASSERT only).
// caseLocale is reset to UCASE_LOC_UNKNOWN and then passed by address to
// ucase_getCaseLocale(), which presumably stores the resolved case-locale
// code there -- confirm against ucase_getCaseLocale()'s contract.
void CaseMap::setCaseLocale(const char *localeID) {
|
||||
U_ASSERT(localeID != NULL);
|
||||
caseLocale = UCASE_LOC_UNKNOWN;
|
||||
ucase_getCaseLocale(localeID, &caseLocale);
|
||||
}
|
||||
|
||||
// Sets the locale member from a locale ID string and refreshes caseLocale.
//   localeID  - locale ID; NULL selects Locale::getDefault().
//   errorCode - if already a failure on entry, the function returns without
//               changing anything; set to U_MEMORY_ALLOCATION_ERROR when the
//               Locale built from localeID is bogus.
// In the non-NULL, non-bogus case the caller's original localeID string (not
// the Locale's base name) is what gets passed on to setCaseLocale().
void CaseMap::setLocale(const char *localeID, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (localeID == NULL) {
|
||||
locale = Locale::getDefault();
|
||||
localeID = locale.getBaseName();  // setCaseLocale() requires a non-NULL string
|
||||
} else {
|
||||
locale = Locale(localeID);
|
||||
if (locale.isBogus()) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
localeID = "";  // compute the case locale from the empty ID instead
|
||||
}
|
||||
}
|
||||
setCaseLocale(localeID);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* public API functions */
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
@ -88,10 +90,9 @@ u_strToLower(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseMap csm=UCASEMAP_INITIALIZER;
|
||||
setTempCaseMap(&csm, locale);
|
||||
CaseMap csm(locale, 0, *pErrorCode);
|
||||
return ustrcase_mapWithOverlap(
|
||||
&csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToLower, *pErrorCode);
|
||||
@ -102,10 +103,9 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode) {
|
||||
UCaseMap csm=UCASEMAP_INITIALIZER;
|
||||
setTempCaseMap(&csm, locale);
|
||||
CaseMap csm(locale, 0, *pErrorCode);
|
||||
return ustrcase_mapWithOverlap(
|
||||
&csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
csm, UCASEMAP_BREAK_ITERATOR_NULL
|
||||
dest, destCapacity,
|
||||
src, srcLength,
|
||||
ustrcase_internalToUpper, *pErrorCode);
|
||||
|
@ -744,11 +744,12 @@ TestUCaseMap(void) {
|
||||
if(0!=strcmp(locale, "tr")) {
|
||||
log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale);
|
||||
}
|
||||
/* overly long locale IDs get truncated to their language code to avoid unnecessary allocation */
|
||||
/* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */
|
||||
ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
|
||||
locale=ucasemap_getLocale(csm);
|
||||
if(0!=strcmp(locale, "i-klingon")) {
|
||||
log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s!=\"i-klingon\"\n", locale);
|
||||
if(0!=strncmp(locale, "i-klingon", 9)) {
|
||||
log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n"
|
||||
" does not start with \"i-klingon\"\n", locale);
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
|
Loading…
Reference in New Issue
Block a user