ICU-8262 reorg code for uloc_getDisplayName, add regression test
X-SVN-Rev: 29760
This commit is contained in:
parent
389c986a20
commit
45f8abf19f
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2010, International Business Machines
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -427,8 +427,9 @@ uloc_getDisplayVariant(const char *locale,
|
||||
uloc_getVariant, _kVariants, pErrorCode);
|
||||
}
|
||||
|
||||
/* TODO:dougfelt remove */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayName(const char *locale,
|
||||
uloc_getDisplayNameOld(const char *locale,
|
||||
const char *displayLocale,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UErrorCode *pErrorCode)
|
||||
@ -704,6 +705,286 @@ uloc_getDisplayName(const char *locale,
|
||||
return u_terminateUChars(dest, destCapacity, length, pErrorCode);
|
||||
}
|
||||
|
||||
|
||||
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
|
||||
* so we don't get bitten by preflight bugs again. We can be reasonably efficient
|
||||
* without two separate code paths, this code isn't that performance-critical.
|
||||
*
|
||||
* This code is general enough to deal with patterns that have a prefix or swap the
|
||||
* language and remainder components, since we gave developers enough rope to do such
|
||||
* things if they futz with the pattern data. But since we don't give them a way to
|
||||
* specify a pattern for arbitrary combinations of components, there's not much use in
|
||||
* that. I don't think our data includes such patterns, the only variable I know of is
|
||||
* whether there is a space before the open paren, or not. Oh, and zh uses different
|
||||
* chars than the standard open/close paren (which ja and ko use, btw).
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayName(const char *locale,
|
||||
const char *displayLocale,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UErrorCode *pErrorCode)
|
||||
{
|
||||
static const UChar defaultSeparator[3] = { 0x002c, 0x0020, 0x0000 }; /* comma + space */
|
||||
static const int32_t defaultSepLen = 2;
|
||||
static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
|
||||
static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
|
||||
static const int32_t subLen = 3;
|
||||
static const UChar defaultPattern[10] = {
|
||||
0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
|
||||
}; /* {0} ({1}) */
|
||||
static const int32_t defaultPatLen = 9;
|
||||
static const int32_t defaultSub0Pos = 0;
|
||||
static const int32_t defaultSub1Pos = 5;
|
||||
|
||||
int32_t length; /* of formatted result */
|
||||
|
||||
const UChar *separator;
|
||||
int32_t sepLen = 0;
|
||||
const UChar *pattern;
|
||||
int32_t patLen = 0;
|
||||
int32_t sub0Pos, sub1Pos;
|
||||
|
||||
UBool haveLang = TRUE; /* assume true, set false if we find we don't have
|
||||
a lang component in the locale */
|
||||
UBool haveRest = TRUE; /* assume true, set false if we find we don't have
|
||||
any other component in the locale */
|
||||
UBool retry = FALSE; /* set true if we need to retry, see below */
|
||||
|
||||
int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
|
||||
UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
|
||||
NULL, &status);
|
||||
|
||||
separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
|
||||
pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);
|
||||
|
||||
ures_close(dspbundle);
|
||||
ures_close(locbundle);
|
||||
}
|
||||
|
||||
/* If we couldn't find any data, then use the defaults */
|
||||
if(sepLen == 0) {
|
||||
separator = defaultSeparator;
|
||||
sepLen = defaultSepLen;
|
||||
}
|
||||
|
||||
if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
|
||||
pattern=defaultPattern;
|
||||
patLen=defaultPatLen;
|
||||
sub0Pos=defaultSub0Pos;
|
||||
sub1Pos=defaultSub1Pos;
|
||||
} else { /* non-default pattern */
|
||||
UChar *p0=u_strstr(pattern, sub0);
|
||||
UChar *p1=u_strstr(pattern, sub1);
|
||||
if (p0==NULL || p1==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
sub0Pos=p0-pattern;
|
||||
sub1Pos=p1-pattern;
|
||||
if (sub1Pos < sub0Pos) { /* a very odd pattern */
|
||||
int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
|
||||
langi=1;
|
||||
}
|
||||
}
|
||||
|
||||
/* We loop here because there is one case in which after the first pass we could need to
|
||||
* reextract the data. If there's initial padding before the first element, we put in
|
||||
* the padding and then write that element. If it turns out there's no second element,
|
||||
* we didn't need the padding. If we do need the data (no preflight), and the first element
|
||||
* would have fit but for the padding, we need to reextract. In this case (only) we
|
||||
* adjust the parameters so padding is not added, and repeat.
|
||||
*/
|
||||
do {
|
||||
UChar* p=dest;
|
||||
int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
|
||||
int32_t langLen=0; /* length of language substitution */
|
||||
int32_t langPos=0; /* position in output of language substitution */
|
||||
int32_t restLen=0; /* length of 'everything else' substitution */
|
||||
int32_t restPos=0; /* position in output of 'everything else' substitution */
|
||||
UEnumeration* kenum; /* keyword enumeration */
|
||||
|
||||
/* prefix of pattern, extremely likely to be empty */
|
||||
if(sub0Pos) {
|
||||
if(destCapacity >= sub0Pos) {
|
||||
while (patPos < sub0Pos) {
|
||||
*p++ = pattern[patPos++];
|
||||
}
|
||||
} else {
|
||||
patPos=sub0Pos;
|
||||
}
|
||||
length=sub0Pos;
|
||||
} else {
|
||||
length=0;
|
||||
}
|
||||
|
||||
for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
|
||||
UBool subdone = FALSE; /* set true when ready to move to next substitution */
|
||||
|
||||
/* prep p and cap for calls to get display components, pin cap to 0 since
|
||||
they complain if cap is negative */
|
||||
int32_t cap=destCapacity-length;
|
||||
if (cap <= 0) {
|
||||
cap=0;
|
||||
} else {
|
||||
p=dest+length;
|
||||
}
|
||||
|
||||
if (subi == langi) { /* {0}*/
|
||||
if(haveLang) {
|
||||
langPos=length;
|
||||
langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
|
||||
length+=langLen;
|
||||
haveLang=langLen>0;
|
||||
}
|
||||
subdone=TRUE;
|
||||
} else { /* {1} */
|
||||
if(!haveRest) {
|
||||
subdone=TRUE;
|
||||
} else {
|
||||
int32_t len; /* length of component (plus other stuff) we just fetched */
|
||||
switch(resti++) {
|
||||
case 0:
|
||||
restPos=length;
|
||||
len=uloc_getDisplayScript(locale, displayLocale, p, cap, pErrorCode);
|
||||
break;
|
||||
case 1:
|
||||
len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
|
||||
break;
|
||||
case 2:
|
||||
len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
|
||||
break;
|
||||
case 3:
|
||||
kenum = uloc_openKeywords(locale, pErrorCode);
|
||||
/* fall through */
|
||||
default: {
|
||||
const char* kw=uenum_next(kenum, &len, pErrorCode);
|
||||
if (kw == NULL) {
|
||||
uenum_close(kenum);
|
||||
len=0; /* mark that we didn't add a component */
|
||||
subdone=TRUE;
|
||||
} else {
|
||||
/* incorporating this behavior into the loop made it even more complex,
|
||||
so just special case it here */
|
||||
len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
|
||||
if(len) {
|
||||
if(len < cap) {
|
||||
p[len]=0x3d; /* '=', assume we'll need it */
|
||||
}
|
||||
len+=1;
|
||||
|
||||
/* adjust for call to get keyword */
|
||||
cap-=len;
|
||||
if(cap <= 0) {
|
||||
cap=0;
|
||||
} else {
|
||||
p+=len;
|
||||
}
|
||||
}
|
||||
/* reset for call below */
|
||||
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
}
|
||||
int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
|
||||
p, cap, pErrorCode);
|
||||
if(len) {
|
||||
if(vlen==0) {
|
||||
--len; /* remove unneeded '=' */
|
||||
}
|
||||
/* restore cap and p to what they were at start */
|
||||
cap=destCapacity-length;
|
||||
if(cap <= 0) {
|
||||
cap=0;
|
||||
} else {
|
||||
p=dest+length;
|
||||
}
|
||||
}
|
||||
len+=vlen; /* total we added for key + '=' + value */
|
||||
}
|
||||
} break;
|
||||
} /* end switch */
|
||||
|
||||
if (len>0) {
|
||||
/* we addeed a component, so add separator and write it if there's room. */
|
||||
if(len+sepLen<=cap) {
|
||||
p+=len;
|
||||
for(int32_t i=0;i<sepLen;++i) {
|
||||
*p++=separator[i];
|
||||
}
|
||||
}
|
||||
length+=len+sepLen;
|
||||
} else if(subdone) {
|
||||
/* remove separator if we added it */
|
||||
if (length!=restPos) {
|
||||
length-=sepLen;
|
||||
}
|
||||
restLen=length-restPos;
|
||||
haveRest=restLen>0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if(subdone) {
|
||||
if(haveLang && haveRest) {
|
||||
/* append internal portion of pattern, the first time,
|
||||
or last portion of pattern the second time */
|
||||
int32_t padLen;
|
||||
patPos+=subLen;
|
||||
padLen=(subi==0 ? sub1Pos : patLen)-patPos;
|
||||
if(length+padLen < destCapacity) {
|
||||
p=dest+length;
|
||||
for(int32_t i=0;i<padLen;++i) {
|
||||
*p++=pattern[patPos++];
|
||||
}
|
||||
} else {
|
||||
patPos+=padLen;
|
||||
}
|
||||
length+=padLen;
|
||||
} else if(subi==0) {
|
||||
/* don't have first component, reset for second component */
|
||||
sub0Pos=0;
|
||||
length=0;
|
||||
} else if(length>0) {
|
||||
/* true length is the length of just the component we got. */
|
||||
length=haveLang?langLen:restLen;
|
||||
if(dest && sub0Pos!=0) {
|
||||
if (sub0Pos+length<=destCapacity) {
|
||||
/* first component not at start of result,
|
||||
but we have full component in buffer. */
|
||||
u_memmove(dest, dest+(haveLang?langPos:restPos), length);
|
||||
} else {
|
||||
/* would have fit, but didn't because of pattern prefix. */
|
||||
sub0Pos=0; /* stops initial padding (and a second retry,
|
||||
so we won't end up here again) */
|
||||
retry=TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
++subi; /* move on to next substitution */
|
||||
}
|
||||
}
|
||||
} while(retry);
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, length, pErrorCode);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayKeyword(const char* keyword,
|
||||
const char* displayLocale,
|
||||
|
@ -561,6 +561,85 @@ static void TestSimpleResourceInfo() {
|
||||
cleanUpDataTable();
|
||||
}
|
||||
|
||||
/* obviously, on non-ascii platforms this is useless, but it's test/debug code */
|
||||
/* if len < 0, we convert until we hit UChar 0x0000, which is not output. will add trailing null
|
||||
* if there's room but won't be included in result. result < 0 indicates an error.
|
||||
* Returns the number of chars written (not the number that would be written if there were enough room). */
|
||||
/*
 * Converts UTF-16 code units to a printable ASCII rendering for test/debug output.
 *
 *   utext        code units to convert
 *   len          number of code units, or < 0 to stop at the first 0x0000
 *                (which is not output)
 *   resultChars  output buffer
 *   buflen       capacity of resultChars in chars
 *
 * Code units 0x20..0x7e are copied as-is; the control characters listed in
 * ESCAPE_MAP become a two-char backslash escape (e.g. \n); everything else
 * becomes \uXXXX with lowercase hex digits. An escape is never split: when the
 * next escape does not fit, conversion stops. A trailing NUL is stored only if
 * there is room and is not counted.
 *
 * Returns the number of chars written, or -1 on NULL pointers, negative buflen,
 * or when built for a non-ASCII charset family.
 */
static int32_t UCharsToEscapedAscii(const UChar* utext, int32_t len, char* resultChars, int32_t buflen) {
#if U_CHARSET_FAMILY != U_ASCII_FAMILY
    return -1;
#else
    /* pairs of (escape letter, control character it stands for) */
    static const UChar ESCAPE_MAP[] = {
        /*a*/ 0x61, 0x07,
        /*b*/ 0x62, 0x08,
        /*e*/ 0x65, 0x1b,
        /*f*/ 0x66, 0x0c,
        /*n*/ 0x6E, 0x0a,
        /*r*/ 0x72, 0x0d,
        /*t*/ 0x74, 0x09,
        /*v*/ 0x76, 0x0b
    };
    static const int32_t ESCAPE_MAP_LENGTH = sizeof(ESCAPE_MAP)/sizeof(ESCAPE_MAP[0]);
    /* ASCII '0'..'9', 'a'..'f' */
    static const char HEX_DIGITS[] = {
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
        0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
    };
    int32_t i, j;
    int32_t resultLen = 0;
    const int32_t limit = len<0 ? buflen : len; /* buflen is long enough to hit the buffer limit */
    const int32_t escapeLimit1 = buflen-2; /* last start index at which a 2-char escape fits */
    const int32_t escapeLimit2 = buflen-6; /* last start index at which a 6-char escape fits */
    UChar uc;

    if(utext==NULL || resultChars==NULL || buflen<0) {
        return -1;
    }

    for(i=0;i<limit && resultLen<buflen;++i) {
        uc=utext[i];
        if(len<0 && uc==0) {
            break;
        }
        if(uc<0x20) {
            for(j=0;j<ESCAPE_MAP_LENGTH;j+=2) {
                if(uc==ESCAPE_MAP[j+1]) {
                    break;
                }
            }
            if(j<ESCAPE_MAP_LENGTH) {
                if(resultLen>escapeLimit1) {
                    break;
                }
                resultChars[resultLen++]='\\';
                resultChars[resultLen++]=(char)ESCAPE_MAP[j];
                continue;
            }
            /* control character without a short escape: use \uXXXX below */
        } else if(uc<0x7f) {
            resultChars[resultLen++] = (char)uc;
            continue;
        }

        if(resultLen>escapeLimit2) {
            break;
        }

        /* have to escape the uchar; mask each nibble with 0xf so the index
           always stays inside the 16-entry HEX_DIGITS table */
        resultChars[resultLen++]='\\';
        resultChars[resultLen++]='u';
        resultChars[resultLen++]=HEX_DIGITS[(uc>>12)&0xf];
        resultChars[resultLen++]=HEX_DIGITS[(uc>>8)&0xf];
        resultChars[resultLen++]=HEX_DIGITS[(uc>>4)&0xf];
        resultChars[resultLen++]=HEX_DIGITS[uc&0xf];
    }

    if(resultLen<buflen) {
        resultChars[resultLen] = 0;
    }

    return resultLen;
#endif
}
|
||||
|
||||
/*
|
||||
* Jitterbug 2439 -- markus 20030425
|
||||
*
|
||||
@ -634,6 +713,73 @@ static void TestDisplayNames()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* test that we properly preflight and return data when there's a non-default pattern,
|
||||
see ticket #8262. */
|
||||
{
|
||||
int32_t i, j, v;
|
||||
static const char *locale="az_Cyrl";
|
||||
static const char *displayLocale="ja";
|
||||
static const char *expectedChars =
|
||||
"\\u30a2\\u30bc\\u30eb\\u30d0\\u30a4\\u30b8\\u30e3\\u30f3\\u8a9e"
|
||||
"(\\u30ad\\u30ea\\u30eb\\u6587\\u5b57)";
|
||||
UErrorCode ec=U_ZERO_ERROR;
|
||||
UChar result[256];
|
||||
int32_t len;
|
||||
int32_t preflightLen=uloc_getDisplayName(locale, displayLocale, NULL, 0, &ec);
|
||||
/* inconvenient semantics when preflighting, this condition is expected... */
|
||||
if(ec==U_BUFFER_OVERFLOW_ERROR) {
|
||||
ec=U_ZERO_ERROR;
|
||||
}
|
||||
len=uloc_getDisplayName(locale, displayLocale, result, LENGTHOF(result), &ec);
|
||||
if(U_FAILURE(ec)) {
|
||||
log_err("uloc_getDisplayName(%s, %s...) returned error: %s",
|
||||
locale, displayLocale, u_errorName(ec));
|
||||
} else {
|
||||
UChar *expected=CharsToUChars(expectedChars);
|
||||
int32_t expectedLen=u_strlen(expected);
|
||||
|
||||
if(len!=expectedLen) {
|
||||
log_err("uloc_getDisplayName(%s, %s...) returned string of length %d, expected length %d",
|
||||
locale, displayLocale, len, expectedLen);
|
||||
} else if(preflightLen!=expectedLen) {
|
||||
log_err("uloc_getDisplayName(%s, %s...) returned preflight length %d, expected length %d",
|
||||
locale, displayLocale, preflightLen, expectedLen);
|
||||
} else if(u_strncmp(result, expected, len)) {
|
||||
int32_t cap=len*6+1; /* worst case + space for trailing null */
|
||||
char* resultChars=malloc(cap);
|
||||
int32_t resultCharsLen=UCharsToEscapedAscii(result, len, resultChars, cap);
|
||||
if(resultCharsLen<0 || resultCharsLen<cap-1) {
|
||||
log_err("uloc_getDisplayName(%s, %s...) mismatch", locale, displayLocale);
|
||||
} else {
|
||||
log_err("uloc_getDisplayName(%s, %s...) returned '%s' but expected '%s'",
|
||||
locale, displayLocale, resultChars, expectedChars);
|
||||
}
|
||||
free(resultChars);
|
||||
resultChars=NULL;
|
||||
} else {
|
||||
/* test all buffer sizes */
|
||||
for(i=len+1;i>=0;--i) {
|
||||
len=uloc_getDisplayName(locale, displayLocale, result, i, &ec);
|
||||
if(ec==U_BUFFER_OVERFLOW_ERROR) {
|
||||
ec=U_ZERO_ERROR;
|
||||
}
|
||||
if(U_FAILURE(ec)) {
|
||||
log_err("using buffer of length %d returned error %s", i, u_errorName(ec));
|
||||
break;
|
||||
}
|
||||
if(len!=expectedLen) {
|
||||
log_err("with buffer of length %d, expected length %d but got %d", i, expectedLen, len);
|
||||
break;
|
||||
}
|
||||
/* There's no guarantee about what's in the buffer if we've overflowed, in particular,
|
||||
* we don't know that it's been filled, so no point in checking. */
|
||||
}
|
||||
}
|
||||
|
||||
free(expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user