ICU-12671 Fixed a problem in uloc_toLanguageTag. When an LDML attribute is used without LDML keywords, the function did not produce -u- at the right place. This change fixes the LDML attribute handling. The test cases from the ticket were integrated into cintltst.

X-SVN-Rev: 39076
This commit is contained in:
Yoshito Umaoka 2016-08-19 23:29:17 +00:00
parent e864010e9a
commit 8a5b579dd2
2 changed files with 56 additions and 43 deletions

View File

@ -579,6 +579,14 @@ _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool l
cmp = LDMLEXT - *(cur->key);
} else {
cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
/* Both are u extension keys - we need special handling for 'attribute' */
if (cmp != 0) {
if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
cmp = 1;
} else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
cmp = -1;
}
}
}
} else {
cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
@ -894,7 +902,6 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
int32_t attrBufLength = 0;
UBool isAttribute = FALSE;
UEnumeration *keywordEnum = NULL;
int32_t reslen = 0;
@ -921,7 +928,6 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
UBool isBcpUExt;
while (TRUE) {
isAttribute = FALSE;
key = uenum_next(keywordEnum, NULL, status);
if (key == NULL) {
break;
@ -943,7 +949,6 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
/* special keyword used for representing Unicode locale attributes */
if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
isAttribute = TRUE;
if (len > 0) {
int32_t i = 0;
while (TRUE) {
@ -986,6 +991,9 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
}
/* for a place holder ExtensionListEntry */
bcpKey = LOCALE_ATTRIBUTE_KEY;
bcpValue = NULL;
}
} else if (isBcpUExt) {
bcpKey = uloc_toUnicodeLocaleKey(key);
@ -1065,22 +1073,20 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
if (!isAttribute) {
/* create ExtensionListEntry */
ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
if (ext == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
ext->key = bcpKey;
ext->value = bcpValue;
/* create ExtensionListEntry */
ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
if (ext == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
ext->key = bcpKey;
ext->value = bcpValue;
if (!_addExtensionToList(&firstExt, ext, TRUE)) {
uprv_free(ext);
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
if (!_addExtensionToList(&firstExt, ext, TRUE)) {
uprv_free(ext);
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
}
}
@ -1103,12 +1109,9 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
UBool startLDMLExtension = FALSE;
attr = firstAttr;
ext = firstExt;
do {
if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
/* write LDML singleton extension */
for (ext = firstExt; ext; ext = ext->next) {
if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
/* first LDML u singleton extension */
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
@ -1122,7 +1125,20 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
/* write out the sorted BCP47 attributes, extensions and private use */
if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
/* write the value for the attributes */
for (attr = firstAttr; attr; attr = attr->next) {
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
reslen++;
len = (int32_t)uprv_strlen(attr->attribute);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
}
reslen += len;
}
} else {
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
@ -1141,23 +1157,8 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
}
reslen += len;
ext = ext->next;
} else if (attr) {
/* write the value for the attributes */
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
reslen++;
len = (int32_t)uprv_strlen(attr->attribute);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
}
reslen += len;
attr = attr->next;
}
} while (attr != NULL || ext != NULL);
}
}
cleanup:
/* clean up */

View File

@ -5800,6 +5800,13 @@ const char* const locale_to_langtag[][3] = {
{"en@x=elmer", "en-x-elmer", "en-x-elmer"},
{"@x=elmer;a=exta", "und-a-exta-x-elmer", "und-a-exta-x-elmer"},
{"en_US@attribute=attr1-attr2;calendar=gregorian", "en-US-u-attr1-attr2-ca-gregory", "en-US-u-attr1-attr2-ca-gregory"},
/* #12671 */
{"en@a=bar;attribute=baz", "en-a-bar-u-baz", "en-a-bar-u-baz"},
{"en@a=bar;attribute=baz;x=u-foo", "en-a-bar-u-baz-x-u-foo", "en-a-bar-u-baz-x-u-foo"},
{"en@attribute=baz", "en-u-baz", "en-u-baz"},
{"en@attribute=baz;calendar=islamic-civil", "en-u-baz-ca-islamic-civil", "en-u-baz-ca-islamic-civil"},
{"en@a=bar;calendar=islamic-civil;x=u-foo", "en-a-bar-u-ca-islamic-civil-x-u-foo", "en-a-bar-u-ca-islamic-civil-x-u-foo"},
{"en@a=bar;attribute=baz;calendar=islamic-civil;x=u-foo", "en-a-bar-u-baz-ca-islamic-civil-x-u-foo", "en-a-bar-u-baz-ca-islamic-civil-x-u-foo"},
{NULL, NULL, NULL}
};
@ -5865,7 +5872,6 @@ static const struct {
const char *locID;
int32_t len;
} langtag_to_locale[] = {
{"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn", "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz", FULL_LENGTH},
{"en", "en", FULL_LENGTH},
{"en-us", "en_US", FULL_LENGTH},
{"und-US", "_US", FULL_LENGTH},
@ -5909,9 +5915,15 @@ static const struct {
{"de-u-kn-co-phonebk", "de@collation=phonebook;colnumeric=yes", FULL_LENGTH},
{"en-u-attr2-attr1-kn-kb", "en@attribute=attr1-attr2;colbackwards=yes;colnumeric=yes", FULL_LENGTH},
{"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn", "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz", FULL_LENGTH},
{"de-u-xc-xphonebk-co-phonebk-ca-buddhist-mo-very-lo-extensi-xd-that-de-should-vc-probably-xz-killthebuffer",
"de@calendar=buddhist;collation=phonebook;de=should;lo=extensi;mo=very;vc=probably;xc=xphonebk;xd=that;xz=yes", 91},
/* #12671 */
{"en-a-bar-u-baz", "en@a=bar;attribute=baz", FULL_LENGTH},
{"en-a-bar-u-baz-x-u-foo", "en@a=bar;attribute=baz;x=u-foo", FULL_LENGTH},
{"en-u-baz", "en@attribute=baz", FULL_LENGTH},
{"en-u-baz-ca-islamic-civil", "en@attribute=baz;calendar=islamic-civil", FULL_LENGTH},
{"en-a-bar-u-ca-islamic-civil-x-u-foo", "en@a=bar;calendar=islamic-civil;x=u-foo", FULL_LENGTH},
{"en-a-bar-u-baz-ca-islamic-civil-x-u-foo", "en@a=bar;attribute=baz;calendar=islamic-civil;x=u-foo", FULL_LENGTH},
{NULL, NULL, 0}
};