ICU-20158 Pass ByteSink from Locale::forLanguageTag() to uloc_forLanguageTag().
This eliminates the need for a scratch buffer in Locale::forLanguageTag() and also the need for counting bytes required in uloc_forLanguageTag(), something that ByteSink will now handle correctly.
This commit is contained in:
parent
5b4ac1c77d
commit
47ecbc4915
@ -869,43 +869,16 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
|
||||
// parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
|
||||
// and then Locale::init(), instead of just calling the normal constructor.
|
||||
|
||||
// All simple language tags will have the exact same length as ICU locale
|
||||
// ID strings as they have as BCP-47 strings (like "en_US" for "en-US").
|
||||
CharString localeID;
|
||||
int32_t resultCapacity = tag.size();
|
||||
|
||||
char* buffer;
|
||||
int32_t parsedLength, reslen;
|
||||
|
||||
for (;;) {
|
||||
buffer = localeID.getAppendBuffer(
|
||||
/*minCapacity=*/resultCapacity,
|
||||
/*desiredCapacityHint=*/resultCapacity,
|
||||
resultCapacity,
|
||||
status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
reslen = ulocimp_forLanguageTag(
|
||||
int32_t parsedLength;
|
||||
{
|
||||
CharStringByteSink sink(&localeID);
|
||||
ulocimp_forLanguageTag(
|
||||
tag.data(),
|
||||
tag.length(),
|
||||
buffer,
|
||||
resultCapacity,
|
||||
sink,
|
||||
&parsedLength,
|
||||
&status);
|
||||
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
// For all BCP-47 language tags that use extensions, the corresponding
|
||||
// ICU locale ID will be longer but uloc_forLanguageTag() does compute
|
||||
// the exact length needed so this memory reallocation will be done at
|
||||
// most once.
|
||||
resultCapacity = reslen;
|
||||
status = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
@ -917,15 +890,6 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
|
||||
return result;
|
||||
}
|
||||
|
||||
localeID.append(buffer, reslen, status);
|
||||
if (status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
status = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
result.init(localeID.data(), /*canonicalize=*/FALSE);
|
||||
if (result.isBogus()) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
@ -1645,9 +1645,8 @@ cleanup:
|
||||
}
|
||||
|
||||
|
||||
static int32_t
|
||||
_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
|
||||
int32_t reslen = 0;
|
||||
static void
|
||||
_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {
|
||||
int32_t i, n;
|
||||
int32_t len;
|
||||
ExtensionListEntry *kwdFirst = NULL;
|
||||
@ -1657,7 +1656,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
|
||||
UBool posixVariant = FALSE;
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Determine if variants already exists */
|
||||
@ -1714,10 +1713,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
|
||||
|
||||
if (U_SUCCESS(*status) && posixVariant) {
|
||||
len = (int32_t) uprv_strlen(_POSIX);
|
||||
if (reslen < capacity) {
|
||||
uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
|
||||
}
|
||||
reslen += len;
|
||||
sink.Append(_POSIX, len);
|
||||
}
|
||||
|
||||
if (U_SUCCESS(*status) && kwdFirst != NULL) {
|
||||
@ -1725,37 +1721,21 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
|
||||
UBool firstValue = TRUE;
|
||||
kwd = kwdFirst;
|
||||
do {
|
||||
if (reslen < capacity) {
|
||||
if (firstValue) {
|
||||
/* '@' */
|
||||
*(appendAt + reslen) = LOCALE_EXT_SEP;
|
||||
firstValue = FALSE;
|
||||
} else {
|
||||
/* ';' */
|
||||
*(appendAt + reslen) = LOCALE_KEYWORD_SEP;
|
||||
}
|
||||
if (firstValue) {
|
||||
sink.Append("@", 1);
|
||||
firstValue = FALSE;
|
||||
} else {
|
||||
sink.Append(";", 1);
|
||||
}
|
||||
reslen++;
|
||||
|
||||
/* key */
|
||||
len = (int32_t)uprv_strlen(kwd->key);
|
||||
if (reslen < capacity) {
|
||||
uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
|
||||
}
|
||||
reslen += len;
|
||||
|
||||
/* '=' */
|
||||
if (reslen < capacity) {
|
||||
*(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
|
||||
}
|
||||
reslen++;
|
||||
sink.Append(kwd->key, len);
|
||||
sink.Append("=", 1);
|
||||
|
||||
/* type */
|
||||
len = (int32_t)uprv_strlen(kwd->value);
|
||||
if (reslen < capacity) {
|
||||
uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
|
||||
}
|
||||
reslen += len;
|
||||
sink.Append(kwd->value, len);
|
||||
|
||||
kwd = kwd->next;
|
||||
} while (kwd);
|
||||
@ -1770,10 +1750,8 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
|
||||
}
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
return u_terminateChars(appendAt, capacity, reslen, status);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -2638,25 +2616,37 @@ uloc_forLanguageTag(const char* langtag,
|
||||
int32_t localeIDCapacity,
|
||||
int32_t* parsedLength,
|
||||
UErrorCode* status) {
|
||||
return ulocimp_forLanguageTag(
|
||||
langtag,
|
||||
-1,
|
||||
localeID,
|
||||
localeIDCapacity,
|
||||
parsedLength,
|
||||
status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
icu::CheckedArrayByteSink sink(localeID, localeIDCapacity);
|
||||
ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status);
|
||||
|
||||
int32_t reslen = sink.NumberOfBytesAppended();
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return reslen;
|
||||
}
|
||||
|
||||
if (sink.Overflowed()) {
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
} else {
|
||||
u_terminateChars(localeID, localeIDCapacity, reslen, status);
|
||||
}
|
||||
|
||||
return reslen;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocimp_forLanguageTag(const char* langtag,
|
||||
int32_t tagLen,
|
||||
char* localeID,
|
||||
int32_t localeIDCapacity,
|
||||
icu::ByteSink& sink,
|
||||
int32_t* parsedLength,
|
||||
UErrorCode* status) {
|
||||
ULanguageTag *lt;
|
||||
int32_t reslen = 0;
|
||||
UBool isEmpty = TRUE;
|
||||
const char *subtag, *p;
|
||||
int32_t len;
|
||||
int32_t i, n;
|
||||
@ -2664,7 +2654,7 @@ ulocimp_forLanguageTag(const char* langtag,
|
||||
|
||||
lt = ultag_parse(langtag, tagLen, parsedLength, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* language */
|
||||
@ -2672,10 +2662,8 @@ ulocimp_forLanguageTag(const char* langtag,
|
||||
if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
|
||||
len = (int32_t)uprv_strlen(subtag);
|
||||
if (len > 0) {
|
||||
if (reslen < localeIDCapacity) {
|
||||
uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
|
||||
}
|
||||
reslen += len;
|
||||
sink.Append(subtag, len);
|
||||
isEmpty = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2683,41 +2671,27 @@ ulocimp_forLanguageTag(const char* langtag,
|
||||
subtag = ultag_getScript(lt);
|
||||
len = (int32_t)uprv_strlen(subtag);
|
||||
if (len > 0) {
|
||||
if (reslen < localeIDCapacity) {
|
||||
*(localeID + reslen) = LOCALE_SEP;
|
||||
}
|
||||
reslen++;
|
||||
sink.Append("_", 1);
|
||||
isEmpty = FALSE;
|
||||
|
||||
/* write out the script in title case */
|
||||
p = subtag;
|
||||
while (*p) {
|
||||
if (reslen < localeIDCapacity) {
|
||||
if (p == subtag) {
|
||||
*(localeID + reslen) = uprv_toupper(*p);
|
||||
} else {
|
||||
*(localeID + reslen) = *p;
|
||||
}
|
||||
}
|
||||
reslen++;
|
||||
p++;
|
||||
}
|
||||
char c = uprv_toupper(*subtag);
|
||||
sink.Append(&c, 1);
|
||||
sink.Append(subtag + 1, len - 1);
|
||||
}
|
||||
|
||||
/* region */
|
||||
subtag = ultag_getRegion(lt);
|
||||
len = (int32_t)uprv_strlen(subtag);
|
||||
if (len > 0) {
|
||||
if (reslen < localeIDCapacity) {
|
||||
*(localeID + reslen) = LOCALE_SEP;
|
||||
}
|
||||
reslen++;
|
||||
/* write out the retion in upper case */
|
||||
sink.Append("_", 1);
|
||||
isEmpty = FALSE;
|
||||
|
||||
/* write out the region in upper case */
|
||||
p = subtag;
|
||||
while (*p) {
|
||||
if (reslen < localeIDCapacity) {
|
||||
*(localeID + reslen) = uprv_toupper(*p);
|
||||
}
|
||||
reslen++;
|
||||
char c = uprv_toupper(*p);
|
||||
sink.Append(&c, 1);
|
||||
p++;
|
||||
}
|
||||
noRegion = FALSE;
|
||||
@ -2727,25 +2701,19 @@ ulocimp_forLanguageTag(const char* langtag,
|
||||
n = ultag_getVariantsSize(lt);
|
||||
if (n > 0) {
|
||||
if (noRegion) {
|
||||
if (reslen < localeIDCapacity) {
|
||||
*(localeID + reslen) = LOCALE_SEP;
|
||||
}
|
||||
reslen++;
|
||||
sink.Append("_", 1);
|
||||
isEmpty = FALSE;
|
||||
}
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
subtag = ultag_getVariant(lt, i);
|
||||
if (reslen < localeIDCapacity) {
|
||||
*(localeID + reslen) = LOCALE_SEP;
|
||||
}
|
||||
reslen++;
|
||||
sink.Append("_", 1);
|
||||
|
||||
/* write out the variant in upper case */
|
||||
p = subtag;
|
||||
while (*p) {
|
||||
if (reslen < localeIDCapacity) {
|
||||
*(localeID + reslen) = uprv_toupper(*p);
|
||||
}
|
||||
reslen++;
|
||||
char c = uprv_toupper(*p);
|
||||
sink.Append(&c, 1);
|
||||
p++;
|
||||
}
|
||||
}
|
||||
@ -2755,19 +2723,12 @@ ulocimp_forLanguageTag(const char* langtag,
|
||||
n = ultag_getExtensionsSize(lt);
|
||||
subtag = ultag_getPrivateUse(lt);
|
||||
if (n > 0 || uprv_strlen(subtag) > 0) {
|
||||
if (reslen == 0 && n > 0) {
|
||||
if (isEmpty && n > 0) {
|
||||
/* need a language */
|
||||
if (reslen < localeIDCapacity) {
|
||||
uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
|
||||
}
|
||||
reslen += LANG_UND_LEN;
|
||||
sink.Append(LANG_UND, LANG_UND_LEN);
|
||||
}
|
||||
len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
|
||||
reslen += len;
|
||||
_appendKeywords(lt, sink, status);
|
||||
}
|
||||
|
||||
ultag_close(lt);
|
||||
return u_terminateChars(localeID, localeIDCapacity, reslen, status);
|
||||
}
|
||||
|
||||
|
||||
|
@ -101,21 +101,18 @@ ulocimp_toLanguageTag(const char* localeID,
|
||||
* the first paragraph, so some information might be lost.
|
||||
* @param langtag the input BCP47 language tag.
|
||||
* @param tagLen the length of langtag, or -1 to call uprv_strlen().
|
||||
* @param localeID the output buffer receiving a locale ID for the
|
||||
* @param sink the output sink receiving a locale ID for the
|
||||
* specified BCP47 language tag.
|
||||
* @param localeIDCapacity the size of the locale ID output buffer.
|
||||
* @param parsedLength if not NULL, successfully parsed length
|
||||
* for the input language tag is set.
|
||||
* @param err error information if receiving the locald ID
|
||||
* failed.
|
||||
* @return the length of the locale ID.
|
||||
* @internal ICU 63
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocimp_forLanguageTag(const char* langtag,
|
||||
int32_t tagLen,
|
||||
char* localeID,
|
||||
int32_t localeIDCapacity,
|
||||
icu::ByteSink& sink,
|
||||
int32_t* parsedLength,
|
||||
UErrorCode* err);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user