ICU-20158 Pass ByteSink from Locale::forLanguageTag() to uloc_forLanguageTag().

This eliminates the need for a scratch buffer in Locale::forLanguageTag()
and also the need for counting bytes required in uloc_forLanguageTag(),
something that ByteSink will now handle correctly.
This commit is contained in:
Fredrik Roubert 2018-10-30 23:35:03 +01:00 committed by Fredrik Roubert
parent 5b4ac1c77d
commit 47ecbc4915
3 changed files with 67 additions and 145 deletions

View File

@ -869,43 +869,16 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
// parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
// and then Locale::init(), instead of just calling the normal constructor.
// For all simple language tags, the ICU locale ID string has exactly the same
// length as the corresponding BCP-47 string (like "en_US" for "en-US").
CharString localeID;
int32_t resultCapacity = tag.size();
char* buffer;
int32_t parsedLength, reslen;
for (;;) {
buffer = localeID.getAppendBuffer(
/*minCapacity=*/resultCapacity,
/*desiredCapacityHint=*/resultCapacity,
resultCapacity,
status);
if (U_FAILURE(status)) {
return result;
}
reslen = ulocimp_forLanguageTag(
int32_t parsedLength;
{
CharStringByteSink sink(&localeID);
ulocimp_forLanguageTag(
tag.data(),
tag.length(),
buffer,
resultCapacity,
sink,
&parsedLength,
&status);
if (status != U_BUFFER_OVERFLOW_ERROR) {
break;
}
// For all BCP-47 language tags that use extensions, the corresponding
// ICU locale ID will be longer but uloc_forLanguageTag() does compute
// the exact length needed so this memory reallocation will be done at
// most once.
resultCapacity = reslen;
status = U_ZERO_ERROR;
}
if (U_FAILURE(status)) {
@ -917,15 +890,6 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
return result;
}
localeID.append(buffer, reslen, status);
if (status == U_STRING_NOT_TERMINATED_WARNING) {
status = U_ZERO_ERROR; // Terminators provided by CharString.
}
if (U_FAILURE(status)) {
return result;
}
result.init(localeID.data(), /*canonicalize=*/FALSE);
if (result.isBogus()) {
status = U_ILLEGAL_ARGUMENT_ERROR;

View File

@ -1645,9 +1645,8 @@ cleanup:
}
static int32_t
_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
int32_t reslen = 0;
static void
_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {
int32_t i, n;
int32_t len;
ExtensionListEntry *kwdFirst = NULL;
@ -1657,7 +1656,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
UBool posixVariant = FALSE;
if (U_FAILURE(*status)) {
return 0;
return;
}
/* Determine if variants already exists */
@ -1714,10 +1713,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
if (U_SUCCESS(*status) && posixVariant) {
len = (int32_t) uprv_strlen(_POSIX);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
}
reslen += len;
sink.Append(_POSIX, len);
}
if (U_SUCCESS(*status) && kwdFirst != NULL) {
@ -1725,37 +1721,21 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
UBool firstValue = TRUE;
kwd = kwdFirst;
do {
if (reslen < capacity) {
if (firstValue) {
/* '@' */
*(appendAt + reslen) = LOCALE_EXT_SEP;
firstValue = FALSE;
} else {
/* ';' */
*(appendAt + reslen) = LOCALE_KEYWORD_SEP;
}
if (firstValue) {
sink.Append("@", 1);
firstValue = FALSE;
} else {
sink.Append(";", 1);
}
reslen++;
/* key */
len = (int32_t)uprv_strlen(kwd->key);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
}
reslen += len;
/* '=' */
if (reslen < capacity) {
*(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
}
reslen++;
sink.Append(kwd->key, len);
sink.Append("=", 1);
/* type */
len = (int32_t)uprv_strlen(kwd->value);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
}
reslen += len;
sink.Append(kwd->value, len);
kwd = kwd->next;
} while (kwd);
@ -1770,10 +1750,8 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
}
if (U_FAILURE(*status)) {
return 0;
return;
}
return u_terminateChars(appendAt, capacity, reslen, status);
}
static void
@ -2638,25 +2616,37 @@ uloc_forLanguageTag(const char* langtag,
int32_t localeIDCapacity,
int32_t* parsedLength,
UErrorCode* status) {
return ulocimp_forLanguageTag(
langtag,
-1,
localeID,
localeIDCapacity,
parsedLength,
status);
if (U_FAILURE(*status)) {
return 0;
}
icu::CheckedArrayByteSink sink(localeID, localeIDCapacity);
ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status);
int32_t reslen = sink.NumberOfBytesAppended();
if (U_FAILURE(*status)) {
return reslen;
}
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(localeID, localeIDCapacity, reslen, status);
}
return reslen;
}
U_CAPI int32_t U_EXPORT2
U_CAPI void U_EXPORT2
ulocimp_forLanguageTag(const char* langtag,
int32_t tagLen,
char* localeID,
int32_t localeIDCapacity,
icu::ByteSink& sink,
int32_t* parsedLength,
UErrorCode* status) {
ULanguageTag *lt;
int32_t reslen = 0;
UBool isEmpty = TRUE;
const char *subtag, *p;
int32_t len;
int32_t i, n;
@ -2664,7 +2654,7 @@ ulocimp_forLanguageTag(const char* langtag,
lt = ultag_parse(langtag, tagLen, parsedLength, status);
if (U_FAILURE(*status)) {
return 0;
return;
}
/* language */
@ -2672,10 +2662,8 @@ ulocimp_forLanguageTag(const char* langtag,
if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
len = (int32_t)uprv_strlen(subtag);
if (len > 0) {
if (reslen < localeIDCapacity) {
uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
}
reslen += len;
sink.Append(subtag, len);
isEmpty = FALSE;
}
}
@ -2683,41 +2671,27 @@ ulocimp_forLanguageTag(const char* langtag,
subtag = ultag_getScript(lt);
len = (int32_t)uprv_strlen(subtag);
if (len > 0) {
if (reslen < localeIDCapacity) {
*(localeID + reslen) = LOCALE_SEP;
}
reslen++;
sink.Append("_", 1);
isEmpty = FALSE;
/* write out the script in title case */
p = subtag;
while (*p) {
if (reslen < localeIDCapacity) {
if (p == subtag) {
*(localeID + reslen) = uprv_toupper(*p);
} else {
*(localeID + reslen) = *p;
}
}
reslen++;
p++;
}
char c = uprv_toupper(*subtag);
sink.Append(&c, 1);
sink.Append(subtag + 1, len - 1);
}
/* region */
subtag = ultag_getRegion(lt);
len = (int32_t)uprv_strlen(subtag);
if (len > 0) {
if (reslen < localeIDCapacity) {
*(localeID + reslen) = LOCALE_SEP;
}
reslen++;
/* write out the retion in upper case */
sink.Append("_", 1);
isEmpty = FALSE;
/* write out the region in upper case */
p = subtag;
while (*p) {
if (reslen < localeIDCapacity) {
*(localeID + reslen) = uprv_toupper(*p);
}
reslen++;
char c = uprv_toupper(*p);
sink.Append(&c, 1);
p++;
}
noRegion = FALSE;
@ -2727,25 +2701,19 @@ ulocimp_forLanguageTag(const char* langtag,
n = ultag_getVariantsSize(lt);
if (n > 0) {
if (noRegion) {
if (reslen < localeIDCapacity) {
*(localeID + reslen) = LOCALE_SEP;
}
reslen++;
sink.Append("_", 1);
isEmpty = FALSE;
}
for (i = 0; i < n; i++) {
subtag = ultag_getVariant(lt, i);
if (reslen < localeIDCapacity) {
*(localeID + reslen) = LOCALE_SEP;
}
reslen++;
sink.Append("_", 1);
/* write out the variant in upper case */
p = subtag;
while (*p) {
if (reslen < localeIDCapacity) {
*(localeID + reslen) = uprv_toupper(*p);
}
reslen++;
char c = uprv_toupper(*p);
sink.Append(&c, 1);
p++;
}
}
@ -2755,19 +2723,12 @@ ulocimp_forLanguageTag(const char* langtag,
n = ultag_getExtensionsSize(lt);
subtag = ultag_getPrivateUse(lt);
if (n > 0 || uprv_strlen(subtag) > 0) {
if (reslen == 0 && n > 0) {
if (isEmpty && n > 0) {
/* need a language */
if (reslen < localeIDCapacity) {
uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
}
reslen += LANG_UND_LEN;
sink.Append(LANG_UND, LANG_UND_LEN);
}
len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
reslen += len;
_appendKeywords(lt, sink, status);
}
ultag_close(lt);
return u_terminateChars(localeID, localeIDCapacity, reslen, status);
}

View File

@ -101,21 +101,18 @@ ulocimp_toLanguageTag(const char* localeID,
* the first paragraph, so some information might be lost.
* @param langtag the input BCP47 language tag.
* @param tagLen the length of langtag, or -1 to call uprv_strlen().
* @param localeID the output buffer receiving a locale ID for the
* @param sink the output sink receiving a locale ID for the
* specified BCP47 language tag.
* @param localeIDCapacity the size of the locale ID output buffer.
* @param parsedLength if not NULL, successfully parsed length
* for the input language tag is set.
* @param err error information if receiving the locale ID
* failed.
* @return the length of the locale ID.
* @internal ICU 63
*/
U_CAPI int32_t U_EXPORT2
U_CAPI void U_EXPORT2
ulocimp_forLanguageTag(const char* langtag,
int32_t tagLen,
char* localeID,
int32_t localeIDCapacity,
icu::ByteSink& sink,
int32_t* parsedLength,
UErrorCode* err);