ICU-13417 Replace fixed buffers in uloc_tag.cpp with CharString.
This gets rid of those fixed buffers that caused ICU-13417 to be filed in the first place, those that prevent handling language tags with very large amounts of keywords. A number of fixed buffers will still remain in uloc_tag.cpp (and elsewhere in the locale handling code) for the time being, but this change is a necessary first step in cleaning up this code and will alleviate the most pressing problem encountered by ICU4C users. An off-by-one error in _getKeywords() caused uloc_canonicalize() to not write out the final keyword value in case the result would fill up the buffer exactly, resulting in U_STRING_NOT_TERMINATED_WARNING.
This commit is contained in:
parent
24b490dc02
commit
acc3f65a87
@ -798,7 +798,7 @@ _getKeywords(const char *localeID,
|
||||
}
|
||||
keywordsLen += keywordList[i].keywordLen + 1;
|
||||
if(valuesToo) {
|
||||
if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
|
||||
if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
|
||||
uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
|
||||
}
|
||||
keywordsLen += keywordList[i].valueLen;
|
||||
|
@ -12,11 +12,13 @@
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "putilimp.h"
|
||||
#include "uinvchar.h"
|
||||
#include "ulocimp.h"
|
||||
#include "uvector.h"
|
||||
#include "uassert.h"
|
||||
|
||||
|
||||
@ -172,6 +174,46 @@ static const char*
|
||||
ultag_getGrandfathered(const ULanguageTag* langtag);
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
// Helper class to memory manage CharString objects.
|
||||
// Only ever stack-allocated, does not need to inherit UMemory.
|
||||
class CharStringPool {
|
||||
public:
|
||||
CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
|
||||
~CharStringPool() = default;
|
||||
|
||||
CharStringPool(const CharStringPool&) = delete;
|
||||
CharStringPool& operator=(const CharStringPool&) = delete;
|
||||
|
||||
icu::CharString* create() {
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
icu::CharString* const obj = new icu::CharString;
|
||||
if (obj == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
pool.addElement(obj, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete obj;
|
||||
return nullptr;
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
private:
|
||||
static void U_CALLCONV deleter(void* obj) {
|
||||
delete static_cast<icu::CharString*>(obj);
|
||||
}
|
||||
|
||||
UErrorCode status;
|
||||
icu::UVector pool;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/*
|
||||
* -------------------------------------------------
|
||||
*
|
||||
@ -900,7 +942,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
||||
|
||||
static int32_t
|
||||
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
|
||||
char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
|
||||
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
|
||||
int32_t attrBufLength = 0;
|
||||
UEnumeration *keywordEnum = NULL;
|
||||
@ -920,22 +961,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
||||
AttributeListEntry *firstAttr = NULL;
|
||||
AttributeListEntry *attr;
|
||||
char *attrValue;
|
||||
char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
|
||||
char *pExtBuf = extBuf;
|
||||
int32_t extBufCapacity = sizeof(extBuf);
|
||||
CharStringPool extBufPool;
|
||||
const char *bcpKey=nullptr, *bcpValue=nullptr;
|
||||
UErrorCode tmpStatus = U_ZERO_ERROR;
|
||||
int32_t keylen;
|
||||
UBool isBcpUExt;
|
||||
|
||||
while (TRUE) {
|
||||
icu::CharString buf;
|
||||
key = uenum_next(keywordEnum, NULL, status);
|
||||
if (key == NULL) {
|
||||
break;
|
||||
}
|
||||
len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
|
||||
/* buf must be null-terminated */
|
||||
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
char* buffer;
|
||||
int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
|
||||
|
||||
for (;;) {
|
||||
buffer = buf.getAppendBuffer(
|
||||
/*minCapacity=*/resultCapacity,
|
||||
/*desiredCapacityHint=*/resultCapacity,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
break;
|
||||
}
|
||||
|
||||
len = uloc_getKeywordValue(
|
||||
localeID, key, buffer, resultCapacity, &tmpStatus);
|
||||
|
||||
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
resultCapacity = len;
|
||||
tmpStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
@ -945,6 +1012,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
||||
continue;
|
||||
}
|
||||
|
||||
buf.append(buffer, len, tmpStatus);
|
||||
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
keylen = (int32_t)uprv_strlen(key);
|
||||
isBcpUExt = (keylen > 1);
|
||||
|
||||
@ -1007,7 +1079,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
||||
}
|
||||
|
||||
/* we've checked buf is null-terminated above */
|
||||
bcpValue = uloc_toUnicodeLocaleType(key, buf);
|
||||
bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
|
||||
if (bcpValue == NULL) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
@ -1015,33 +1087,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (bcpValue == buf) {
|
||||
/*
|
||||
if (bcpValue == buf.data()) {
|
||||
/*
|
||||
When uloc_toUnicodeLocaleType(key, buf) returns the
|
||||
input value as is, the value is well-formed, but has
|
||||
no known mapping. This implementation normalizes the
|
||||
the value to lower case
|
||||
value to lower case
|
||||
*/
|
||||
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
|
||||
if (bcpValueLen < extBufCapacity) {
|
||||
uprv_strcpy(pExtBuf, bcpValue);
|
||||
T_CString_toLowerCase(pExtBuf);
|
||||
|
||||
bcpValue = pExtBuf;
|
||||
|
||||
pExtBuf += (bcpValueLen + 1);
|
||||
extBufCapacity -= (bcpValueLen + 1);
|
||||
} else {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
icu::CharString* extBuf = extBufPool.create();
|
||||
if (extBuf == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
|
||||
int32_t resultCapacity;
|
||||
char* pExtBuf = extBuf->getAppendBuffer(
|
||||
/*minCapacity=*/bcpValueLen,
|
||||
/*desiredCapacityHint=*/bcpValueLen,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
|
||||
uprv_strcpy(pExtBuf, bcpValue);
|
||||
T_CString_toLowerCase(pExtBuf);
|
||||
|
||||
extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
|
||||
bcpValue = extBuf->data();
|
||||
}
|
||||
} else {
|
||||
if (*key == PRIVATEUSE) {
|
||||
if (!_isPrivateuseValueSubtags(buf, len)) {
|
||||
if (!_isPrivateuseValueSubtags(buf.data(), len)) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
@ -1049,7 +1132,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
|
||||
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
@ -1058,20 +1141,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
||||
}
|
||||
}
|
||||
bcpKey = key;
|
||||
if ((len + 1) < extBufCapacity) {
|
||||
uprv_memcpy(pExtBuf, buf, len);
|
||||
bcpValue = pExtBuf;
|
||||
|
||||
pExtBuf += len;
|
||||
|
||||
*pExtBuf = 0;
|
||||
pExtBuf++;
|
||||
|
||||
extBufCapacity -= (len + 1);
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
icu::CharString* extBuf = extBufPool.create();
|
||||
if (extBuf == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
extBuf->append(buf.data(), len, tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
bcpValue = extBuf->data();
|
||||
}
|
||||
|
||||
/* create ExtensionListEntry */
|
||||
@ -2337,31 +2417,66 @@ uloc_toLanguageTag(const char* localeID,
|
||||
int32_t langtagCapacity,
|
||||
UBool strict,
|
||||
UErrorCode* status) {
|
||||
/* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
|
||||
char canonical[256];
|
||||
int32_t reslen = 0;
|
||||
icu::CharString canonical;
|
||||
int32_t reslen;
|
||||
UErrorCode tmpStatus = U_ZERO_ERROR;
|
||||
UBool hadPosix = FALSE;
|
||||
const char* pKeywordStart;
|
||||
|
||||
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
|
||||
canonical[0] = 0;
|
||||
if (uprv_strlen(localeID) > 0) {
|
||||
uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
|
||||
if (tmpStatus != U_ZERO_ERROR) {
|
||||
int32_t resultCapacity = uprv_strlen(localeID);
|
||||
if (resultCapacity > 0) {
|
||||
char* buffer;
|
||||
|
||||
for (;;) {
|
||||
buffer = canonical.getAppendBuffer(
|
||||
/*minCapacity=*/resultCapacity,
|
||||
/*desiredCapacityHint=*/resultCapacity,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return 0;
|
||||
}
|
||||
|
||||
reslen =
|
||||
uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
|
||||
|
||||
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
resultCapacity = reslen;
|
||||
tmpStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
canonical.append(buffer, reslen, tmpStatus);
|
||||
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
reslen = 0;
|
||||
|
||||
/* For handling special case - private use only tag */
|
||||
pKeywordStart = locale_getKeywordsStart(canonical);
|
||||
if (pKeywordStart == canonical) {
|
||||
pKeywordStart = locale_getKeywordsStart(canonical.data());
|
||||
if (pKeywordStart == canonical.data()) {
|
||||
UEnumeration *kwdEnum;
|
||||
int kwdCnt = 0;
|
||||
UBool done = FALSE;
|
||||
|
||||
kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
|
||||
kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
|
||||
if (kwdEnum != NULL) {
|
||||
kwdCnt = uenum_count(kwdEnum, &tmpStatus);
|
||||
if (kwdCnt == 1) {
|
||||
@ -2399,12 +2514,12 @@ uloc_toLanguageTag(const char* localeID,
|
||||
}
|
||||
}
|
||||
|
||||
reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
|
||||
reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
|
||||
reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
|
||||
reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
|
||||
reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
|
||||
return reslen;
|
||||
}
|
||||
|
@ -226,6 +226,7 @@ void addLocaleTest(TestNode** root)
|
||||
TESTCASE(TestKeywordVariants);
|
||||
TESTCASE(TestKeywordVariantParsing);
|
||||
TESTCASE(TestCanonicalization);
|
||||
TESTCASE(TestCanonicalizationBuffer);
|
||||
TESTCASE(TestKeywordSet);
|
||||
TESTCASE(TestKeywordSetError);
|
||||
TESTCASE(TestDisplayKeywords);
|
||||
@ -2251,6 +2252,42 @@ static void TestCanonicalization(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void TestCanonicalizationBuffer(void)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
char buffer[256];
|
||||
|
||||
// ULOC_FULLNAME_CAPACITY == 157 (uloc.h)
|
||||
static const char name[] =
|
||||
"zh@x"
|
||||
"=foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-barz"
|
||||
;
|
||||
static const size_t len = sizeof name - 1; // Without NUL terminator.
|
||||
|
||||
int32_t reslen = uloc_canonicalize(name, buffer, len, &status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("FAIL: uloc_canonicalize(%s) => %s, expected !U_FAILURE()\n",
|
||||
name, u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
if (reslen != len) {
|
||||
log_err("FAIL: uloc_canonicalize(%s) => \"%i\", expected \"%u\"\n",
|
||||
name, reslen, len);
|
||||
return;
|
||||
}
|
||||
|
||||
if (uprv_strncmp(name, buffer, len) != 0) {
|
||||
log_err("FAIL: uloc_canonicalize(%s) => \"%.*s\", expected \"%s\"\n",
|
||||
name, reslen, buffer, name);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void TestDisplayKeywords(void)
|
||||
{
|
||||
int32_t i;
|
||||
|
@ -84,6 +84,7 @@ static void TestDisplayNames(void);
|
||||
static void doTestDisplayNames(const char* inLocale, int32_t compareIndex);
|
||||
|
||||
static void TestCanonicalization(void);
|
||||
static void TestCanonicalizationBuffer(void);
|
||||
|
||||
static void TestDisplayKeywords(void);
|
||||
|
||||
|
@ -252,6 +252,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
|
||||
TESTCASE_AUTO(TestToLanguageTag);
|
||||
TESTCASE_AUTO(TestMoveAssign);
|
||||
TESTCASE_AUTO(TestMoveCtor);
|
||||
TESTCASE_AUTO(TestBug13417VeryLongLanguageTag);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
@ -3125,3 +3126,23 @@ void LocaleTest::TestMoveCtor() {
|
||||
assertEquals("variant", l7.getVariant(), l8.getVariant());
|
||||
assertEquals("bogus", l7.isBogus(), l8.isBogus());
|
||||
}
|
||||
|
||||
void LocaleTest::TestBug13417VeryLongLanguageTag() {
|
||||
IcuTestErrorCode status(*this, "TestBug13417VeryLongLanguageTag()");
|
||||
|
||||
static const char tag[] =
|
||||
"zh-x"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
|
||||
"-foo-bar-baz-fxx"
|
||||
;
|
||||
|
||||
Locale l = Locale::forLanguageTag(tag, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag);
|
||||
assertTrue("!l.isBogus()", !l.isBogus());
|
||||
|
||||
std::string result = l.toLanguageTag<std::string>(status);
|
||||
status.errIfFailureAndReset("\"%s\"", l.getName());
|
||||
assertEquals("equals", tag, result.c_str());
|
||||
}
|
||||
|
@ -124,6 +124,8 @@ public:
|
||||
void TestMoveAssign();
|
||||
void TestMoveCtor();
|
||||
|
||||
void TestBug13417VeryLongLanguageTag();
|
||||
|
||||
private:
|
||||
void _checklocs(const char* label,
|
||||
const char* req,
|
||||
|
Loading…
Reference in New Issue
Block a user