ICU-13417 Add the Locale::(for|to)LanguageTag() functions.
They are C++ wrappers of uloc_forLanguageTag() and uloc_toLanguageTag() respectively, that take care of dynamic memory management.
This commit is contained in:
parent
3e8fb05f7c
commit
5663412172
@ -32,8 +32,10 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "putilimp.h"
|
||||
#include "mutex.h"
|
||||
@ -711,6 +713,161 @@ Locale::setDefault( const Locale& newLocale,
|
||||
locale_set_default_internal(localeID, status);
|
||||
}
|
||||
|
||||
// Creates a Locale from a BCP-47 language tag.
//
// The tag is converted to an ICU locale ID with uloc_forLanguageTag() and that
// ID is then passed to Locale::init(). A bogus Locale is returned whenever
// status holds a failure on exit. U_ILLEGAL_ARGUMENT_ERROR is reported when
// the tag is not parsed in its entirety, or when init() leaves the Locale
// bogus.
//
// tag    - the BCP-47 language tag; it does not have to be NUL-terminated.
// status - in/out error code; if it already holds a failure nothing is done.
Locale U_EXPORT2
Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
{
    Locale locale(Locale::eBOGUS);
    if (U_FAILURE(status)) {
        return locale;
    }

    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    // Until then the StringPiece is copied into a CharString.
    const CharString terminatedTag(tag, status);
    if (U_FAILURE(status)) {
        return locale;
    }

    // Why the ordinary Locale constructor is not used here: when it is given
    // a BCP-47 language tag as its language parameter it only falls back to
    // uloc_forLanguageTag() if it manages to detect that the string really is
    // BCP-47. That detection works for strings using BCP-47 extensions, but
    // not for BCP-47 grandfathered tags (eg. "en-GB-oed"), which can also be
    // read as ICU locale IDs and therefore never trigger the BCP-47 parsing.
    // So uloc_forLanguageTag() is called explicitly, followed by
    // Locale::init().

    // A simple language tag is exactly as long as its ICU locale ID
    // ("en-US" vs. "en_US"), so the tag length is the first capacity tried.
    CharString icuID;
    int32_t capacity = tag.size();

    char* out = nullptr;
    int32_t consumed = 0;
    int32_t idLength = 0;
    bool tooSmall = false;

    do {
        // capacity is both the requested size and the output parameter that
        // receives the size actually made available.
        out = icuID.getAppendBuffer(
                /*minCapacity=*/capacity,
                /*desiredCapacityHint=*/capacity,
                capacity,
                status);
        if (U_FAILURE(status)) {
            return locale;
        }

        idLength = uloc_forLanguageTag(
                terminatedTag.data(),
                out,
                capacity,
                &consumed,
                &status);

        tooSmall = (status == U_BUFFER_OVERFLOW_ERROR);
        if (tooSmall) {
            // Tags that use extensions have a longer ICU locale ID, but
            // uloc_forLanguageTag() reports the exact length required, so
            // the buffer is reallocated at most once.
            capacity = idLength;
            status = U_ZERO_ERROR;
        }
    } while (tooSmall);

    if (U_FAILURE(status)) {
        return locale;
    }

    // The whole tag must have been parsed.
    if (consumed != tag.size()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return locale;
    }

    // Commit the bytes that were written into the append buffer.
    icuID.append(out, idLength, status);
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators provided by CharString.
    }
    if (U_FAILURE(status)) {
        return locale;
    }

    locale.init(icuID.data(), /*canonicalize=*/FALSE);
    if (locale.isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return locale;
}
|
||||
|
||||
void
|
||||
Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (fIsBogus) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// All simple language tags will have the exact same length as BCP-47
|
||||
// strings as they have as ICU locale IDs (like "en-US" for "en_US").
|
||||
LocalMemory<char> scratch;
|
||||
int32_t scratch_capacity = uprv_strlen(fullName);
|
||||
|
||||
if (scratch_capacity == 0) {
|
||||
scratch_capacity = 3; // "und"
|
||||
}
|
||||
|
||||
char* buffer;
|
||||
int32_t result_capacity, reslen;
|
||||
|
||||
for (;;) {
|
||||
if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
buffer = sink.GetAppendBuffer(
|
||||
/*min_capacity=*/scratch_capacity,
|
||||
/*desired_capacity_hint=*/scratch_capacity,
|
||||
scratch.getAlias(),
|
||||
scratch_capacity,
|
||||
&result_capacity);
|
||||
|
||||
reslen = uloc_toLanguageTag(
|
||||
fullName,
|
||||
buffer,
|
||||
result_capacity,
|
||||
/*strict=*/FALSE,
|
||||
&status);
|
||||
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
// For some very few edge cases a language tag will be longer as a
|
||||
// BCP-47 string than it is as an ICU locale ID. Most notoriously "C"
|
||||
// expands to the BCP-47 tag "en-US-u-va-posix", 16 times longer, and
|
||||
// it'll take several calls to uloc_toLanguageTag() to figure that out.
|
||||
// https://unicode-org.atlassian.net/browse/ICU-20132
|
||||
scratch_capacity = reslen;
|
||||
status = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
sink.Append(buffer, reslen);
|
||||
if (status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
status = U_ZERO_ERROR; // Terminators not used.
|
||||
}
|
||||
}
|
||||
|
||||
Locale U_EXPORT2
|
||||
Locale::createFromName (const char *name)
|
||||
{
|
||||
|
@ -31,6 +31,8 @@
|
||||
#ifndef LOCID_H
|
||||
#define LOCID_H
|
||||
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/putil.h"
|
||||
@ -362,6 +364,55 @@ public:
|
||||
UErrorCode& success);
|
||||
#endif /* U_HIDE_SYSTEM_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Returns a Locale for the specified BCP47 language tag string.
|
||||
* If the specified language tag contains any ill-formed subtags,
|
||||
* the first such subtag and all following subtags are ignored.
|
||||
* <p>
|
||||
* This implements the 'Language-Tag' production of BCP47, and so
|
||||
* supports grandfathered (regular and irregular) as well as private
|
||||
* use language tags. Private use tags are represented as 'x-whatever',
|
||||
* and grandfathered tags are converted to their canonical replacements
|
||||
* where they exist. Note that a few grandfathered tags have no modern
|
||||
* replacement, these will be converted using the fallback described in
|
||||
* the first paragraph, so some information might be lost.
|
||||
* @param tag the input BCP47 language tag.
|
||||
* @param status error information if creating the Locale failed.
|
||||
* @return the Locale for the specified BCP47 language tag.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns a well-formed language tag for this Locale.
|
||||
* <p>
|
||||
* <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
|
||||
* requirement will be silently omitted from the result.
|
||||
*
|
||||
* If this function fails, partial output may have been written to the sink.
|
||||
*
|
||||
* @param sink the output sink receiving the BCP47 language
|
||||
* tag for this Locale.
|
||||
* @param status error information if creating the language tag failed.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Returns a well-formed language tag for this Locale.
|
||||
* <p>
|
||||
* <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
|
||||
* requirement will be silently omitted from the result.
|
||||
*
|
||||
* @param status error information if creating the language tag failed.
|
||||
* @return the BCP47 language tag for this Locale.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
template<typename StringClass>
|
||||
inline StringClass toLanguageTag(UErrorCode& status) const;
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Creates a locale which has had minimal canonicalization
|
||||
* as per uloc_getName().
|
||||
@ -775,6 +826,17 @@ Locale::operator!=(const Locale& other) const
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
template<typename StringClass> inline StringClass
|
||||
Locale::toLanguageTag(UErrorCode& status) const
|
||||
{
|
||||
StringClass result;
|
||||
StringByteSink<StringClass> sink(&result);
|
||||
toLanguageTag(sink, status);
|
||||
return result;
|
||||
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
inline const char *
|
||||
Locale::getCountry() const
|
||||
{
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/coll.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/std_string.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
@ -233,6 +234,8 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
|
||||
TESTCASE_AUTO(TestIsRightToLeft);
|
||||
TESTCASE_AUTO(TestBug13277);
|
||||
TESTCASE_AUTO(TestBug13554);
|
||||
TESTCASE_AUTO(TestForLanguageTag);
|
||||
TESTCASE_AUTO(TestToLanguageTag);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
@ -2748,4 +2751,97 @@ void LocaleTest::TestBug13554() {
|
||||
}
|
||||
}
|
||||
|
||||
void LocaleTest::TestForLanguageTag() {
|
||||
IcuTestErrorCode status(*this, "TestForLanguageTag()");
|
||||
|
||||
static const char tag_en[] = "en-US";
|
||||
static const char tag_oed[] = "en-GB-oed";
|
||||
static const char tag_af[] = "af-t-ar-i0-handwrit-u-ca-coptic-x-foo";
|
||||
static const char tag_ill[] = "!";
|
||||
static const char tag_no_nul[] = { 'e', 'n', '-', 'G', 'B' };
|
||||
|
||||
static const Locale loc_en("en_US");
|
||||
static const Locale loc_oed("en_GB@x=oed");
|
||||
static const Locale loc_af("af@calendar=coptic;t=ar-i0-handwrit;x=foo");
|
||||
static const Locale loc_null("");
|
||||
static const Locale loc_gb("en_GB");
|
||||
|
||||
Locale result_en = Locale::forLanguageTag(tag_en, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag_en);
|
||||
assertEquals(tag_en, loc_en.getName(), result_en.getName());
|
||||
|
||||
Locale result_oed = Locale::forLanguageTag(tag_oed, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag_oed);
|
||||
assertEquals(tag_oed, loc_oed.getName(), result_oed.getName());
|
||||
|
||||
Locale result_af = Locale::forLanguageTag(tag_af, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag_af);
|
||||
assertEquals(tag_af, loc_af.getName(), result_af.getName());
|
||||
|
||||
Locale result_ill = Locale::forLanguageTag(tag_ill, status);
|
||||
assertEquals(tag_ill, U_ILLEGAL_ARGUMENT_ERROR, status.reset());
|
||||
assertTrue(result_ill.getName(), result_ill.isBogus());
|
||||
|
||||
Locale result_null = Locale::forLanguageTag(nullptr, status);
|
||||
status.errIfFailureAndReset("nullptr");
|
||||
assertEquals("nullptr", loc_null.getName(), result_null.getName());
|
||||
|
||||
StringPiece sp_substr(tag_oed, 5); // "en-GB", no NUL.
|
||||
Locale result_substr = Locale::forLanguageTag(sp_substr, status);
|
||||
status.errIfFailureAndReset("\"%.*s\"", sp_substr.size(), sp_substr.data());
|
||||
assertEquals(CharString(sp_substr, status).data(),
|
||||
loc_gb.getName(), result_substr.getName());
|
||||
|
||||
StringPiece sp_no_nul(tag_no_nul, sizeof tag_no_nul); // "en-GB", no NUL.
|
||||
Locale result_no_nul = Locale::forLanguageTag(sp_no_nul, status);
|
||||
status.errIfFailureAndReset("\"%.*s\"", sp_no_nul.size(), sp_no_nul.data());
|
||||
assertEquals(CharString(sp_no_nul, status).data(),
|
||||
loc_gb.getName(), result_no_nul.getName());
|
||||
}
|
||||
|
||||
void LocaleTest::TestToLanguageTag() {
|
||||
IcuTestErrorCode status(*this, "TestToLanguageTag()");
|
||||
|
||||
static const Locale loc_c("C");
|
||||
static const Locale loc_en("en_US");
|
||||
static const Locale loc_af("af@calendar=coptic;t=ar-i0-handwrit;x=foo");
|
||||
static const Locale loc_empty("");
|
||||
static const Locale loc_ill("!");
|
||||
|
||||
static const char tag_c[] = "en-US-u-va-posix";
|
||||
static const char tag_en[] = "en-US";
|
||||
static const char tag_af[] = "af-t-ar-i0-handwrit-u-ca-coptic-x-foo";
|
||||
static const char tag_und[] = "und";
|
||||
|
||||
std::string result;
|
||||
StringByteSink<std::string> sink(&result);
|
||||
loc_c.toLanguageTag(sink, status);
|
||||
status.errIfFailureAndReset("\"%s\"", loc_c.getName());
|
||||
assertEquals(loc_c.getName(), tag_c, result.c_str());
|
||||
|
||||
std::string result_c = loc_c.toLanguageTag<std::string>(status);
|
||||
status.errIfFailureAndReset("\"%s\"", loc_c.getName());
|
||||
assertEquals(loc_c.getName(), tag_c, result_c.c_str());
|
||||
|
||||
std::string result_en = loc_en.toLanguageTag<std::string>(status);
|
||||
status.errIfFailureAndReset("\"%s\"", loc_en.getName());
|
||||
assertEquals(loc_en.getName(), tag_en, result_en.c_str());
|
||||
|
||||
std::string result_af = loc_af.toLanguageTag<std::string>(status);
|
||||
status.errIfFailureAndReset("\"%s\"", loc_af.getName());
|
||||
assertEquals(loc_af.getName(), tag_af, result_af.c_str());
|
||||
|
||||
std::string result_empty = loc_empty.toLanguageTag<std::string>(status);
|
||||
status.errIfFailureAndReset("\"%s\"", loc_empty.getName());
|
||||
assertEquals(loc_empty.getName(), tag_und, result_empty.c_str());
|
||||
|
||||
std::string result_ill = loc_ill.toLanguageTag<std::string>(status);
|
||||
status.errIfFailureAndReset("\"%s\"", loc_ill.getName());
|
||||
assertEquals(loc_ill.getName(), tag_und, result_ill.c_str());
|
||||
|
||||
Locale loc_bogus;
|
||||
loc_bogus.setToBogus();
|
||||
std::string result_bogus = loc_bogus.toLanguageTag<std::string>(status);
|
||||
assertEquals("bogus", U_ILLEGAL_ARGUMENT_ERROR, status.reset());
|
||||
assertTrue(result_bogus.c_str(), result_bogus.empty());
|
||||
}
|
||||
|
@ -108,6 +108,9 @@ public:
|
||||
void TestBug13277();
|
||||
void TestBug13554();
|
||||
|
||||
void TestForLanguageTag();
|
||||
void TestToLanguageTag();
|
||||
|
||||
private:
|
||||
void _checklocs(const char* label,
|
||||
const char* req,
|
||||
|
Loading…
Reference in New Issue
Block a user