6942013a38
Design Doc: https://goo.gl/Qf12p3
437 lines
13 KiB
C++
437 lines
13 KiB
C++
// © 2019 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
#include <utility>
|
|
|
|
#include "bytesinkutil.h" // CharStringByteSink
|
|
#include "charstr.h"
|
|
#include "cstring.h"
|
|
#include "ulocimp.h"
|
|
#include "unicode/localebuilder.h"
|
|
#include "unicode/locid.h"
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
|
|
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
|
|
|
|
const char* kAttributeKey = "attribute";
|
|
|
|
static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
|
|
switch (uprv_tolower(key)) {
|
|
case 'u':
|
|
return ultag_isUnicodeExtensionSubtags(s, len);
|
|
case 't':
|
|
return ultag_isTransformedExtensionSubtags(s, len);
|
|
case 'x':
|
|
return ultag_isPrivateuseValueSubtags(s, len);
|
|
default:
|
|
return ultag_isExtensionSubtags(s, len);
|
|
}
|
|
}
|
|
|
|
LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
|
|
script_(), region_(), variant_(nullptr), extensions_(nullptr)
|
|
{
|
|
language_[0] = 0;
|
|
script_[0] = 0;
|
|
region_[0] = 0;
|
|
}
|
|
|
|
LocaleBuilder::~LocaleBuilder()
|
|
{
|
|
delete variant_;
|
|
delete extensions_;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
|
|
{
|
|
clear();
|
|
setLanguage(locale.getLanguage());
|
|
setScript(locale.getScript());
|
|
setRegion(locale.getCountry());
|
|
setVariant(locale.getVariant());
|
|
extensions_ = locale.clone();
|
|
if (extensions_ == nullptr) {
|
|
status_ = U_MEMORY_ALLOCATION_ERROR;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
|
|
{
|
|
Locale l = Locale::forLanguageTag(tag, status_);
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
// Because setLocale will reset status_ we need to return
|
|
// first if we have error in forLanguageTag.
|
|
setLocale(l);
|
|
return *this;
|
|
}
|
|
|
|
static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
|
|
UBool (*test)(const char*, int32_t)) {
|
|
if (U_FAILURE(errorCode)) { return; }
|
|
if (input.empty()) {
|
|
dest[0] = '\0';
|
|
} else if (test(input.data(), input.length())) {
|
|
uprv_memcpy(dest, input.data(), input.length());
|
|
dest[input.length()] = '\0';
|
|
} else {
|
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
|
}
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
|
|
{
|
|
setField(language, language_, status_, &ultag_isLanguageSubtag);
|
|
return *this;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
|
|
{
|
|
setField(script, script_, status_, &ultag_isScriptSubtag);
|
|
return *this;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
|
|
{
|
|
setField(region, region_, status_, &ultag_isRegionSubtag);
|
|
return *this;
|
|
}
|
|
|
|
static void transform(char* data, int32_t len) {
|
|
for (int32_t i = 0; i < len; i++, data++) {
|
|
if (*data == '_') {
|
|
*data = '-';
|
|
} else {
|
|
*data = uprv_tolower(*data);
|
|
}
|
|
}
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
|
|
{
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
if (variant.empty()) {
|
|
delete variant_;
|
|
variant_ = nullptr;
|
|
return *this;
|
|
}
|
|
CharString* new_variant = new CharString(variant, status_);
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
if (new_variant == nullptr) {
|
|
status_ = U_MEMORY_ALLOCATION_ERROR;
|
|
return *this;
|
|
}
|
|
transform(new_variant->data(), new_variant->length());
|
|
if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
|
|
delete new_variant;
|
|
status_ = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return *this;
|
|
}
|
|
delete variant_;
|
|
variant_ = new_variant;
|
|
return *this;
|
|
}
|
|
|
|
static bool
|
|
_isKeywordValue(const char* key, const char* value, int32_t value_len)
|
|
{
|
|
if (key[1] == '\0') {
|
|
// one char key
|
|
return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
|
|
_isExtensionSubtags(key[0], value, value_len));
|
|
} else if (uprv_strcmp(key, kAttributeKey) == 0) {
|
|
// unicode attributes
|
|
return ultag_isUnicodeLocaleAttributes(value, value_len);
|
|
}
|
|
// otherwise: unicode extension value
|
|
// We need to convert from legacy key/value to unicode
|
|
// key/value
|
|
const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
|
|
const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);
|
|
|
|
return unicode_locale_key && unicode_locale_type &&
|
|
ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
|
|
ultag_isUnicodeLocaleType(unicode_locale_type, -1);
|
|
}
|
|
|
|
static void
|
|
_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode)
|
|
{
|
|
if (U_FAILURE(errorCode)) { return; }
|
|
LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode));
|
|
if (U_FAILURE(errorCode) || iter.isNull()) { return; }
|
|
const char* key;
|
|
while ((key = iter->next(nullptr, errorCode)) != nullptr) {
|
|
CharString value;
|
|
CharStringByteSink sink(&value);
|
|
from.getKeywordValue(key, sink, errorCode);
|
|
if (U_FAILURE(errorCode)) { return; }
|
|
if (uprv_strcmp(key, kAttributeKey) == 0) {
|
|
transform(value.data(), value.length());
|
|
}
|
|
if (validate &&
|
|
!_isKeywordValue(key, value.data(), value.length())) {
|
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
to->setKeywordValue(key, value.data(), errorCode);
|
|
if (U_FAILURE(errorCode)) { return; }
|
|
}
|
|
}
|
|
|
|
void static
|
|
_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode)
|
|
{
|
|
// Clear Unicode attributes
|
|
locale->setKeywordValue(kAttributeKey, "", errorCode);
|
|
|
|
// Clear all Unicode keyword values
|
|
LocalPointer<icu::StringEnumeration> iter(locale->createUnicodeKeywords(errorCode));
|
|
if (U_FAILURE(errorCode) || iter.isNull()) { return; }
|
|
const char* key;
|
|
while ((key = iter->next(nullptr, errorCode)) != nullptr) {
|
|
locale->setUnicodeKeywordValue(key, nullptr, errorCode);
|
|
}
|
|
}
|
|
|
|
static void
|
|
_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode)
|
|
{
|
|
// Add the unicode extensions to extensions_
|
|
CharString locale_str("und-u-", errorCode);
|
|
locale_str.append(value, errorCode);
|
|
_copyExtensions(
|
|
Locale::forLanguageTag(locale_str.data(), errorCode),
|
|
locale, false, errorCode);
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
|
|
{
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
if (!UPRV_ISALPHANUM(key)) {
|
|
status_ = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return *this;
|
|
}
|
|
CharString value_str(value, status_);
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
transform(value_str.data(), value_str.length());
|
|
if (!value_str.isEmpty() &&
|
|
!_isExtensionSubtags(key, value_str.data(), value_str.length())) {
|
|
status_ = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return *this;
|
|
}
|
|
if (extensions_ == nullptr) {
|
|
extensions_ = new Locale();
|
|
if (extensions_ == nullptr) {
|
|
status_ = U_MEMORY_ALLOCATION_ERROR;
|
|
return *this;
|
|
}
|
|
}
|
|
if (uprv_tolower(key) != 'u') {
|
|
// for t, x and others extension.
|
|
extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
|
|
status_);
|
|
return *this;
|
|
}
|
|
_clearUAttributesAndKeyType(extensions_, status_);
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
if (!value.empty()) {
|
|
_setUnicodeExtensions(extensions_, value_str, status_);
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
|
|
StringPiece key, StringPiece type)
|
|
{
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
|
|
(!type.empty() &&
|
|
!ultag_isUnicodeLocaleType(type.data(), type.length()))) {
|
|
status_ = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return *this;
|
|
}
|
|
if (extensions_ == nullptr) {
|
|
extensions_ = new Locale();
|
|
}
|
|
if (extensions_ == nullptr) {
|
|
status_ = U_MEMORY_ALLOCATION_ERROR;
|
|
return *this;
|
|
}
|
|
extensions_->setUnicodeKeywordValue(key, type, status_);
|
|
return *this;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
|
|
StringPiece value)
|
|
{
|
|
CharString value_str(value, status_);
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
transform(value_str.data(), value_str.length());
|
|
if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
|
|
status_ = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return *this;
|
|
}
|
|
if (extensions_ == nullptr) {
|
|
extensions_ = new Locale();
|
|
if (extensions_ == nullptr) {
|
|
status_ = U_MEMORY_ALLOCATION_ERROR;
|
|
return *this;
|
|
}
|
|
extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
|
|
return *this;
|
|
}
|
|
|
|
CharString attributes;
|
|
CharStringByteSink sink(&attributes);
|
|
UErrorCode localErrorCode = U_ZERO_ERROR;
|
|
extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
|
|
if (U_FAILURE(localErrorCode)) {
|
|
CharString new_attributes(value_str.data(), status_);
|
|
// No attributes, set the attribute.
|
|
extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
|
|
return *this;
|
|
}
|
|
|
|
transform(attributes.data(),attributes.length());
|
|
const char* start = attributes.data();
|
|
const char* limit = attributes.data() + attributes.length();
|
|
CharString new_attributes;
|
|
bool inserted = false;
|
|
while (start < limit) {
|
|
if (!inserted) {
|
|
int cmp = uprv_strcmp(start, value_str.data());
|
|
if (cmp == 0) { return *this; } // Found it in attributes: Just return
|
|
if (cmp > 0) {
|
|
if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
|
|
new_attributes.append(value_str.data(), status_);
|
|
inserted = true;
|
|
}
|
|
}
|
|
if (!new_attributes.isEmpty()) {
|
|
new_attributes.append('_', status_);
|
|
}
|
|
new_attributes.append(start, status_);
|
|
start += uprv_strlen(start) + 1;
|
|
}
|
|
if (!inserted) {
|
|
if (!new_attributes.isEmpty()) {
|
|
new_attributes.append('_', status_);
|
|
}
|
|
new_attributes.append(value_str.data(), status_);
|
|
}
|
|
// Not yet in the attributes, set the attribute.
|
|
extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
|
|
return *this;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
|
|
StringPiece value)
|
|
{
|
|
CharString value_str(value, status_);
|
|
if (U_FAILURE(status_)) { return *this; }
|
|
transform(value_str.data(), value_str.length());
|
|
if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
|
|
status_ = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return *this;
|
|
}
|
|
if (extensions_ == nullptr) { return *this; }
|
|
UErrorCode localErrorCode = U_ZERO_ERROR;
|
|
CharString attributes;
|
|
CharStringByteSink sink(&attributes);
|
|
extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
|
|
// get failure, just return
|
|
if (U_FAILURE(localErrorCode)) { return *this; }
|
|
// Do not have any attributes, just return.
|
|
if (attributes.isEmpty()) { return *this; }
|
|
|
|
char* p = attributes.data();
|
|
// Replace null terminiator in place for _ and - so later
|
|
// we can use uprv_strcmp to compare.
|
|
for (int32_t i = 0; i < attributes.length(); i++, p++) {
|
|
*p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
|
|
}
|
|
|
|
const char* start = attributes.data();
|
|
const char* limit = attributes.data() + attributes.length();
|
|
CharString new_attributes;
|
|
bool found = false;
|
|
while (start < limit) {
|
|
if (uprv_strcmp(start, value_str.data()) == 0) {
|
|
found = true;
|
|
} else {
|
|
if (!new_attributes.isEmpty()) {
|
|
new_attributes.append('_', status_);
|
|
}
|
|
new_attributes.append(start, status_);
|
|
}
|
|
start += uprv_strlen(start) + 1;
|
|
}
|
|
// Found the value in attributes, set the attribute.
|
|
if (found) {
|
|
extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::clear()
|
|
{
|
|
status_ = U_ZERO_ERROR;
|
|
language_[0] = 0;
|
|
script_[0] = 0;
|
|
region_[0] = 0;
|
|
delete variant_;
|
|
variant_ = nullptr;
|
|
clearExtensions();
|
|
return *this;
|
|
}
|
|
|
|
LocaleBuilder& LocaleBuilder::clearExtensions()
|
|
{
|
|
delete extensions_;
|
|
extensions_ = nullptr;
|
|
return *this;
|
|
}
|
|
|
|
Locale makeBogusLocale() {
|
|
Locale bogus;
|
|
bogus.setToBogus();
|
|
return bogus;
|
|
}
|
|
|
|
Locale LocaleBuilder::build(UErrorCode& errorCode)
|
|
{
|
|
if (U_FAILURE(errorCode)) {
|
|
return makeBogusLocale();
|
|
}
|
|
if (U_FAILURE(status_)) {
|
|
errorCode = status_;
|
|
return makeBogusLocale();
|
|
}
|
|
CharString locale_str(language_, errorCode);
|
|
if (uprv_strlen(script_) > 0) {
|
|
locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
|
|
}
|
|
if (uprv_strlen(region_) > 0) {
|
|
locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
|
|
}
|
|
if (variant_ != nullptr) {
|
|
locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
|
|
}
|
|
if (U_FAILURE(errorCode)) {
|
|
return makeBogusLocale();
|
|
}
|
|
Locale product(locale_str.data());
|
|
if (extensions_ != nullptr) {
|
|
_copyExtensions(*extensions_, &product, true, errorCode);
|
|
}
|
|
if (U_FAILURE(errorCode)) {
|
|
return makeBogusLocale();
|
|
}
|
|
return product;
|
|
}
|
|
|
|
U_NAMESPACE_END
|