scuffed-code/icu4c/source/common/locid.cpp

1554 lines
42 KiB
C++
Raw Normal View History

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
1999-08-16 21:50:52 +00:00
/*
**********************************************************************
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
1999-08-16 21:50:52 +00:00
*
* File locid.cpp
*
* Created by: Richard Gillam
*
* Modification History:
*
* Date Name Description
* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
1999-08-16 21:50:52 +00:00
* methods to get and set it.
* 04/02/97 aliu Made operator!= inline; fixed return value
1999-08-16 21:50:52 +00:00
* of getName().
* 04/15/97 aliu Cleanup for AIX/Win32.
* 04/24/97 aliu Numerous changes per code review.
* 08/18/98 stephen Changed getDisplayName()
* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
* Added getISOCountries(), getISOLanguages(),
* getLanguagesForCountry()
* 03/16/99 bertrand rehaul.
* 07/21/99 stephen Added U_CFUNC setDefault
* 11/09/99 weiv Added const char * getName() const;
* 04/12/00 srl removing unicodestring api's and cached hash code
* 08/10/01 grhoten Change the static Locales to accessor functions
******************************************************************************
1999-08-16 21:50:52 +00:00
*/
#include <utility>
1999-08-16 21:50:52 +00:00
#include "unicode/bytestream.h"
#include "unicode/locid.h"
#include "unicode/strenum.h"
#include "unicode/stringpiece.h"
#include "unicode/uloc.h"
#include "putilimp.h"
#include "mutex.h"
#include "umutex.h"
#include "uassert.h"
1999-08-16 21:50:52 +00:00
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
#include "uhash.h"
#include "ulocimp.h"
#include "ucln_cmn.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "bytesinkutil.h"
1999-08-16 21:50:52 +00:00
U_CDECL_BEGIN
// Forward declaration of the cleanup callback for this file's static state
// (defined further down, registered through ucln_common_registerCleanup()).
static UBool U_CALLCONV locale_cleanup(void);
U_CDECL_END
U_NAMESPACE_BEGIN
// Array of eMAX_LOCALES predefined Locale objects, indexed by ELocalePos.
// Allocated by locale_init() and released by locale_cleanup().
static Locale *gLocaleCache = NULL;
// Init-once guard used by Locale::getLocaleCache() to run locale_init().
static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
static UMutex gDefaultLocaleMutex = U_MUTEX_INITIALIZER;
// Maps a locale name to the Locale object created for it by
// locale_set_default_internal(); values are destroyed via deleteLocale().
static UHashtable *gDefaultLocalesHashT = NULL;
// The current default locale; points at a value stored in gDefaultLocalesHashT.
static Locale *gDefaultLocale = NULL;
/**
* \def ULOC_STRING_LIMIT
* strings beyond this value crash in CharString
*/
#define ULOC_STRING_LIMIT 357913941
U_NAMESPACE_END
/**
 * Slot of each predefined locale inside gLocaleCache.
 * The enumerator order fixes the array layout; eMAX_LOCALES is the number of slots.
 */
enum ELocalePos {
    /* language-only locales */
    eENGLISH,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    /* language + country locales */
    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,         /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,

    eROOT,

    //eDEFAULT,
    eMAX_LOCALES
};
// Declaration only; the definition is not in the visible part of this file.
// Locale::createKeywords() and Locale::createUnicodeKeywords() call it with the
// text following '@' in fullName, '@' as `prev`, a local keyword buffer, no value
// buffer (NULL / 0 / NULL) and valuesToo == FALSE, and treat the int32_t result
// as the length of the keyword data written to `keywords`.
U_CFUNC int32_t locale_getKeywords(const char *localeID,
char prev,
char *keywords, int32_t keywordCapacity,
char *values, int32_t valuesCapacity, int32_t *valLen,
UBool valuesToo,
UErrorCode *status);
U_CDECL_BEGIN
//
// Value deleter for the default-locale hash table: destroys one stored Locale.
//
static void U_CALLCONV
deleteLocale(void *obj) {
    icu::Locale *doomed = static_cast<icu::Locale *>(obj);
    delete doomed;
}
/**
 * Cleanup callback for this file's static state.
 * Frees the predefined-locale cache, resets its init-once guard, closes the
 * default-locale hash table and clears the default-locale pointer.
 * Always returns TRUE.
 */
static UBool U_CALLCONV locale_cleanup(void)
{
    U_NAMESPACE_USE

    delete [] gLocaleCache;
    gLocaleCache = NULL;
    gLocaleCacheInitOnce.reset();

    if (gDefaultLocalesHashT != NULL) {
        // Closing the table runs the value deleter on every stored Locale.
        uhash_close(gDefaultLocalesHashT);
        gDefaultLocalesHashT = NULL;
    }
    // The default locale was owned by the table; only the pointer is cleared here.
    gDefaultLocale = NULL;

    return TRUE;
}
/**
 * One-time initializer for gLocaleCache (run through gLocaleCacheInitOnce).
 * Allocates eMAX_LOCALES Locale objects, registers locale_cleanup(), then fills
 * every slot with its predefined locale.
 *
 * @param status set to U_MEMORY_ALLOCATION_ERROR if the array cannot be allocated.
 */
static void U_CALLCONV locale_init(UErrorCode &status) {
    U_NAMESPACE_USE

    // One row per predefined locale; a NULL country means "language only".
    static const struct {
        ELocalePos  slot;
        const char *language;
        const char *country;
    } kPredefined[] = {
        { eROOT,          "",   NULL },
        { eENGLISH,       "en", NULL },
        { eFRENCH,        "fr", NULL },
        { eGERMAN,        "de", NULL },
        { eITALIAN,       "it", NULL },
        { eJAPANESE,      "ja", NULL },
        { eKOREAN,        "ko", NULL },
        { eCHINESE,       "zh", NULL },
        { eFRANCE,        "fr", "FR" },
        { eGERMANY,       "de", "DE" },
        { eITALY,         "it", "IT" },
        { eJAPAN,         "ja", "JP" },
        { eKOREA,         "ko", "KR" },
        { eCHINA,         "zh", "CN" },
        { eTAIWAN,        "zh", "TW" },
        { eUK,            "en", "GB" },
        { eUS,            "en", "US" },
        { eCANADA,        "en", "CA" },
        { eCANADA_FRENCH, "fr", "CA" }
    };

    U_ASSERT(gLocaleCache == NULL);
    gLocaleCache = new Locale[(int)eMAX_LOCALES];
    if (gLocaleCache == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);

    for (const auto &entry : kPredefined) {
        gLocaleCache[entry.slot] = (entry.country == NULL)
            ? Locale(entry.language)
            : Locale(entry.language, entry.country);
    }
}
U_CDECL_END
U_NAMESPACE_BEGIN
/**
 * Makes the locale named by `id` the default locale and returns it.
 *
 * A NULL `id` means "use the host's default locale ID" (unlike most locale
 * APIs, where NULL means the current ICU default); host IDs are always
 * canonicalized, explicit IDs only go through uloc_getName().
 * Locale objects are cached by name in gDefaultLocalesHashT, so the same name
 * always yields the same object.
 *
 * @param id     locale ID, or NULL for the host default.
 * @param status error code; on any failure the previous default (possibly NULL)
 *               is returned unchanged.
 * @return the default locale after the call.
 */
Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
    // The whole function runs under the default-locale mutex.
    Mutex lock(&gDefaultLocaleMutex);

    char localeNameBuf[512];
    if (id == NULL) {
        id = uprv_getDefaultLocaleID();   // This function not thread safe? TODO: verify.
        uloc_canonicalize(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
    } else {
        uloc_getName(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
    }
    // Force termination in case the name filled the whole usable capacity.
    localeNameBuf[sizeof(localeNameBuf)-1] = 0;
    if (U_FAILURE(status)) {
        return gDefaultLocale;
    }

    // Lazily create the name -> Locale cache.
    if (gDefaultLocalesHashT == NULL) {
        gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
        if (U_FAILURE(status)) {
            return gDefaultLocale;
        }
        uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
        ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
    }

    Locale *candidate = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf);
    if (candidate == NULL) {
        // First request for this name: build the Locale and cache it.
        candidate = new Locale(Locale::eBOGUS);
        if (candidate == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return gDefaultLocale;
        }
        candidate->init(localeNameBuf, FALSE);
        // The key is the Locale's own name string, so it lives as long as the value.
        uhash_put(gDefaultLocalesHashT, (char*) candidate->getName(), candidate, &status);
        if (U_FAILURE(status)) {
            return gDefaultLocale;
        }
    }

    gDefaultLocale = candidate;
    return gDefaultLocale;
}
U_NAMESPACE_END
/* sfb 07/21/99 */
/**
 * C-callable wrapper around locale_set_default_internal().
 * Any error reported by the internal function is discarded.
 */
U_CFUNC void
locale_set_default(const char *id)
{
    U_NAMESPACE_USE
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    locale_set_default_internal(id, ignoredStatus);
}
/* end */
/** C-callable accessor: returns the name string of the current default locale. */
U_CFUNC const char *
locale_get_default(void)
{
    U_NAMESPACE_USE
    const Locale &current = Locale::getDefault();
    return current.getName();
}
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
/*Character separating the posix id fields*/
// '_'
// In the platform codepage.
#define SEP_CHAR '_'
1999-08-16 21:50:52 +00:00
/** Releases the name buffers this object owns. */
Locale::~Locale()
{
    // baseName is a separate allocation unless it aliases fullName.
    if (baseName != fullName) {
        uprv_free(baseName);
    }
    baseName = NULL;

    // fullName needs freeing only when it is not the inline buffer.
    if (fullName == fullNameBuffer) {
        return;
    }
    uprv_free(fullName);
    fullName = NULL;
}
/** Default constructor: initializes this object as a copy of the default locale. */
Locale::Locale()
    : UObject(),
      fullName(fullNameBuffer),
      baseName(NULL)
{
    // init() with a NULL ID copies Locale::getDefault().
    init(NULL, FALSE);
}
/*
 * Internal constructor that creates a bogus locale and, unlike the default
 * constructor, never looks up the default locale.
 */
Locale::Locale(Locale::ELocaleType)
    : UObject(),
      fullName(fullNameBuffer),
      baseName(NULL)
{
    setToBogus();
}
/**
 * Builds a locale from separate language, country, variant and keyword strings.
 *
 * The pieces are joined into one ID which is then parsed by init():
 *   - language, then "_" + country when a country is given;
 *   - "_" + variant (giving "language__variant" when there is no country);
 *     leading '_' are stripped from the variant and trailing '_' are trimmed;
 *   - keywords: appended after '@' when they contain '=', otherwise appended
 *     after '_' (or "__" when there is no variant).
 *
 * If language, country and variant are all NULL the default locale is used.
 * If any piece is longer than ULOC_STRING_LIMIT, or building the ID fails,
 * the object is set to bogus.
 */
Locale::Locale( const char * newLanguage,
                const char * newCountry,
                const char * newVariant,
                const char * newKeywords)
    : UObject(), fullName(fullNameBuffer), baseName(NULL)
{
    if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
    {
        init(NULL, FALSE); /* shortcut */
        return;
    }

    UErrorCode status = U_ZERO_ERROR;
    int32_t lsize = 0;
    int32_t csize = 0;
    int32_t vsize = 0;
    int32_t ksize = 0;

    // Language
    if ( newLanguage != NULL )
    {
        lsize = (int32_t)uprv_strlen(newLanguage);
        if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
            setToBogus();
            return;
        }
    }

    CharString togo(newLanguage, lsize, status); // start with newLanguage

    // Country
    if ( newCountry != NULL )
    {
        csize = (int32_t)uprv_strlen(newCountry);
        if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
            setToBogus();
            return;
        }
    }

    // Variant
    if ( newVariant != NULL )
    {
        // remove leading _'s
        while(newVariant[0] == SEP_CHAR)
        {
            newVariant++;
        }

        vsize = (int32_t)uprv_strlen(newVariant);
        if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
            setToBogus();
            return;
        }
        // remove trailing _'s (always keeping at least one character)
        while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
        {
            vsize--;
        }
    }

    // Keywords
    if ( newKeywords != NULL)
    {
        ksize = (int32_t)uprv_strlen(newKeywords);
        if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
            setToBogus();
            return;
        }
    }

    // Assemble the ID; the language is already in place.
    if ( ( vsize != 0 ) || (csize != 0) )
    {
        // Separator after the language: "_c", or the first '_' of "__v".
        togo.append(SEP_CHAR, status);
    }

    if ( csize != 0 )
    {
        togo.append(newCountry, status);
    }

    if ( vsize != 0)
    {
        togo.append(SEP_CHAR, status)
            .append(newVariant, vsize, status);
    }

    if ( ksize != 0)
    {
        if (uprv_strchr(newKeywords, '=')) {
            togo.append('@', status); /* keyword parsing */
        }
        else {
            togo.append('_', status); /* Variant parsing with a script */
            if ( vsize == 0) {
                togo.append('_', status); /* No country found */
            }
        }
        togo.append(newKeywords, status);
    }

    if (U_FAILURE(status)) {
        // Something went wrong with appending, etc.
        setToBogus();
        return;
    }

    // Parse it, because for example 'language' might really be a complete
    // string.
    init(togo.data(), FALSE);
}
/** Copy constructor: starts with empty inline storage, then delegates to copy assignment. */
Locale::Locale(const Locale &other)
    : UObject(other),
      fullName(fullNameBuffer),
      baseName(NULL)
{
    operator=(other);
}
/**
 * Move constructor. baseName starts out aliasing fullName so that move
 * assignment, which frees a non-aliasing baseName, has nothing to free.
 */
Locale::Locale(Locale&& other) U_NOEXCEPT
    : UObject(other),
      fullName(fullNameBuffer),
      baseName(fullName)
{
    operator=(std::move(other));
}
/**
 * Copy assignment.
 *
 * Releases this object's current names, then deep-copies the names and parsed
 * fields of `other`. If a name cannot be duplicated (out of memory) this
 * object is left in the bogus state with valid, empty inline storage, so that
 * later calls that read fullName remain safe.
 */
Locale& Locale::operator=(const Locale& other) {
    if (this == &other) {
        return *this;
    }

    // Frees owned storage and resets to an empty, bogus locale.
    setToBogus();

    if (other.fullName == other.fullNameBuffer) {
        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
    } else if (other.fullName == nullptr) {
        // Mirror a source that has no name at all.
        fullName = nullptr;
    } else {
        char *nameCopy = uprv_strdup(other.fullName);
        if (nameCopy == nullptr) {
            // fullName still points at the (empty) inline buffer: stay bogus.
            return *this;
        }
        fullName = nameCopy;
    }

    if (other.baseName == other.fullName) {
        baseName = fullName;
    } else if (other.baseName != nullptr) {
        baseName = uprv_strdup(other.baseName);
        if (baseName == nullptr) {
            // Drop the already-copied fullName instead of leaving a
            // half-initialized object behind.
            setToBogus();
            return *this;
        }
    }

    uprv_strcpy(language, other.language);
    uprv_strcpy(script, other.script);
    uprv_strcpy(country, other.country);

    variantBegin = other.variantBegin;
    fIsBogus = other.fIsBogus;

    return *this;
}
/**
 * Move assignment.
 *
 * Frees this object's heap-allocated names, then takes over the heap buffers
 * of `other` (an inline name is copied instead). `other` is left pointing at
 * its own inline buffer so its destructor frees nothing that was transferred.
 * Self-assignment is a no-op; without the guard the names would be freed and
 * then read again.
 */
Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
    if (this == &other) {
        return *this;
    }

    if (baseName != fullName) uprv_free(baseName);
    if (fullName != fullNameBuffer) uprv_free(fullName);

    if (other.fullName == other.fullNameBuffer) {
        // Inline storage cannot be stolen; copy the characters.
        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
        fullName = fullNameBuffer;
    } else {
        fullName = other.fullName;
    }

    if (other.baseName == other.fullName) {
        baseName = fullName;
    } else {
        baseName = other.baseName;
    }

    uprv_strcpy(language, other.language);
    uprv_strcpy(script, other.script);
    uprv_strcpy(country, other.country);

    variantBegin = other.variantBegin;
    fIsBogus = other.fIsBogus;

    // Detach the source from the buffers that now belong to this object.
    other.baseName = other.fullName = other.fullNameBuffer;

    return *this;
}
/** Returns a heap-allocated copy of this locale, created with the copy constructor. */
Locale *
Locale::clone() const {
    Locale *duplicate = new Locale(*this);
    return duplicate;
}
/** Two locales are equal when their full name strings compare equal. */
UBool
Locale::operator==( const Locale& other) const
{
    const int comparison = uprv_strcmp(other.fullName, fullName);
    return (comparison == 0);
}
#define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
1999-08-16 21:50:52 +00:00
/**
 * (Re)initializes this Locale from a C locale ID.
 *
 * Frees the current names, normalizes `localeID` into fullName (through
 * uloc_canonicalize() when `canonicalize` is true, uloc_getName() otherwise,
 * moving to a heap buffer if the inline buffer is too small), then splits the
 * name on '_' to fill language, script, country and variantBegin, and finally
 * sets up baseName.
 *
 * A NULL `localeID` is not an error: the object becomes a copy of the default
 * locale. On any failure the object is set to bogus (there is no UErrorCode
 * in this interface).
 *
 * @return *this
 */
Locale& Locale::init(const char* localeID, UBool canonicalize)
{
    fIsBogus = FALSE;

    /* Free our current storage */
    if (baseName != fullName) {
        uprv_free(baseName);
    }
    baseName = NULL;
    if (fullName != fullNameBuffer) {
        uprv_free(fullName);
        fullName = fullNameBuffer;
    }

    if (localeID == NULL) {
        // not an error, just set the default locale
        return *this = getDefault();
    }

    /* preset all fields to empty */
    language[0] = 0;
    script[0] = 0;
    country[0] = 0;

    // "canonicalize" the locale ID to ICU/Java format
    UErrorCode err = U_ZERO_ERROR;
    int32_t length;
    if (canonicalize) {
        length = uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err);
    } else {
        length = uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
    }

    if (err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
        /* Go to heap for the fullName if necessary */
        fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
        if (fullName == 0) {
            // out of memory
            fullName = fullNameBuffer;
            setToBogus();
            return *this;
        }
        err = U_ZERO_ERROR;
        if (canonicalize) {
            length = uloc_canonicalize(localeID, fullName, length+1, &err);
        } else {
            length = uloc_getName(localeID, fullName, length+1, &err);
        }
    }
    if (U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
        /* should never occur */
        setToBogus();
        return *this;
    }

    // Until a variant is found, variantBegin marks the end of the name.
    variantBegin = length;

    /* after uloc_getName/canonicalize() we know that only '_' are separators */
    char *field[5] = {0};
    int32_t fieldLen[5] = {0};
    field[0] = fullName;
    int32_t fieldIdx = 1;
    for (;;) {
        char *underscore = uprv_strchr(field[fieldIdx-1], SEP_CHAR);
        if (underscore == 0 || fieldIdx >= UPRV_LENGTHOF(field)-1) {
            break;
        }
        field[fieldIdx] = underscore + 1;
        fieldLen[fieldIdx-1] = (int32_t)(underscore - field[fieldIdx-1]);
        fieldIdx++;
    }

    // variant may contain @foo or .foo POSIX cruft; remove it
    char *lastField = field[fieldIdx-1];
    char *atSign = uprv_strchr(lastField, '@');
    char *dot = uprv_strchr(lastField, '.');
    char *cut = atSign;
    if (atSign == NULL || (dot != NULL && atSign > dot)) {
        cut = dot;      // whichever of '@' / '.' comes first, or NULL if neither
    }
    if (cut != NULL) {
        fieldLen[fieldIdx-1] = (int32_t)(cut - lastField);
    } else {
        fieldLen[fieldIdx-1] = length - (int32_t)(lastField - fullName);
    }

    if (fieldLen[0] >= (int32_t)(sizeof(language))) {
        // error: the language field is too long
        setToBogus();
        return *this;
    }

    if (fieldLen[0] > 0) {
        /* We have a language */
        uprv_memcpy(language, fullName, fieldLen[0]);
        language[fieldLen[0]] = 0;
    }

    /* Usually the 2nd field, except when a script or country is also used. */
    int32_t variantField = 1;

    if (fieldLen[1] == 4 &&
        ISASCIIALPHA(field[1][0]) && ISASCIIALPHA(field[1][1]) &&
        ISASCIIALPHA(field[1][2]) && ISASCIIALPHA(field[1][3])) {
        /* We have at least a script */
        uprv_memcpy(script, field[1], fieldLen[1]);
        script[fieldLen[1]] = 0;
        variantField++;
    }

    const int32_t regionLen = fieldLen[variantField];
    if (regionLen == 2 || regionLen == 3) {
        /* We have a country */
        uprv_memcpy(country, field[variantField], regionLen);
        country[regionLen] = 0;
        variantField++;
    } else if (regionLen == 0) {
        variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
    }

    if (fieldLen[variantField] > 0) {
        /* We have a variant */
        variantBegin = (int32_t)(field[variantField] - fullName);
    }

    err = U_ZERO_ERROR;
    initBaseName(err);
    if (U_FAILURE(err)) {
        setToBogus();
        return *this;
    }

    // successful end of init()
    return *this;
}
/*
 * Set up baseName from fullName.
 * Without keywords (no '@' that precedes an '='), baseName simply aliases fullName.
 * With keywords, baseName becomes a separately allocated copy of fullName
 * truncated at the '@'.
 * Called from init() and again after a keyword has been set.
 */
void
Locale::initBaseName(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    U_ASSERT(baseName==NULL || baseName==fullName);

    const char *atPtr = uprv_strchr(fullName, '@');
    const char *eqPtr = uprv_strchr(fullName, '=');
    if (!(atPtr && eqPtr && atPtr < eqPtr)) {
        // No keywords: share the full name.
        baseName = fullName;
        return;
    }

    // Key words exist.
    int32_t baseNameLength = (int32_t)(atPtr - fullName);
    baseName = (char *)uprv_malloc(baseNameLength + 1);
    if (baseName == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_strncpy(baseName, fullName, baseNameLength);
    baseName[baseNameLength] = 0;

    // The original computation of variantBegin leaves it equal to the length
    // of fullName if there is no variant. It should instead be
    // the length of the baseName.
    if (variantBegin > baseNameLength) {
        variantBegin = baseNameLength;
    }
}
1999-08-16 21:50:52 +00:00
/** Hash code computed over the characters of fullName. */
int32_t
Locale::hashCode() const
{
    const int32_t nameLength = static_cast<int32_t>(uprv_strlen(fullName));
    return ustr_hashCharsN(fullName, nameLength);
}
/**
 * Puts this object into the "bogus" state: owned name buffers are freed,
 * fullName points at the (emptied) inline buffer, baseName is NULL, all
 * parsed fields are empty and fIsBogus is TRUE.
 */
void
Locale::setToBogus() {
    /* Free our current storage */
    if (baseName != fullName) {
        uprv_free(baseName);
    }
    baseName = NULL;
    if (fullName != fullNameBuffer) {
        uprv_free(fullName);
        fullName = fullNameBuffer;
    }

    /* Empty every string field */
    fullNameBuffer[0] = 0;
    language[0] = 0;
    script[0] = 0;
    country[0] = 0;

    fIsBogus = TRUE;
    variantBegin = 0;
}
1999-08-16 21:50:52 +00:00
/**
 * Returns the current default locale. If none has been set yet, it is
 * established from the host's default locale ID via
 * locale_set_default_internal(NULL, ...).
 */
const Locale& U_EXPORT2
Locale::getDefault()
{
    Locale *current = NULL;
    {
        // Hold the mutex only while reading the shared pointer.
        Mutex lock(&gDefaultLocaleMutex);
        current = gDefaultLocale;
    }
    if (current != NULL) {
        return *current;
    }

    UErrorCode status = U_ZERO_ERROR;
    return *locale_set_default_internal(NULL, status);
}
/**
 * Makes `newLocale` the default locale.
 * Does nothing if `status` already indicates failure.
 */
void U_EXPORT2
Locale::setDefault( const Locale&     newLocale,
                    UErrorCode&       status)
{
    if (U_FAILURE(status)) {
        return;
    }

    /* Going through the full name string of the supplied locale is a
     * convenient way to reuse the default-locale caching mechanism.
     */
    locale_set_default_internal(newLocale.getName(), status);
}
/**
 * Replaces this locale with the result of uloc_addLikelySubtags() applied to
 * its full name. Sets `status` on failure; if re-initializing from the result
 * leaves the object bogus, status becomes U_ILLEGAL_ARGUMENT_ERROR.
 */
void
Locale::addLikelySubtags(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    // The maximized locale ID string is often longer, but there is no good
    // heuristic to estimate just how much longer. Leave that to CharString.
    CharString expandedID;
    int32_t capacity = static_cast<int32_t>(uprv_strlen(fullName));

    char* out;
    int32_t outLength;

    for (;;) {
        out = expandedID.getAppendBuffer(
                /*minCapacity=*/capacity,
                /*desiredCapacityHint=*/capacity,
                capacity,
                status);
        if (U_FAILURE(status)) {
            return;
        }

        outLength = uloc_addLikelySubtags(fullName, out, capacity, &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            // Too small: retry using the returned length as the new capacity.
            capacity = outLength;
            status = U_ZERO_ERROR;
            continue;
        }
        break;
    }

    if (U_FAILURE(status)) {
        return;
    }

    expandedID.append(out, outLength, status);
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators provided by CharString.
    }
    if (U_FAILURE(status)) {
        return;
    }

    init(expandedID.data(), /*canonicalize=*/FALSE);
    if (isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
}
/**
 * Replaces this locale with the result of uloc_minimizeSubtags() applied to
 * its full name. Sets `status` on failure; if re-initializing from the result
 * leaves the object bogus, status becomes U_ILLEGAL_ARGUMENT_ERROR.
 */
void
Locale::minimizeSubtags(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Except for a few edge cases (like the empty string, that is minimized to
    // "en__POSIX"), minimized locale ID strings will be either the same length
    // or shorter than their input.
    CharString reducedID;
    int32_t capacity = static_cast<int32_t>(uprv_strlen(fullName));

    char* out;
    int32_t outLength;

    for (;;) {
        out = reducedID.getAppendBuffer(
                /*minCapacity=*/capacity,
                /*desiredCapacityHint=*/capacity,
                capacity,
                status);
        if (U_FAILURE(status)) {
            return;
        }

        outLength = uloc_minimizeSubtags(fullName, out, capacity, &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            // Because of the internal minimal buffer size of CharString, I can't
            // think of any input data for which this could possibly ever happen.
            // Maybe it would be better replaced with an assertion instead?
            capacity = outLength;
            status = U_ZERO_ERROR;
            continue;
        }
        break;
    }

    if (U_FAILURE(status)) {
        return;
    }

    reducedID.append(out, outLength, status);
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators provided by CharString.
    }
    if (U_FAILURE(status)) {
        return;
    }

    init(reducedID.data(), /*canonicalize=*/FALSE);
    if (isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
}
/**
 * Creates a Locale from a BCP-47 language tag.
 *
 * @param tag    the language tag; it must be consumed completely by the parser.
 * @param status set to U_ILLEGAL_ARGUMENT_ERROR if only part of the tag was
 *               parsed or the resulting locale is bogus; other failures are
 *               passed through from the helpers.
 * @return the parsed locale; a bogus locale is returned on every early-exit
 *         failure path.
 */
Locale U_EXPORT2
Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
{
    Locale result(Locale::eBOGUS);

    if (U_FAILURE(status)) {
        return result;
    }

    // If a BCP-47 language tag is passed as the language parameter to the
    // normal Locale constructor, it will actually fall back to invoking
    // uloc_forLanguageTag() to parse it if it somehow is able to detect that
    // the string actually is BCP-47. This works well for things like strings
    // using BCP-47 extensions, but it does not at all work for things like
    // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
    // interpret as ICU locale IDs and because of that won't trigger the BCP-47
    // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
    // and then Locale::init(), instead of just calling the normal constructor.

    // All simple language tags will have the exact same length as ICU locale
    // ID strings as they have as BCP-47 strings (like "en_US" for "en-US").
    CharString localeID;
    int32_t capacity = tag.size();

    char* out;
    int32_t consumed, outLength;

    for (;;) {
        out = localeID.getAppendBuffer(
                /*minCapacity=*/capacity,
                /*desiredCapacityHint=*/capacity,
                capacity,
                status);
        if (U_FAILURE(status)) {
            return result;
        }

        outLength = ulocimp_forLanguageTag(
                tag.data(),
                tag.length(),
                out,
                capacity,
                &consumed,
                &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            // For all BCP-47 language tags that use extensions, the corresponding
            // ICU locale ID will be longer but uloc_forLanguageTag() does compute
            // the exact length needed so this memory reallocation will be done at
            // most once.
            capacity = outLength;
            status = U_ZERO_ERROR;
            continue;
        }
        break;
    }

    if (U_FAILURE(status)) {
        return result;
    }

    // A partially parsed tag is rejected.
    if (consumed != tag.size()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    localeID.append(out, outLength, status);
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators provided by CharString.
    }
    if (U_FAILURE(status)) {
        return result;
    }

    result.init(localeID.data(), /*canonicalize=*/FALSE);
    if (result.isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return result;
}
/**
 * Appends the BCP-47 language tag for this locale to `sink`.
 *
 * @param sink   receives the tag bytes (no terminator is appended).
 * @param status set to U_ILLEGAL_ARGUMENT_ERROR for a bogus locale and to
 *               U_MEMORY_ALLOCATION_ERROR if the scratch buffer cannot be
 *               allocated; other failures come from uloc_toLanguageTag().
 */
void
Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return;
    }
    if (fIsBogus) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // All simple language tags will have the exact same length as BCP-47
    // strings as they have as ICU locale IDs (like "en-US" for "en_US").
    LocalMemory<char> scratch;
    int32_t scratch_capacity = static_cast<int32_t>(uprv_strlen(fullName));
    if (scratch_capacity == 0) {
        scratch_capacity = 3;  // "und"
    }

    char* out;
    int32_t out_capacity, out_length;

    for (;;) {
        if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        out = sink.GetAppendBuffer(
                /*min_capacity=*/scratch_capacity,
                /*desired_capacity_hint=*/scratch_capacity,
                scratch.getAlias(),
                scratch_capacity,
                &out_capacity);

        out_length = uloc_toLanguageTag(
                fullName,
                out,
                out_capacity,
                /*strict=*/FALSE,
                &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            // For some very few edge cases a language tag will be longer as a
            // BCP-47 string than it is as an ICU locale ID. Most notoriously "C"
            // expands to the BCP-47 tag "en-US-u-va-posix", 16 times longer, and
            // it'll take several calls to uloc_toLanguageTag() to figure that out.
            // https://unicode-org.atlassian.net/browse/ICU-20132
            scratch_capacity = out_length;
            status = U_ZERO_ERROR;
            continue;
        }
        break;
    }

    if (U_FAILURE(status)) {
        return;
    }

    sink.Append(out, out_length);

    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators not used.
    }
}
/**
 * Creates a locale from `name` without canonicalizing it.
 * A NULL name yields a copy of the default locale.
 */
Locale U_EXPORT2
Locale::createFromName (const char *name)
{
    if (name == NULL) {
        return getDefault();
    }

    Locale l("");
    l.init(name, FALSE);
    return l;
}
/** Creates a locale from `name`, canonicalizing the ID while parsing it. */
Locale U_EXPORT2
Locale::createCanonical(const char* name)
{
    Locale canonical("");
    canonical.init(name, TRUE);
    return canonical;
}
/** Forwards to uloc_getISO3Language() with this locale's full name. */
const char *
Locale::getISO3Language() const
{
    const char *iso3 = uloc_getISO3Language(fullName);
    return iso3;
}
/** Forwards to uloc_getISO3Country() with this locale's full name. */
const char *
Locale::getISO3Country() const
{
    const char *iso3 = uloc_getISO3Country(fullName);
    return iso3;
}
1999-08-16 21:50:52 +00:00
/**
* Return the LCID value as specified in the "LocaleID" resource for this
* locale. The LocaleID must be expressed as a hexadecimal number, from
* one to four digits. If the LocaleID resource is not present, or is
* in an incorrect format, 0 is returned. The LocaleID is for use in
* Windows (it is an LCID), but is available on all platforms.
*/
uint32_t
1999-08-16 21:50:52 +00:00
Locale::getLCID() const
{
return uloc_getLCID(fullName);
}
/** Returns the array provided by uloc_getISOCountries(). */
const char* const* U_EXPORT2
Locale::getISOCountries()
{
    const char* const* countries = uloc_getISOCountries();
    return countries;
}
1999-08-16 21:50:52 +00:00
/** Returns the array provided by uloc_getISOLanguages(). */
const char* const* U_EXPORT2
Locale::getISOLanguages()
{
    const char* const* languages = uloc_getISOLanguages();
    return languages;
}
// Re-initializes this locale from a POSIX-style ID; the ID is canonicalized
// while it is parsed.
void
Locale::setFromPOSIXID(const char *posixID)
{
    init(posixID, /*canonicalize=*/TRUE);
}
/** Returns the predefined root locale (empty ID) from the locale cache. */
const Locale & U_EXPORT2
Locale::getRoot()
{
    return getLocale(eROOT);
}
/** Returns the predefined "en" locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getEnglish()
{
    return getLocale(eENGLISH);
}
1999-08-16 21:50:52 +00:00
/** Returns the predefined "fr" locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getFrench()
{
    return getLocale(eFRENCH);
}
1999-08-16 21:50:52 +00:00
/** Returns the predefined "de" locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getGerman()
{
    return getLocale(eGERMAN);
}
/** Returns the predefined "it" locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getItalian()
{
    return getLocale(eITALIAN);
}
/** Returns the predefined "ja" locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getJapanese()
{
    return getLocale(eJAPANESE);
}
/** Returns the predefined "ko" locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getKorean()
{
    return getLocale(eKOREAN);
}
/** Returns the predefined "zh" locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getChinese()
{
    return getLocale(eCHINESE);
}
/** Returns the predefined zh/CN locale (the eCHINA cache slot). */
const Locale & U_EXPORT2
Locale::getSimplifiedChinese()
{
    return getLocale(eCHINA);
}
/** Returns the predefined zh/TW locale (the eTAIWAN cache slot). */
const Locale & U_EXPORT2
Locale::getTraditionalChinese()
{
    return getLocale(eTAIWAN);
}
/** Returns the predefined fr/FR locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getFrance()
{
    return getLocale(eFRANCE);
}
/** Returns the predefined de/DE locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getGermany()
{
    return getLocale(eGERMANY);
}
1999-08-16 21:50:52 +00:00
/** Returns the predefined it/IT locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getItaly()
{
    return getLocale(eITALY);
}
1999-08-16 21:50:52 +00:00
/** Returns the predefined ja/JP locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getJapan()
{
    return getLocale(eJAPAN);
}
1999-08-16 21:50:52 +00:00
/** Returns the predefined ko/KR locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getKorea()
{
    return getLocale(eKOREA);
}
1999-08-16 21:50:52 +00:00
/** Returns the predefined zh/CN locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getChina()
{
    return getLocale(eCHINA);
}
1999-08-16 21:50:52 +00:00
/** Returns the predefined zh/CN locale; same cache slot as getChina(). */
const Locale & U_EXPORT2
Locale::getPRC()
{
    return getLocale(eCHINA);
}
/** Returns the predefined zh/TW locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getTaiwan()
{
    return getLocale(eTAIWAN);
}
/** Returns the predefined en/GB locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getUK()
{
    return getLocale(eUK);
}
/** Returns the predefined en/US locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getUS()
{
    return getLocale(eUS);
}
/** Returns the predefined en/CA locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getCanada()
{
    return getLocale(eCANADA);
}
/** Returns the predefined fr/CA locale from the locale cache. */
const Locale & U_EXPORT2
Locale::getCanadaFrench()
{
    return getLocale(eCANADA_FRENCH);
}
/**
 * Returns the predefined locale stored at index `locid` (an ELocalePos value)
 * of the locale cache.
 *
 * NOTE(review): if the cache could not be allocated, the function still
 * dereferences the NULL cache pointer (at index 0), as the existing comments
 * acknowledge; callers get a reference to a null object in that case.
 */
const Locale &
Locale::getLocale(int locid)
{
    Locale *cache = getLocaleCache();
    U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));

    // Failure allocating the locale cache.
    // The best we can do is return a NULL reference.
    const int index = (cache == NULL) ? 0 : locid;

    return cache[index]; /*operating on NULL*/
}
/*
 * Returns the predefined-locale array, creating it on first use.
 * The cache is built lazily through an init-once guard in order to get around
 * static initialization and static destruction. Returns NULL if the array
 * could not be allocated; the init status itself is not reported.
 */
Locale *
Locale::getLocaleCache()
{
    UErrorCode initStatus = U_ZERO_ERROR;
    umtx_initOnce(gLocaleCacheInitOnce, locale_init, initStatus);
    return gLocaleCache;
}
/**
 * StringEnumeration over a list of locale keywords.
 *
 * The keyword data is a sequence of NUL-terminated strings followed by an
 * empty string. The enumeration keeps its own heap copy of that data. When
 * there is nothing to copy (empty input, bad arguments, prior failure or
 * allocation failure) `keywords` and `current` both point at the static
 * fgClassID byte, which is '\0' and therefore reads as an empty list; that
 * sentinel is never passed to uprv_free().
 */
class KeywordEnumeration : public StringEnumeration {
private:
    char *keywords;             // owned copy of the keyword list, or the fgClassID sentinel
    char *current;              // next keyword to return
    int32_t length;             // number of bytes copied into keywords (0 for the sentinel)
    UnicodeString currUSKey;
    static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */

public:
    static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
    virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
public:
    /**
     * @param keys         keyword data to copy.
     * @param keywordLen   number of bytes of `keys` to copy; 0 creates an empty enumeration.
     * @param currentIndex byte offset of the next keyword inside the data.
     * @param status       set to U_ILLEGAL_ARGUMENT_ERROR for NULL keys or a negative
     *                     length, U_MEMORY_ALLOCATION_ERROR if the copy cannot be
     *                     allocated. On failure the enumeration stays empty.
     */
    KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
        : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
        if(U_SUCCESS(status) && keywordLen != 0) {
            if(keys == NULL || keywordLen < 0) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                // Allocate into a temporary so that a failed allocation leaves
                // the members pointing at the valid empty-list sentinel.
                char *copy = (char *)uprv_malloc(keywordLen+1);
                if (copy == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                }
                else {
                    uprv_memcpy(copy, keys, keywordLen);
                    copy[keywordLen] = 0;
                    keywords = copy;
                    current = keywords + currentIndex;
                    length = keywordLen;
                }
            }
        }
    }

    virtual ~KeywordEnumeration();

    /**
     * Returns a copy positioned at the same keyword, or NULL if the copy
     * could not be created.
     */
    virtual StringEnumeration * clone() const
    {
        UErrorCode status = U_ZERO_ERROR;
        KeywordEnumeration *copy =
            new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
        if (copy != NULL && U_FAILURE(status)) {
            delete copy;
            copy = NULL;
        }
        return copy;
    }

    /** Counts all keywords in the list, regardless of the current position. */
    virtual int32_t count(UErrorCode &/*status*/) const {
        char *kw = keywords;
        int32_t result = 0;
        while(*kw) {
            result++;
            kw += uprv_strlen(kw)+1;
        }
        return result;
    }

    /**
     * Returns the next keyword and advances, or NULL (with *resultLength = 0)
     * at the end of the list or when `status` already indicates failure.
     */
    virtual const char* next(int32_t* resultLength, UErrorCode& status) {
        const char* result;
        int32_t len;
        if(U_SUCCESS(status) && *current != 0) {
            result = current;
            len = (int32_t)uprv_strlen(current);
            current += len+1;
            if(resultLength != NULL) {
                *resultLength = len;
            }
        } else {
            if(resultLength != NULL) {
                *resultLength = 0;
            }
            result = NULL;
        }
        return result;
    }

    /** UnicodeString flavour of next(), converted through setChars(). */
    virtual const UnicodeString* snext(UErrorCode& status) {
        int32_t resultLength = 0;
        const char *s = next(&resultLength, status);
        return setChars(s, resultLength, status);
    }

    /** Rewinds the enumeration to the first keyword. */
    virtual void reset(UErrorCode& /*status*/) {
        current = keywords;
    }
};

const char KeywordEnumeration::fgClassID = '\0';

KeywordEnumeration::~KeywordEnumeration() {
    // Only a heap copy may be freed; the sentinel is static storage.
    if (keywords != &fgClassID) {
        uprv_free(keywords);
    }
}
// A KeywordEnumeration whose next() passes each legacy keyword through
// uloc_toUnicodeLocaleKey() before returning it.
class UnicodeKeywordEnumeration : public KeywordEnumeration {
public:
    using KeywordEnumeration::KeywordEnumeration;
    virtual ~UnicodeKeywordEnumeration() = default;

    /**
     * Returns the Unicode locale key for the next keyword, or nullptr (with
     * *resultLength = 0) at the end of the list or on failure. A keyword that
     * cannot be converted sets status to U_ILLEGAL_ARGUMENT_ERROR.
     */
    virtual const char* next(int32_t* resultLength, UErrorCode& status) {
        const char* legacy_key = KeywordEnumeration::next(nullptr, status);

        const char* key = nullptr;
        if (U_SUCCESS(status) && legacy_key != nullptr) {
            key = uloc_toUnicodeLocaleKey(legacy_key);
            if (key == nullptr) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            }
        }

        if (resultLength != nullptr) {
            *resultLength = (key == nullptr)
                ? 0
                : static_cast<int32_t>(uprv_strlen(key));
        }
        return key;
    }
};
/**
 * Creates an enumeration of the keywords in this locale's full name.
 *
 * @param status set to U_INVALID_FORMAT_ERROR if the name contains '@' that is
 *               not followed by an '=', U_MEMORY_ALLOCATION_ERROR if the
 *               enumeration cannot be allocated; other failures come from
 *               locale_getKeywords() or the enumeration's constructor.
 * @return a new enumeration owned by the caller, or NULL if the locale has no
 *         keywords or an error occurred. No object is returned together with
 *         a failure status.
 */
StringEnumeration *
Locale::createKeywords(UErrorCode &status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    const char* variantStart = uprv_strchr(fullName, '@');
    if (variantStart == NULL) {
        return NULL;    // no keyword section at all
    }
    const char* assignment = uprv_strchr(fullName, '=');
    if (assignment == NULL || assignment < variantStart) {
        // '@' without a following key=value pair
        status = U_INVALID_FORMAT_ERROR;
        return NULL;
    }

    char keywords[256];
    int32_t keywordCapacity = sizeof keywords;
    int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
    if (U_FAILURE(status) || keyLen == 0) {
        return NULL;
    }
    if (keyLen < 0) {
        // The enumeration would reject a negative length; report it here
        // without creating an object.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    StringEnumeration *result = new KeywordEnumeration(keywords, keyLen, 0, status);
    if (result == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    } else if (U_FAILURE(status)) {
        // The constructor could not copy the keyword data; do not hand out
        // (or leak) a half-built enumeration.
        delete result;
        result = NULL;
    }
    return result;
}
/**
 * Creates an enumeration of this locale's keywords that returns each one
 * converted to its Unicode locale key (see UnicodeKeywordEnumeration).
 *
 * @param status set to U_INVALID_FORMAT_ERROR if the name contains '@' that is
 *               not followed by an '=', U_MEMORY_ALLOCATION_ERROR if the
 *               enumeration cannot be allocated; other failures come from
 *               locale_getKeywords() or the enumeration's constructor.
 * @return a new enumeration owned by the caller, or NULL if the locale has no
 *         keywords or an error occurred. No object is returned together with
 *         a failure status.
 */
StringEnumeration *
Locale::createUnicodeKeywords(UErrorCode &status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    const char* variantStart = uprv_strchr(fullName, '@');
    if (variantStart == NULL) {
        return NULL;    // no keyword section at all
    }
    const char* assignment = uprv_strchr(fullName, '=');
    if (assignment == NULL || assignment < variantStart) {
        // '@' without a following key=value pair
        status = U_INVALID_FORMAT_ERROR;
        return NULL;
    }

    char keywords[256];
    int32_t keywordCapacity = sizeof keywords;
    int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
    if (U_FAILURE(status) || keyLen == 0) {
        return NULL;
    }
    if (keyLen < 0) {
        // The enumeration would reject a negative length; report it here
        // without creating an object.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    StringEnumeration *result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status);
    if (result == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    } else if (U_FAILURE(status)) {
        // The constructor could not copy the keyword data; do not hand out
        // (or leak) a half-built enumeration.
        delete result;
        result = NULL;
    }
    return result;
}
/**
 * Writes the value of `keywordName` into `buffer` by forwarding to
 * uloc_getKeywordValue() with this locale's full name; returns that
 * function's result.
 */
int32_t
Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
{
    const int32_t valueLength =
        uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
    return valueLength;
}
/**
 * Appends the value of keyword `keywordName` to `sink`.
 *
 * @param status set to U_ILLEGAL_ARGUMENT_ERROR for a bogus locale and to
 *               U_MEMORY_ALLOCATION_ERROR if the scratch buffer cannot be
 *               allocated; other failures come from uloc_getKeywordValue().
 */
void
Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return;
    }
    if (fIsBogus) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString keywordName_nul(keywordName, status);
    if (U_FAILURE(status)) {
        return;
    }

    LocalMemory<char> scratch;
    int32_t scratch_capacity = 16;  // Arbitrarily chosen default size.

    char* out;
    int32_t out_capacity, out_length;

    for (;;) {
        if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        out = sink.GetAppendBuffer(
                /*min_capacity=*/scratch_capacity,
                /*desired_capacity_hint=*/scratch_capacity,
                scratch.getAlias(),
                scratch_capacity,
                &out_capacity);

        out_length = uloc_getKeywordValue(
                fullName,
                keywordName_nul.data(),
                out,
                out_capacity,
                &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            // Too small: retry using the returned length as the new capacity.
            scratch_capacity = out_length;
            status = U_ZERO_ERROR;
            continue;
        }
        break;
    }

    if (U_FAILURE(status)) {
        return;
    }

    sink.Append(out, out_length);

    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators not used.
    }
}
/**
 * Appends the Unicode locale type for the Unicode keyword `keywordName` to `sink`.
 *
 * The keyword is mapped to its legacy key, the legacy value is read with
 * getKeywordValue(), and that value is mapped back with
 * uloc_toUnicodeLocaleType(). If either mapping yields no result, status is
 * set to U_ILLEGAL_ARGUMENT_ERROR.
 */
void
Locale::getUnicodeKeywordValue(StringPiece keywordName,
                               ByteSink& sink,
                               UErrorCode& status) const {
    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString keywordName_nul(keywordName, status);
    if (U_FAILURE(status)) {
        return;
    }

    const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
    if (legacy_key == nullptr) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Collect the legacy value in a local string first.
    CharString legacy_value;
    {
        CharStringByteSink legacy_sink(&legacy_value);
        getKeywordValue(legacy_key, legacy_sink, status);
    }
    if (U_FAILURE(status)) {
        return;
    }

    const char* unicode_value = uloc_toUnicodeLocaleType(
            keywordName_nul.data(), legacy_value.data());
    if (unicode_value == nullptr) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    const int32_t unicode_length = static_cast<int32_t>(uprv_strlen(unicode_value));
    sink.Append(unicode_value, unicode_length);
}
void
Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
{
uloc_setKeywordValue(keywordName, keywordValue, fullName, ULOC_FULLNAME_CAPACITY, &status);
if (U_SUCCESS(status) && baseName == fullName) {
// May have added the first keyword, meaning that the fullName is no longer also the baseName.
initBaseName(status);
}
}
/**
 * StringPiece overload: makes NUL-terminated copies of both arguments and
 * forwards to the const char* overload.
 */
void
Locale::setKeywordValue(StringPiece keywordName,
                        StringPiece keywordValue,
                        UErrorCode& status) {
    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString keywordName_nul(keywordName, status);
    const CharString keywordValue_nul(keywordValue, status);

    const char *name = keywordName_nul.data();
    const char *value = keywordValue_nul.data();
    setKeywordValue(name, value, status);
}
/**
 * Sets a keyword given as a Unicode locale key/type pair.
 *
 * Both are mapped to their legacy forms with uloc_toLegacyKey() and
 * uloc_toLegacyType(); if either mapping yields no result, status is set to
 * U_ILLEGAL_ARGUMENT_ERROR. Otherwise the legacy pair is passed to
 * setKeywordValue().
 */
void
Locale::setUnicodeKeywordValue(StringPiece keywordName,
                               StringPiece keywordValue,
                               UErrorCode& status) {
    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString keywordName_nul(keywordName, status);
    const CharString keywordValue_nul(keywordValue, status);
    if (U_FAILURE(status)) {
        return;
    }

    const char* unicode_key = keywordName_nul.data();

    const char* legacy_key = uloc_toLegacyKey(unicode_key);
    if (legacy_key == nullptr) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    const char* legacy_value = uloc_toLegacyType(unicode_key, keywordValue_nul.data());
    if (legacy_value == nullptr) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    setKeywordValue(legacy_key, legacy_value, status);
}
/**
 * Returns the base name: the full name itself when there are no keywords,
 * otherwise the copy truncated at '@' that initBaseName() created.
 */
const char *
Locale::getBaseName() const
{
    return baseName;
}
1999-08-16 21:50:52 +00:00
//eof
U_NAMESPACE_END