ICU-10751 uscript_getCode(locale) use likely subtags not LocaleScript locale data
X-SVN-Rev: 36280
This commit is contained in:
parent
c955c14a6b
commit
e353b8e897
@ -423,15 +423,16 @@ typedef enum UScriptCode {
|
||||
} UScriptCode;
|
||||
|
||||
/**
|
||||
* Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
|
||||
* Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
|
||||
* Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
|
||||
* Fills in USCRIPT_LATIN given "en" OR "en_US"
|
||||
* If required capacity is greater than capacity of the destination buffer then the error code
|
||||
* is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned
|
||||
* If the required capacity is greater than the capacity of the destination buffer,
|
||||
* then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
|
||||
*
|
||||
* <p>Note: To search by short or long script alias only, use
|
||||
* u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does
|
||||
* u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
|
||||
* a fast lookup without accessing the locale data.
|
||||
*
|
||||
* @param nameOrAbbrOrLocale name of the script, as given in
|
||||
* PropertyValueAliases.txt, or ISO 15924 code or locale
|
||||
* @param fillIn the UScriptCode buffer to fill in the script code
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Copyright (C) 1997-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
@ -13,85 +13,126 @@
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "uprops.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
/* Resource key that uscript_getCode() passes to ures_getByKey() on the opened locale bundle. */
static const char kLocaleScript[] = "LocaleScript";
|
||||
/* Script list that getCodesFromLocale() returns for language "ja". */
static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
|
||||
/* Script list that getCodesFromLocale() returns for language "ko". */
static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
|
||||
/* Script list that getCodesFromLocale() returns for language "zh" with script "Hant". */
static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
|
||||
|
||||
/* TODO: this is a bad API should be deprecated */
|
||||
static int32_t
|
||||
setCodes(const UScriptCode *src, int32_t length,
|
||||
UScriptCode *dest, int32_t capacity, UErrorCode *err) {
|
||||
int32_t i;
|
||||
if(U_FAILURE(*err)) { return 0; }
|
||||
if(length > capacity) {
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
return length;
|
||||
}
|
||||
for(i = 0; i < length; ++i) {
|
||||
dest[i] = src[i];
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
|
||||
if(U_FAILURE(*err)) { return 0; }
|
||||
if(1 > capacity) {
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
return 1;
|
||||
}
|
||||
scripts[0] = script;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
getCodesFromLocale(const char *locale,
|
||||
UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
|
||||
UErrorCode internalErrorCode = U_ZERO_ERROR;
|
||||
char lang[8];
|
||||
char script[8];
|
||||
int32_t langLength, scriptLength;
|
||||
if(U_FAILURE(*err)) { return 0; }
|
||||
// Multi-script languages, equivalent to the LocaleScript data
|
||||
// that we used to load from locale resource bundles.
|
||||
langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
|
||||
if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
return 0;
|
||||
}
|
||||
if(0 == uprv_strcmp(lang, "ja")) {
|
||||
return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
|
||||
}
|
||||
if(0 == uprv_strcmp(lang, "ko")) {
|
||||
return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
|
||||
}
|
||||
scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
|
||||
if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
return 0;
|
||||
}
|
||||
if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
|
||||
return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
|
||||
}
|
||||
// Explicit script code.
|
||||
if(scriptLength != 0) {
|
||||
UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
|
||||
if(scriptCode != USCRIPT_INVALID_CODE) {
|
||||
if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
|
||||
scriptCode = USCRIPT_HAN;
|
||||
}
|
||||
return setOneCode(scriptCode, scripts, capacity, err);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* TODO: this is a bad API and should be deprecated, ticket #11141 */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uscript_getCode(const char* nameOrAbbrOrLocale,
|
||||
UScriptCode* fillIn,
|
||||
int32_t capacity,
|
||||
UErrorCode* err){
|
||||
|
||||
UScriptCode code = USCRIPT_INVALID_CODE;
|
||||
int32_t numFilled=0;
|
||||
int32_t len=0;
|
||||
/* check arguments */
|
||||
if(err==NULL ||U_FAILURE(*err)){
|
||||
return numFilled;
|
||||
if(U_FAILURE(*err)) {
|
||||
return 0;
|
||||
}
|
||||
if(nameOrAbbrOrLocale==NULL || fillIn == NULL || capacity<0){
|
||||
if(nameOrAbbrOrLocale==NULL ||
|
||||
(fillIn == NULL ? capacity != 0 : capacity < 0)) {
|
||||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return numFilled;
|
||||
return 0;
|
||||
}
|
||||
|
||||
UBool triedCode = FALSE;
|
||||
if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){
|
||||
/* try long and abbreviated script names first */
|
||||
code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
|
||||
|
||||
}
|
||||
if(code==(UScriptCode)UCHAR_INVALID_CODE){
|
||||
/* Do not propagate error codes from just not finding a locale bundle. */
|
||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
||||
UResourceBundle* resB = ures_open(NULL,nameOrAbbrOrLocale,&localErrorCode);
|
||||
if(U_SUCCESS(localErrorCode)&& localErrorCode != U_USING_DEFAULT_WARNING){
|
||||
UResourceBundle* resD = ures_getByKey(resB,kLocaleScript,NULL,&localErrorCode);
|
||||
if(U_SUCCESS(localErrorCode) ){
|
||||
len =0;
|
||||
while(ures_hasNext(resD)){
|
||||
const UChar* name = ures_getNextString(resD,&len,NULL,&localErrorCode);
|
||||
if(U_SUCCESS(localErrorCode)){
|
||||
char cName[50] = {'\0'};
|
||||
u_UCharsToChars(name,cName,len);
|
||||
code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, cName);
|
||||
/* got the script code now fill in the buffer */
|
||||
if(numFilled<capacity){
|
||||
*(fillIn)++=code;
|
||||
numFilled++;
|
||||
}else{
|
||||
ures_close(resD);
|
||||
ures_close(resB);
|
||||
*err=U_BUFFER_OVERFLOW_ERROR;
|
||||
return len;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ures_close(resD);
|
||||
UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
|
||||
if(code!=USCRIPT_INVALID_CODE) {
|
||||
return setOneCode(code, fillIn, capacity, err);
|
||||
}
|
||||
ures_close(resB);
|
||||
code = USCRIPT_INVALID_CODE;
|
||||
triedCode = TRUE;
|
||||
}
|
||||
if(code==(UScriptCode)UCHAR_INVALID_CODE){
|
||||
/* still not found .. try long and abbreviated script names again */
|
||||
code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
|
||||
char likely[ULOC_FULLNAME_CAPACITY];
|
||||
UErrorCode internalErrorCode = U_ZERO_ERROR;
|
||||
int32_t length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
|
||||
if(U_FAILURE(*err) || length != 0) {
|
||||
return length;
|
||||
}
|
||||
if(code!=(UScriptCode)UCHAR_INVALID_CODE){
|
||||
/* we found it */
|
||||
if(numFilled<capacity){
|
||||
*(fillIn)++=code;
|
||||
numFilled++;
|
||||
}else{
|
||||
*err=U_BUFFER_OVERFLOW_ERROR;
|
||||
return len;
|
||||
(void)uloc_addLikelySubtags(nameOrAbbrOrLocale,
|
||||
likely, UPRV_LENGTHOF(likely), &internalErrorCode);
|
||||
if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
|
||||
length = getCodesFromLocale(likely, fillIn, capacity, err);
|
||||
if(U_FAILURE(*err) || length != 0) {
|
||||
return length;
|
||||
}
|
||||
}
|
||||
return numFilled;
|
||||
if(!triedCode) {
|
||||
/* still not found .. try long and abbreviated script names again */
|
||||
UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
|
||||
if(code!=USCRIPT_INVALID_CODE) {
|
||||
return setOneCode(code, fillIn, capacity, err);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -11,6 +11,38 @@
|
||||
#include "cucdapi.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/**
 * Formats a list of script codes as their short names separated by single
 * spaces, for use in test failure messages.
 *
 * @param scripts script codes to format
 * @param length  number of elements in scripts; 0 produces "(no scripts)"
 * @param s       output buffer; the caller must make it large enough for the
 *                result, since no bounds checking is done here
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    const char *separator = "";
    int32_t index;

    if(length == 0) {
        strcpy(s, "(no scripts)");
        return;
    }

    s[0] = 0;
    for(index = 0; index < length; ++index) {
        /* The separator is empty before the first name and a space afterwards. */
        strcat(s, separator);
        strcat(s, uscript_getShortName(scripts[index]));
        separator = " ";
    }
}
|
||||
|
||||
static void assertEqualScripts(const char *msg,
|
||||
const UScriptCode scripts1[], int32_t length1,
|
||||
const UScriptCode scripts2[], int32_t length2,
|
||||
UErrorCode errorCode) {
|
||||
char s1[80];
|
||||
char s2[80];
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
scriptsToString(scripts1, length1, s1);
|
||||
scriptsToString(scripts2, length2, s2);
|
||||
if(0!=strcmp(s1, s2)) {
|
||||
log_err("Failed: %s: expected %s but got %s\n", msg, s1, s2);
|
||||
}
|
||||
}
|
||||
|
||||
void TestUScriptCodeAPI(){
|
||||
int i =0;
|
||||
int numErrors =0;
|
||||
@ -112,6 +144,50 @@ void TestUScriptCodeAPI(){
|
||||
}
|
||||
|
||||
}
|
||||
{
|
||||
static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
|
||||
static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
|
||||
static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
|
||||
static const UScriptCode HAN[1] = { USCRIPT_HAN };
|
||||
static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
|
||||
static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
|
||||
static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
|
||||
UScriptCode scripts[5];
|
||||
UErrorCode err;
|
||||
int32_t num;
|
||||
|
||||
// Should work regardless of whether we have locale data for the language.
|
||||
err = U_ZERO_ERROR;
|
||||
num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||
assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik
|
||||
err = U_ZERO_ERROR;
|
||||
num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||
assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa
|
||||
|
||||
// Multi-script languages.
|
||||
err = U_ZERO_ERROR;
|
||||
num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||
assertEqualScripts("ja scripts: Kana Hira Hani",
|
||||
JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
|
||||
err = U_ZERO_ERROR;
|
||||
num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||
assertEqualScripts("ko scripts: Hang Hani",
|
||||
KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
|
||||
err = U_ZERO_ERROR;
|
||||
num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||
assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
|
||||
err = U_ZERO_ERROR;
|
||||
num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||
assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
|
||||
err = U_ZERO_ERROR;
|
||||
num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||
assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
|
||||
|
||||
// Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
|
||||
err = U_ZERO_ERROR;
|
||||
num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||
assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
|
||||
}
|
||||
|
||||
{
|
||||
UScriptCode testAbbr[]={
|
||||
|
Loading…
Reference in New Issue
Block a user