ICU-10751 uscript_getCode(locale) use likely subtags not LocaleScript locale data
X-SVN-Rev: 36280
This commit is contained in:
parent
c955c14a6b
commit
e353b8e897
@ -423,15 +423,16 @@ typedef enum UScriptCode {
|
|||||||
} UScriptCode;
|
} UScriptCode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
|
* Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
|
||||||
* Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
|
* Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
|
||||||
* Fills in USCRIPT_LATIN given "en" OR "en_US"
|
* Fills in USCRIPT_LATIN given "en" OR "en_US"
|
||||||
* If required capacity is greater than capacity of the destination buffer then the error code
|
* If the required capacity is greater than the capacity of the destination buffer,
|
||||||
* is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned
|
* then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
|
||||||
*
|
*
|
||||||
* <p>Note: To search by short or long script alias only, use
|
* <p>Note: To search by short or long script alias only, use
|
||||||
* u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does
|
* u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
|
||||||
* a fast lookup with no access of the locale data.
|
* a fast lookup with no access of the locale data.
|
||||||
|
*
|
||||||
* @param nameOrAbbrOrLocale name of the script, as given in
|
* @param nameOrAbbrOrLocale name of the script, as given in
|
||||||
* PropertyValueAliases.txt, or ISO 15924 code or locale
|
* PropertyValueAliases.txt, or ISO 15924 code or locale
|
||||||
* @param fillIn the UScriptCode buffer to fill in the script code
|
* @param fillIn the UScriptCode buffer to fill in the script code
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
* Copyright (C) 1997-2011, International Business Machines
|
* Copyright (C) 1997-2014, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
**********************************************************************
|
**********************************************************************
|
||||||
*
|
*
|
||||||
@ -13,85 +13,126 @@
|
|||||||
******************************************************************************
|
******************************************************************************
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "unicode/uscript.h"
|
|
||||||
#include "unicode/ures.h"
|
|
||||||
#include "unicode/uchar.h"
|
#include "unicode/uchar.h"
|
||||||
#include "unicode/putil.h"
|
#include "unicode/uscript.h"
|
||||||
#include "uprops.h"
|
#include "unicode/uloc.h"
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
#include "cstring.h"
|
#include "cstring.h"
|
||||||
|
|
||||||
static const char kLocaleScript[] = "LocaleScript";
|
static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
|
||||||
|
static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
|
||||||
|
static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
|
||||||
|
|
||||||
/* TODO: this is a bad API should be deprecated */
|
static int32_t
|
||||||
|
setCodes(const UScriptCode *src, int32_t length,
|
||||||
|
UScriptCode *dest, int32_t capacity, UErrorCode *err) {
|
||||||
|
int32_t i;
|
||||||
|
if(U_FAILURE(*err)) { return 0; }
|
||||||
|
if(length > capacity) {
|
||||||
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
for(i = 0; i < length; ++i) {
|
||||||
|
dest[i] = src[i];
|
||||||
|
}
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int32_t
|
||||||
|
setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
|
||||||
|
if(U_FAILURE(*err)) { return 0; }
|
||||||
|
if(1 > capacity) {
|
||||||
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
scripts[0] = script;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int32_t
|
||||||
|
getCodesFromLocale(const char *locale,
|
||||||
|
UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
|
||||||
|
UErrorCode internalErrorCode = U_ZERO_ERROR;
|
||||||
|
char lang[8];
|
||||||
|
char script[8];
|
||||||
|
int32_t langLength, scriptLength;
|
||||||
|
if(U_FAILURE(*err)) { return 0; }
|
||||||
|
// Multi-script languages, equivalent to the LocaleScript data
|
||||||
|
// that we used to load from locale resource bundles.
|
||||||
|
langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
|
||||||
|
if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if(0 == uprv_strcmp(lang, "ja")) {
|
||||||
|
return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
|
||||||
|
}
|
||||||
|
if(0 == uprv_strcmp(lang, "ko")) {
|
||||||
|
return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
|
||||||
|
}
|
||||||
|
scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
|
||||||
|
if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
|
||||||
|
return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
|
||||||
|
}
|
||||||
|
// Explicit script code.
|
||||||
|
if(scriptLength != 0) {
|
||||||
|
UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
|
||||||
|
if(scriptCode != USCRIPT_INVALID_CODE) {
|
||||||
|
if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
|
||||||
|
scriptCode = USCRIPT_HAN;
|
||||||
|
}
|
||||||
|
return setOneCode(scriptCode, scripts, capacity, err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: this is a bad API and should be deprecated, ticket #11141 */
|
||||||
U_CAPI int32_t U_EXPORT2
|
U_CAPI int32_t U_EXPORT2
|
||||||
uscript_getCode(const char* nameOrAbbrOrLocale,
|
uscript_getCode(const char* nameOrAbbrOrLocale,
|
||||||
UScriptCode* fillIn,
|
UScriptCode* fillIn,
|
||||||
int32_t capacity,
|
int32_t capacity,
|
||||||
UErrorCode* err){
|
UErrorCode* err){
|
||||||
|
if(U_FAILURE(*err)) {
|
||||||
UScriptCode code = USCRIPT_INVALID_CODE;
|
return 0;
|
||||||
int32_t numFilled=0;
|
|
||||||
int32_t len=0;
|
|
||||||
/* check arguments */
|
|
||||||
if(err==NULL ||U_FAILURE(*err)){
|
|
||||||
return numFilled;
|
|
||||||
}
|
}
|
||||||
if(nameOrAbbrOrLocale==NULL || fillIn == NULL || capacity<0){
|
if(nameOrAbbrOrLocale==NULL ||
|
||||||
|
(fillIn == NULL ? capacity != 0 : capacity < 0)) {
|
||||||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
return numFilled;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
UBool triedCode = FALSE;
|
||||||
if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){
|
if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){
|
||||||
/* try long and abbreviated script names first */
|
/* try long and abbreviated script names first */
|
||||||
code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
|
UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
|
||||||
|
if(code!=USCRIPT_INVALID_CODE) {
|
||||||
}
|
return setOneCode(code, fillIn, capacity, err);
|
||||||
if(code==(UScriptCode)UCHAR_INVALID_CODE){
|
|
||||||
/* Do not propagate error codes from just not finding a locale bundle. */
|
|
||||||
UErrorCode localErrorCode = U_ZERO_ERROR;
|
|
||||||
UResourceBundle* resB = ures_open(NULL,nameOrAbbrOrLocale,&localErrorCode);
|
|
||||||
if(U_SUCCESS(localErrorCode)&& localErrorCode != U_USING_DEFAULT_WARNING){
|
|
||||||
UResourceBundle* resD = ures_getByKey(resB,kLocaleScript,NULL,&localErrorCode);
|
|
||||||
if(U_SUCCESS(localErrorCode) ){
|
|
||||||
len =0;
|
|
||||||
while(ures_hasNext(resD)){
|
|
||||||
const UChar* name = ures_getNextString(resD,&len,NULL,&localErrorCode);
|
|
||||||
if(U_SUCCESS(localErrorCode)){
|
|
||||||
char cName[50] = {'\0'};
|
|
||||||
u_UCharsToChars(name,cName,len);
|
|
||||||
code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, cName);
|
|
||||||
/* got the script code now fill in the buffer */
|
|
||||||
if(numFilled<capacity){
|
|
||||||
*(fillIn)++=code;
|
|
||||||
numFilled++;
|
|
||||||
}else{
|
|
||||||
ures_close(resD);
|
|
||||||
ures_close(resB);
|
|
||||||
*err=U_BUFFER_OVERFLOW_ERROR;
|
|
||||||
return len;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ures_close(resD);
|
|
||||||
}
|
}
|
||||||
ures_close(resB);
|
triedCode = TRUE;
|
||||||
code = USCRIPT_INVALID_CODE;
|
|
||||||
}
|
}
|
||||||
if(code==(UScriptCode)UCHAR_INVALID_CODE){
|
char likely[ULOC_FULLNAME_CAPACITY];
|
||||||
/* still not found .. try long and abbreviated script names again */
|
UErrorCode internalErrorCode = U_ZERO_ERROR;
|
||||||
code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
|
int32_t length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
|
||||||
|
if(U_FAILURE(*err) || length != 0) {
|
||||||
|
return length;
|
||||||
}
|
}
|
||||||
if(code!=(UScriptCode)UCHAR_INVALID_CODE){
|
(void)uloc_addLikelySubtags(nameOrAbbrOrLocale,
|
||||||
/* we found it */
|
likely, UPRV_LENGTHOF(likely), &internalErrorCode);
|
||||||
if(numFilled<capacity){
|
if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
|
||||||
*(fillIn)++=code;
|
length = getCodesFromLocale(likely, fillIn, capacity, err);
|
||||||
numFilled++;
|
if(U_FAILURE(*err) || length != 0) {
|
||||||
}else{
|
return length;
|
||||||
*err=U_BUFFER_OVERFLOW_ERROR;
|
|
||||||
return len;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return numFilled;
|
if(!triedCode) {
|
||||||
|
/* still not found .. try long and abbreviated script names again */
|
||||||
|
UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
|
||||||
|
if(code!=USCRIPT_INVALID_CODE) {
|
||||||
|
return setOneCode(code, fillIn, capacity, err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -11,6 +11,38 @@
|
|||||||
#include "cucdapi.h"
|
#include "cucdapi.h"
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
|
|
||||||
|
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
|
||||||
|
int32_t i;
|
||||||
|
if(length == 0) {
|
||||||
|
strcpy(s, "(no scripts)");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
s[0] = 0;
|
||||||
|
for(i = 0; i < length; ++i) {
|
||||||
|
if(i > 0) {
|
||||||
|
strcat(s, " ");
|
||||||
|
}
|
||||||
|
strcat(s, uscript_getShortName(scripts[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void assertEqualScripts(const char *msg,
|
||||||
|
const UScriptCode scripts1[], int32_t length1,
|
||||||
|
const UScriptCode scripts2[], int32_t length2,
|
||||||
|
UErrorCode errorCode) {
|
||||||
|
char s1[80];
|
||||||
|
char s2[80];
|
||||||
|
if(U_FAILURE(errorCode)) {
|
||||||
|
log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
scriptsToString(scripts1, length1, s1);
|
||||||
|
scriptsToString(scripts2, length2, s2);
|
||||||
|
if(0!=strcmp(s1, s2)) {
|
||||||
|
log_err("Failed: %s: expected %s but got %s\n", msg, s1, s2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void TestUScriptCodeAPI(){
|
void TestUScriptCodeAPI(){
|
||||||
int i =0;
|
int i =0;
|
||||||
int numErrors =0;
|
int numErrors =0;
|
||||||
@ -112,6 +144,50 @@ void TestUScriptCodeAPI(){
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
|
||||||
|
static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
|
||||||
|
static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
|
||||||
|
static const UScriptCode HAN[1] = { USCRIPT_HAN };
|
||||||
|
static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
|
||||||
|
static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
|
||||||
|
static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
|
||||||
|
UScriptCode scripts[5];
|
||||||
|
UErrorCode err;
|
||||||
|
int32_t num;
|
||||||
|
|
||||||
|
// Should work regardless of whether we have locale data for the language.
|
||||||
|
err = U_ZERO_ERROR;
|
||||||
|
num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||||
|
assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik
|
||||||
|
err = U_ZERO_ERROR;
|
||||||
|
num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||||
|
assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa
|
||||||
|
|
||||||
|
// Multi-script languages.
|
||||||
|
err = U_ZERO_ERROR;
|
||||||
|
num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||||
|
assertEqualScripts("ja scripts: Kana Hira Hani",
|
||||||
|
JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
|
||||||
|
err = U_ZERO_ERROR;
|
||||||
|
num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||||
|
assertEqualScripts("ko scripts: Hang Hani",
|
||||||
|
KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
|
||||||
|
err = U_ZERO_ERROR;
|
||||||
|
num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||||
|
assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
|
||||||
|
err = U_ZERO_ERROR;
|
||||||
|
num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||||
|
assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
|
||||||
|
err = U_ZERO_ERROR;
|
||||||
|
num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||||
|
assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
|
||||||
|
|
||||||
|
// Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
|
||||||
|
err = U_ZERO_ERROR;
|
||||||
|
num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
|
||||||
|
assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
UScriptCode testAbbr[]={
|
UScriptCode testAbbr[]={
|
||||||
|
Loading…
Reference in New Issue
Block a user