scuffed-code/icu4c/source/common/uscript.c
Markus Scherer 2a992add12 ICU-1721 get UScriptCode from uprops.dat
X-SVN-Rev: 7827
2002-03-01 01:59:59 +00:00

377 lines
12 KiB
C

/*
**********************************************************************
* Copyright (C) 1997-2001, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File USCRIPT.C
*
* Modification History:
*
* Date Name Description
* 07/06/2001 Ram Creation.
******************************************************************************
*/
#include "unicode/uscript.h"
#include "unicode/ures.h"
#include "cstring.h"
#define USCRIPT_CODE_ARRAY_SIZE 440
static const char kLocaleScript[] = "LocaleScript";
static const char * const scriptNames[]={
"ARABIC", /* USCRIPT_ARABIC */
"ARMENIAN", /* USCRIPT_ARMENIAN */
"BENGALI", /* USCRIPT_BENGALI */
"BOPOMOFO", /* USCRIPT_BOPOMOFO */
"CANADIAN-ABORIGINAL", /* USCRIPT_UCAS */
"CHEROKEE", /* USCRIPT_CHEROKEE */
"COMMON", /* USCRIPT_COMMON */
"CYRILLIC", /* USCRIPT_CYRILLIC */
"DESERET", /* USCRIPT_DESERET */
"DEVANAGARI", /* USCRIPT_DEVANAGARI */
"ETHIOPIC", /* USCRIPT_ETHIOPIC */
"GEORGIAN", /* USCRIPT_GEORGIAN */
"GOTHIC", /* USCRIPT_GOTHIC */
"GREEK", /* USCRIPT_GREEK */
"GUJARATI", /* USCRIPT_GUJARATI */
"GURMUKHI", /* USCRIPT_GURMUKHI */
"HAN", /* USCRIPT_HAN */
"HANGUL", /* USCRIPT_HANGUL */
"HEBREW", /* USCRIPT_HEBREW */
"HIRAGANA", /* USCRIPT_HIRAGANA */
"INHERITED", /* USCRIPT_INHERITED */
"KANNADA", /* USCRIPT_KANNADA */
"KATAKANA", /* USCRIPT_KATAKANA */
"KHMER", /* USCRIPT_KHMER */
"LATIN", /* USCRIPT_LATIN */
"MALAYALAM", /* USCRIPT_MALAYALAM */
"MONGOLIAN", /* USCRIPT_MONGOLIAN */
"MYANMAR", /* USCRIPT_MYANMAR */
"OGHAM", /* USCRIPT_OGHAM */
"OLD-ITALIC", /* USCRIPT_OLD_ITALIC */
"ORIYA", /* USCRIPT_ORIYA */
"RUNIC", /* USCRIPT_RUNIC */
"SINHALA", /* USCRIPT_SINHALA */
"SYRIAC", /* USCRIPT_SYRIAC */
"TAMIL", /* USCRIPT_TAMIL */
"TELUGU", /* USCRIPT_TELUGU */
"THAANA", /* USCRIPT_THANA */
"THAI", /* USCRIPT_THAI */
"TIBETAN", /* USCRIPT_TIBETAN */
"UCAS", /* USCRIPT_UCAS */
"YI" /* USCRIPT_YI */
};
static const char * const scriptAbbr[]= {
"Arab", /* USCRIPT_ARABIC */
"Armn", /* USCRIPT_ARMENIAN */
"Beng", /* USCRIPT_BENGALI */
"Bopo", /* USCRIPT_BOPOMOFO */
"Cans", /* USCRIPT_UCAS */
"Cher", /* USCRIPT_CHEROKEE */
"Cyrl", /* USCRIPT_CYRILLIC */
/* "Cyrs", */ /* USCRIPT_CYRILLIC */
"Deva", /* USCRIPT_DEVANAGARI */
"Dsrt", /* USCRIPT_DESERET */
"Ethi", /* USCRIPT_ETHIOPIC */
/* "Geoa", */ /* USCRIPT_GEORGIAN */
/* "Geon", */ /* USCRIPT_GEORGIAN */
"Geor", /* USCRIPT_GEORGIAN */
"Goth", /* USCRIPT_GOTHIC */
"Grek", /* USCRIPT_GREEK */
"Gujr", /* USCRIPT_GUJARATI */
"Guru", /* USCRIPT_GURMUKHI */
"Hang", /* USCRIPT_HANGUL */
"Hani", /* USCRIPT_HAN */
"Hebr", /* USCRIPT_HEBREW */
"Hira", /* USCRIPT_HIRAGANA */
"Ital", /* USCRIPT_OLD_ITALIC */
"Kana", /* USCRIPT_KATAKANA */
"Khmr", /* USCRIPT_KHMER */
"Knda", /* USCRIPT_KANNADA */
"Lao", /* USCRIPT_LAO */
/*"Laoo", */ /* USCRIPT_LAO */
/*"Latf", */ /* USCRIPT_LATIN */
/*"Latg", */ /* USCRIPT_LATIN */
"Latn", /* USCRIPT_LATIN */
"Mlym", /* USCRIPT_MALAYALAM */
"Mong", /* USCRIPT_MONGOLIAN */
"Mymr", /* USCRIPT_MYANMAR */
"Ogam", /* USCRIPT_OGHAM */
"Orya", /* USCRIPT_ORIYA */
"Qaac", /* USCRIPT_COPTIC */
"Qaai", /* USCRIPT_INHERITED */
"Runr", /* USCRIPT_RUNIC */
"Sinh", /* USCRIPT_SINHALA */
"Syrc", /* USCRIPT_SYRIAC */
/* "Syre", */ /* USCRIPT_SYRIAC */
/* "Syrj", */ /* USCRIPT_SYRIAC */
/* "Syrn", */ /* USCRIPT_SYRIAC */
"Taml", /* USCRIPT_TAMIL */
"Telu", /* USCRIPT_TELUGU */
"Thaa", /* USCRIPT_THANA */
"Thai", /* USCRIPT_THAI */
"Tibt", /* USCRIPT_TIBETAN */
"Yiii", /* USCRIPT_YI */
"Zyyy" /* USCRIPT_COMMON */
};
static const UScriptCode scriptNameCodes[]= {
USCRIPT_ARABIC ,
USCRIPT_ARMENIAN ,
USCRIPT_BENGALI ,
USCRIPT_BOPOMOFO ,
USCRIPT_UCAS ,
USCRIPT_CHEROKEE ,
USCRIPT_COMMON ,
USCRIPT_CYRILLIC ,
USCRIPT_DESERET ,
USCRIPT_DEVANAGARI ,
USCRIPT_ETHIOPIC ,
USCRIPT_GEORGIAN ,
USCRIPT_GOTHIC ,
USCRIPT_GREEK ,
USCRIPT_GUJARATI ,
USCRIPT_GURMUKHI ,
USCRIPT_HAN ,
USCRIPT_HANGUL ,
USCRIPT_HEBREW ,
USCRIPT_HIRAGANA ,
USCRIPT_INHERITED ,
USCRIPT_KANNADA ,
USCRIPT_KATAKANA ,
USCRIPT_KHMER ,
USCRIPT_LATIN ,
USCRIPT_MALAYALAM ,
USCRIPT_MONGOLIAN ,
USCRIPT_MYANMAR ,
USCRIPT_OGHAM ,
USCRIPT_OLD_ITALIC ,
USCRIPT_ORIYA ,
USCRIPT_RUNIC ,
USCRIPT_SINHALA ,
USCRIPT_SYRIAC ,
USCRIPT_TAMIL ,
USCRIPT_TELUGU ,
USCRIPT_THAANA ,
USCRIPT_THAI ,
USCRIPT_TIBETAN ,
USCRIPT_UCAS ,
USCRIPT_YI
};
static const UScriptCode scriptAbbrCodes[] = {
USCRIPT_ARABIC ,
USCRIPT_ARMENIAN ,
USCRIPT_BENGALI ,
USCRIPT_BOPOMOFO ,
USCRIPT_UCAS ,
USCRIPT_CHEROKEE ,
USCRIPT_CYRILLIC ,
/* USCRIPT_CYRILLIC , */
USCRIPT_DEVANAGARI ,
USCRIPT_DESERET ,
USCRIPT_ETHIOPIC ,
/* USCRIPT_GEORGIAN , */
/* USCRIPT_GEORGIAN , */
USCRIPT_GEORGIAN ,
USCRIPT_GOTHIC ,
USCRIPT_GREEK ,
USCRIPT_GUJARATI ,
USCRIPT_GURMUKHI ,
USCRIPT_HANGUL ,
USCRIPT_HAN ,
USCRIPT_HEBREW ,
USCRIPT_HIRAGANA ,
USCRIPT_OLD_ITALIC ,
USCRIPT_KATAKANA ,
USCRIPT_KHMER ,
USCRIPT_KANNADA ,
USCRIPT_LAO ,
/* USCRIPT_LAO , */
/* USCRIPT_LATIN , */
/* USCRIPT_LATIN , */
USCRIPT_LATIN ,
USCRIPT_MALAYALAM ,
USCRIPT_MONGOLIAN ,
USCRIPT_MYANMAR ,
USCRIPT_OGHAM ,
USCRIPT_ORIYA ,
USCRIPT_COPTIC ,
USCRIPT_INHERITED ,
USCRIPT_RUNIC ,
USCRIPT_SINHALA ,
USCRIPT_SYRIAC ,
/* USCRIPT_SYRIAC , */
/* USCRIPT_SYRIAC , */
/* USCRIPT_SYRIAC , */
USCRIPT_TAMIL ,
USCRIPT_TELUGU ,
USCRIPT_THAANA ,
USCRIPT_THAI ,
USCRIPT_TIBETAN ,
USCRIPT_YI ,
USCRIPT_COMMON
};
/* binary search the string array */
U_INLINE static int
findStringIndex(const char* const *sortedArr, const char* target, int size){
int left, middle, right,rc;
left =0;
right= size-1;
while(left <= right){
middle = (left+right)/2;
rc=uprv_stricmp(sortedArr[middle],target);
if(rc<0){
left = middle+1;
}else if(rc >0){
right = middle -1;
}else{
return middle;
}
}
return -1;
}
/*
static int
findCodeIndex(const UScriptCode sorted[], const UScriptCode target, int size){
int left, middle, right;
left =0;
right= size-1;
while(left <= right){
middle = (left+right)/2;
if(sorted[middle] < target){
left=middle+1;
}else if(sorted[middle]>target){
right=middle-1;
}else{
return middle;
}
}
return -1;
}
*/
/* linearly search the array and return the index */
U_INLINE static int
findCodeIndex(const UScriptCode unsorted[], const UScriptCode target, int size){
int i=0;
while(i<size){
if(target == unsorted[i]){
return i;
}
i++;
}
return -1;
}
U_CAPI int32_t U_EXPORT2
uscript_getCode(const char* nameOrAbbrOrLocale,
UScriptCode* fillIn,
int32_t capacity,
UErrorCode* err){
UScriptCode code = USCRIPT_INVALID_CODE;
int strIndex=0;
int32_t numFilled=0;
int32_t len=0;
/* check arguments */
if(U_FAILURE(*err)){
return numFilled;
}
/* try the Names array first */
strIndex = findStringIndex(scriptNames, nameOrAbbrOrLocale, sizeof(scriptNames)/sizeof(*scriptNames));
if(strIndex>=0){
code = (UScriptCode) scriptNameCodes[strIndex];
len = 1;
}
/* we did not find in names array so try abbr array*/
if(code ==USCRIPT_INVALID_CODE){
strIndex = findStringIndex(scriptAbbr, nameOrAbbrOrLocale, sizeof(scriptAbbr)/sizeof(*scriptAbbr));
if(strIndex>=0){
code = (UScriptCode) scriptAbbrCodes[strIndex];
len = 1;
}
}
/* we still haven't found it try locale */
if(code==USCRIPT_INVALID_CODE){
UResourceBundle* resB = ures_open(u_getDataDirectory(),nameOrAbbrOrLocale,err);
if(U_SUCCESS(*err)&& *err != U_USING_DEFAULT_ERROR){
UResourceBundle* resD = ures_getByKey(resB,kLocaleScript,NULL,err);
int index =0;
if(U_SUCCESS(*err) ){
len =0;
while(ures_hasNext(resD)){
const UChar* name = ures_getNextString(resD,&len,NULL,err);
if(U_SUCCESS(*err)){
char cName[50] = {'\0'};
u_UCharsToChars(name,cName,len);
index = findStringIndex(scriptAbbr, cName, sizeof(scriptAbbr)/sizeof(*scriptAbbr));
code = (UScriptCode) scriptAbbrCodes[index];
/* got the script code now fill in the buffer */
if(numFilled<=capacity){
*(fillIn)++=code;
numFilled++;
}else{
ures_close(resD);
ures_close(resB);
*err=U_BUFFER_OVERFLOW_ERROR;
return len;
}
}
}
}
ures_close(resD);
}
ures_close(resB);
}else{
/* we found it */
if(numFilled<=capacity){
*(fillIn)++=code;
numFilled++;
}else{
*err=U_BUFFER_OVERFLOW_ERROR;
return len;
}
}
return numFilled;
}
U_CAPI const char* U_EXPORT2
uscript_getName(UScriptCode scriptCode){
int index = -1;
if(scriptCode > USCRIPT_CODE_LIMIT){
return "";
}
index = findCodeIndex(scriptNameCodes,scriptCode,sizeof(scriptNameCodes)/sizeof(*scriptNameCodes));
if(index >=0){
return scriptNames[index];
}else{
return "";
}
}
U_CAPI const char* U_EXPORT2
uscript_getShortName(UScriptCode scriptCode){
int index = -1;
if(scriptCode > USCRIPT_CODE_LIMIT){
return "";
}
index = findCodeIndex(scriptAbbrCodes,scriptCode,sizeof(scriptAbbrCodes)/sizeof(*scriptAbbrCodes));
if(index >=0){
return scriptAbbr[index];
}else{
return "";
}
}