2001-07-10 18:33:40 +00:00
|
|
|
/*
**********************************************************************
*   Copyright (C) 1997-2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File USCRIPT.C
*
* Modification History:
*
*   Date        Name        Description
*   07/06/2001  Ram         Creation.
******************************************************************************
*/
|
|
|
|
|
|
|
|
#include "unicode/uscript.h"
|
|
|
|
|
|
|
|
/*
 * Number of searchable entries in the lookup tables below.  These values are
 * passed as the search size to findStringIndex()/findCodeIndex(), so an entry
 * beyond the stated size is never found.
 *
 * scriptNames holds 38 strings.  scriptAbbr holds 42 active (not commented
 * out) strings; the previous value of 41 made the last one, "Zyyy", impossible
 * to look up.  Keep both values in step with the tables when entries are
 * added or removed.
 */
#define U_SCRIPT_NAMES_ARRAY_SIZE 38
#define U_SCRIPT_ABBR_ARRAY_SIZE 42
|
|
|
|
|
2001-08-24 00:22:07 +00:00
|
|
|
/*
 * Full script names, kept in ascending alphabetical order because
 * findStringIndex() binary-searches this table.  Entry i pairs with
 * scriptNameCodes[i]; the trailing comment names that code.
 */
static const char * const scriptNames[]={
    "ARABIC",       /* U_ARABIC     */
    "ARMENIAN",     /* U_ARMENIAN   */
    "BENGALI",      /* U_BENGALI    */
    "BOPOMOFO",     /* U_BOPOMOFO   */
    "CHEROKEE",     /* U_CHEROKEE   */
    "CYRILLIC",     /* U_CYRILLIC   */
    "DESERET",      /* U_DESERET    */
    "DEVANAGARI",   /* U_DEVANAGARI */
    "ETHIOPIC",     /* U_ETHIOPIC   */
    "GEORGIAN",     /* U_GEORGIAN   */
    "GOTHIC",       /* U_GOTHIC     */
    "GREEK",        /* U_GREEK      */
    "GUJARATI",     /* U_GUJARATI   */
    "GURMUKHI",     /* U_GURMUKHI   */
    "HAN",          /* U_HAN        */
    "HANGUL",       /* U_HANGUL     */
    "HEBREW",       /* U_HEBREW     */
    "HIRAGANA",     /* U_HIRAGANA   */
    "INHERITED",    /* U_INHERITED  */
    "KANNADA",      /* U_KANNADA    */
    "KATAKANA",     /* U_KATAKANA   */
    "KHMER",        /* U_KHMER      */
    "LATIN",        /* U_LATIN      */
    "MALAYALAM",    /* U_MALAYALAM  */
    "MONGOLIAN",    /* U_MONGOLIAN  */
    "MYANMAR",      /* U_MYANMAR    */
    "OGHAM",        /* U_OGHAM      */
    "OLD_ITALIC",   /* U_OLD_ITALIC */
    "ORIYA",        /* U_ORIYA      */
    "RUNIC",        /* U_RUNIC      */
    "SINHALA",      /* U_SINHALA    */
    "SYRIAC",       /* U_SYRIAC     */
    "TAMIL",        /* U_TAMIL      */
    "TELUGU",       /* U_TELUGU     */
    "THAANA",       /* U_THAANA     */
    "THAI",         /* U_THAI       */
    "TIBETAN",      /* U_TIBETAN    */
    "UCAS",         /* U_UCAS       */
};
|
|
|
|
|
2001-08-24 00:22:07 +00:00
|
|
|
/*
 * Four-letter script abbreviations, kept in ascending alphabetical order
 * because findStringIndex() binary-searches this table.  Entry i pairs with
 * scriptAbbrCodes[i]; the trailing comment names that code.
 *
 * Variant abbreviations that are currently disabled stay in place as
 * comments so the layout can be compared line by line with scriptAbbrCodes.
 */
static const char * const scriptAbbr[]= {
    "Arab",         /* U_ARABIC     */
    "Armn",         /* U_ARMENIAN   */
    "Beng",         /* U_BENGALI    */
    "Bopo",         /* U_BOPOMOFO   */
    "Cans",         /* U_UCAS       */
    "Cher",         /* U_CHEROKEE   */
    "Cyrl",         /* U_CYRILLIC   */
 /* "Cyrs", */      /* U_CYRILLIC   */
    "Deva",         /* U_DEVANAGARI */
    "Dsrt",         /* U_DESERET    */
    "Ethi",         /* U_ETHIOPIC   */
 /* "Geoa", */      /* U_GEORGIAN   */
 /* "Geon", */      /* U_GEORGIAN   */
    "Geor",         /* U_GEORGIAN   */
    "Goth",         /* U_GOTHIC     */
    "Grek",         /* U_GREEK      */
    "Gujr",         /* U_GUJARATI   */
    "Guru",         /* U_GURMUKHI   */
    "Hang",         /* U_HANGUL     */
    "Hani",         /* U_HAN        */
    "Hebr",         /* U_HEBREW     */
    "Hira",         /* U_HIRAGANA   */
    "Ital",         /* U_OLD_ITALIC */
    "Kana",         /* U_KATAKANA   */
    "Khmr",         /* U_KHMER      */
    "Knda",         /* U_KANNADA    */
    "Lao",          /* U_LAO        */
 /* "Laoo", */      /* U_LAO        */
 /* "Latf", */      /* U_LATIN      */
 /* "Latg", */      /* U_LATIN      */
    "Latn",         /* U_LATIN      */
    "Mlym",         /* U_MALAYALAM  */
    "Mong",         /* U_MONGOLIAN  */
    "Mymr",         /* U_MYANMAR    */
    "Ogam",         /* U_OGHAM      */
    "Orya",         /* U_ORIYA      */
    "Qaac",         /* U_COPTIC     */
    "Qaai",         /* U_INHERITED  */
    "Runr",         /* U_RUNIC      */
    "Sinh",         /* U_SINHALA    */
    "Syrc",         /* U_SYRIAC     */
 /* "Syre", */      /* U_SYRIAC     */
 /* "Syrj", */      /* U_SYRIAC     */
 /* "Syrn", */      /* U_SYRIAC     */
    "Taml",         /* U_TAMIL      */
    "Telu",         /* U_TELUGU     */
    "Thaa",         /* U_THAANA     */
    "Thai",         /* U_THAI       */
    "Tibt",         /* U_TIBETAN    */
    "Yiii",         /* U_YI         */
    "Zyyy",         /* U_COMMON     */
};
|
|
|
|
|
|
|
|
|
|
|
|
static const UScriptCode scriptNameCodes[]= {
|
|
|
|
U_ARABIC ,
|
|
|
|
U_ARMENIAN ,
|
|
|
|
U_BENGALI ,
|
|
|
|
U_BOPOMOFO ,
|
|
|
|
U_CHEROKEE ,
|
|
|
|
U_CYRILLIC ,
|
|
|
|
U_DESERET ,
|
|
|
|
U_DEVANAGARI ,
|
|
|
|
U_ETHIOPIC ,
|
|
|
|
U_GEORGIAN ,
|
|
|
|
U_GOTHIC ,
|
|
|
|
U_GREEK ,
|
|
|
|
U_GUJARATI ,
|
|
|
|
U_GURMUKHI ,
|
|
|
|
U_HAN ,
|
|
|
|
U_HANGUL ,
|
|
|
|
U_HEBREW ,
|
|
|
|
U_HIRAGANA ,
|
|
|
|
U_INHERITED ,
|
|
|
|
U_KANNADA ,
|
|
|
|
U_KATAKANA ,
|
|
|
|
U_KHMER ,
|
|
|
|
U_LATIN ,
|
|
|
|
U_MALAYALAM ,
|
|
|
|
U_MONGOLIAN ,
|
|
|
|
U_MYANMAR ,
|
|
|
|
U_OGHAM ,
|
|
|
|
U_OLD_ITALIC ,
|
|
|
|
U_ORIYA ,
|
|
|
|
U_RUNIC ,
|
|
|
|
U_SINHALA ,
|
|
|
|
U_SYRIAC ,
|
|
|
|
U_TAMIL ,
|
|
|
|
U_TELUGU ,
|
|
|
|
U_THAANA ,
|
|
|
|
U_THAI ,
|
|
|
|
U_TIBETAN ,
|
|
|
|
U_UCAS ,
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
static const UScriptCode scriptAbbrCodes[] = {
|
|
|
|
U_ARABIC ,
|
|
|
|
U_ARMENIAN ,
|
|
|
|
U_BENGALI ,
|
|
|
|
U_BOPOMOFO ,
|
|
|
|
U_UCAS ,
|
|
|
|
U_CHEROKEE ,
|
|
|
|
U_CYRILLIC ,
|
|
|
|
/* U_CYRILLIC ,*/
|
|
|
|
U_DEVANAGARI ,
|
|
|
|
U_DESERET ,
|
|
|
|
U_ETHIOPIC ,
|
|
|
|
/* U_GEORGIAN , */
|
|
|
|
/* U_GEORGIAN , */
|
|
|
|
U_GEORGIAN ,
|
|
|
|
U_GOTHIC ,
|
|
|
|
U_GREEK ,
|
|
|
|
U_GUJARATI ,
|
|
|
|
U_GURMUKHI ,
|
|
|
|
U_HANGUL ,
|
|
|
|
U_HAN ,
|
|
|
|
U_HEBREW ,
|
|
|
|
U_HIRAGANA ,
|
|
|
|
U_OLD_ITALIC ,
|
|
|
|
U_KATAKANA ,
|
|
|
|
U_KHMER ,
|
|
|
|
U_KANNADA ,
|
|
|
|
U_LAO ,
|
|
|
|
/* U_LAO , */
|
|
|
|
/* U_LATIN , */
|
|
|
|
/* U_LATIN , */
|
|
|
|
U_LATIN ,
|
|
|
|
U_MALAYALAM ,
|
|
|
|
U_MONGOLIAN ,
|
|
|
|
U_MYANMAR ,
|
|
|
|
U_OGHAM ,
|
|
|
|
U_ORIYA ,
|
|
|
|
U_COPTIC ,
|
|
|
|
U_INHERITED ,
|
|
|
|
U_RUNIC ,
|
|
|
|
U_SINHALA ,
|
|
|
|
U_SYRIAC ,
|
|
|
|
/* U_SYRIAC , */
|
|
|
|
/* U_SYRIAC , */
|
|
|
|
U_SYRIAC ,
|
|
|
|
U_TAMIL ,
|
|
|
|
U_TELUGU ,
|
|
|
|
U_THAANA ,
|
|
|
|
U_THAI ,
|
|
|
|
U_TIBETAN ,
|
|
|
|
U_YI ,
|
|
|
|
U_COMMON ,
|
|
|
|
};
|
2001-08-24 00:22:07 +00:00
|
|
|
/*
 * Printable names of the script code constants.  uchar_scriptCodeName()
 * indexes this table with (code + 1), so slot 0 is the entry returned for
 * U_INVALID_SCRIPT_CODE and the remaining slots follow in code order.
 */
static const char * const scriptCodeName[]={
    "U_INVALID_CODE",
    "U_COMMON",
    "U_INHERITED",
    "U_ARABIC",
    "U_ARMENIAN",
    "U_BENGALI",
    "U_BOPOMOFO",
    "U_CHEROKEE",
    "U_COPTIC",
    "U_CYRILLIC",
    "U_DESERET",
    "U_DEVANAGARI",
    "U_ETHIOPIC",
    "U_GEORGIAN",
    "U_GOTHIC",
    "U_GREEK",
    "U_GUJARATI",
    "U_GURMUKHI",
    "U_HAN",
    "U_HANGUL",
    "U_HEBREW",
    "U_HIRAGANA",
    "U_KANNADA",
    "U_KATAKANA",
    "U_KHMER",
    "U_LAO",
    "U_LATIN",
    "U_MALAYALAM",
    "U_MONGOLIAN",
    "U_MYANMAR",
    "U_OGHAM",
    "U_OLD_ITALIC",
    "U_ORIYA",
    "U_RUNIC",
    "U_SINHALA",
    "U_SYRIAC",
    "U_TAMIL",
    "U_TELUGU",
    "U_THAANA",
    "U_THAI",
    "U_TIBETAN",
    "U_UCAS",
    "U_YI",
    "U_SCRIPT_CODE_LIMIT",
};
|
2001-07-10 18:33:40 +00:00
|
|
|
|
|
|
|
/*
 * Binary-search a sorted string table.
 *
 * sortedArr  table of strings, ordered consistently with uprv_stricmp()
 * target     string to look for
 * size       number of entries of sortedArr to search
 *
 * Returns the index of an entry for which uprv_stricmp() reports equality
 * with target, or -1 when there is none (including when size <= 0).
 */
static int
findStringIndex(const char* const *sortedArr, const char* target, int size){
    int lo = 0;
    int hi = size-1;

    while(lo <= hi){
        int mid = (lo+hi)/2;
        int cmp = uprv_stricmp(sortedArr[mid],target);

        if(cmp == 0){
            return mid;
        }
        if(cmp < 0){
            /* entry sorts before target: continue in the upper half */
            lo = mid+1;
        }else{
            /* entry sorts after target: continue in the lower half */
            hi = mid-1;
        }
    }
    return -1;
}
|
|
|
|
/*
static int
findCodeIndex(const UScriptCode sorted[], const UScriptCode target, int size){
    int left, middle, right;
    left =0;
    right= size-1;
    while(left <= right){
        middle = (left+right)/2;
        if(sorted[middle] < target){
            left=middle+1;
        }else if(sorted[middle]>target){
            right=middle-1;
        }else{
            return middle;
        }
    }
    return -1;
}
*/
|
|
|
|
/* linearly search the array and return the index */
|
|
|
|
static int
|
|
|
|
findCodeIndex(const UScriptCode unsorted[], const UScriptCode target, int size){
|
|
|
|
int i=0;
|
|
|
|
while(i<size){
|
|
|
|
if(target == unsorted[i]){
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
U_CAPI UScriptCode
|
|
|
|
uchar_getScriptCode(const char* nameOrAbbrOrLocale, UErrorCode* err){
|
2001-08-02 16:44:21 +00:00
|
|
|
UScriptCode code = U_INVALID_SCRIPT_CODE;
|
2001-08-17 00:24:58 +00:00
|
|
|
int strIndex=0;
|
2001-07-10 18:33:40 +00:00
|
|
|
|
|
|
|
/* check arguments */
|
|
|
|
if(U_FAILURE(*err)){
|
|
|
|
return code;
|
|
|
|
}
|
|
|
|
/* try the Names array first */
|
2001-08-17 00:24:58 +00:00
|
|
|
strIndex = findStringIndex(scriptNames, nameOrAbbrOrLocale, U_SCRIPT_NAMES_ARRAY_SIZE);
|
2001-07-10 18:33:40 +00:00
|
|
|
|
2001-08-17 00:24:58 +00:00
|
|
|
if(strIndex>=0 && strIndex < U_SCRIPT_NAMES_ARRAY_SIZE){
|
|
|
|
code = (UScriptCode) scriptNameCodes[strIndex];
|
2001-07-10 18:33:40 +00:00
|
|
|
}
|
|
|
|
/* we did not find in names array so try abbr array*/
|
2001-08-02 16:44:21 +00:00
|
|
|
if(code ==U_INVALID_SCRIPT_CODE){
|
2001-08-17 00:24:58 +00:00
|
|
|
strIndex = findStringIndex(scriptAbbr, nameOrAbbrOrLocale, U_SCRIPT_ABBR_ARRAY_SIZE);
|
|
|
|
if(strIndex>=0 && strIndex < U_SCRIPT_NAMES_ARRAY_SIZE){
|
|
|
|
code = (UScriptCode) scriptAbbrCodes[strIndex];
|
2001-07-10 18:33:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
/* we still haven't found it try locale */
|
2001-08-02 16:44:21 +00:00
|
|
|
if(code==U_INVALID_SCRIPT_CODE){
|
2001-07-10 18:33:40 +00:00
|
|
|
UResourceBundle* resB = ures_open(u_getDataDirectory(),nameOrAbbrOrLocale,err);
|
2001-08-02 16:44:21 +00:00
|
|
|
if(U_SUCCESS(*err)&& *err != U_USING_DEFAULT_ERROR){
|
2001-07-10 18:33:40 +00:00
|
|
|
int32_t len=0;
|
|
|
|
UResourceBundle* resD = ures_getByKey(resB,"LocaleScript",NULL,err);
|
|
|
|
int index =0;
|
|
|
|
const UChar* name = ures_getStringByIndex(resD,0,&len,err);
|
2001-08-02 16:44:21 +00:00
|
|
|
if(U_SUCCESS(*err) ){
|
2001-07-10 18:33:40 +00:00
|
|
|
char cName[50] = {'\0'};
|
|
|
|
u_UCharsToChars(name,cName,len);
|
|
|
|
index = findStringIndex(scriptNames, cName, U_SCRIPT_NAMES_ARRAY_SIZE);
|
|
|
|
code = (UScriptCode) scriptNameCodes[index];
|
|
|
|
}
|
|
|
|
ures_close(resD);
|
|
|
|
|
|
|
|
}
|
|
|
|
ures_close(resB);
|
|
|
|
}
|
|
|
|
return code;
|
|
|
|
}
|
|
|
|
|
|
|
|
U_CAPI const char*
|
|
|
|
uchar_getScriptName(UScriptCode scriptCode){
|
|
|
|
int index = -1;
|
|
|
|
if(scriptCode > U_SCRIPT_CODE_LIMIT){
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
index = findCodeIndex(scriptNameCodes,scriptCode,U_SCRIPT_NAMES_ARRAY_SIZE);
|
|
|
|
if(index >=0){
|
|
|
|
return scriptNames[index];
|
|
|
|
}else{
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
U_CAPI const char*
|
|
|
|
uchar_getScriptAbbr(UScriptCode scriptCode){
|
|
|
|
int index = -1;
|
|
|
|
if(scriptCode > U_SCRIPT_CODE_LIMIT){
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
index = findCodeIndex(scriptAbbrCodes,scriptCode,U_SCRIPT_ABBR_ARRAY_SIZE);
|
|
|
|
if(index >=0){
|
|
|
|
return scriptAbbr[index];
|
|
|
|
}else{
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
U_CAPI const char*
|
|
|
|
uchar_scriptCodeName(UScriptCode code){
|
|
|
|
if(code>=0 && code<U_SCRIPT_CODE_LIMIT) {
|
|
|
|
return scriptCodeName[code+1];
|
|
|
|
} else{
|
2001-08-02 16:44:21 +00:00
|
|
|
return scriptCodeName[U_INVALID_SCRIPT_CODE+1];
|
2001-07-10 18:33:40 +00:00
|
|
|
}
|
|
|
|
}
|