// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
 * Copyright (c) 1997-2016, International Business Machines
 * Corporation and others. All Rights Reserved.
 ********************************************************************/
# include <string.h>
# include "unicode/utypes.h"
# include "unicode/uscript.h"
# include "unicode/uchar.h"
# include "cintltst.h"
2003-12-05 22:48:48 +00:00
# include "cucdapi.h"
2014-08-28 14:55:34 +00:00
# include "cmemory.h"
2006-02-09 01:28:31 +00:00
2014-08-28 23:51:38 +00:00
/**
 * Renders a list of script codes as their short names, joined by " ".
 *
 * An empty list (length == 0) is rendered as the fixed text " (no scripts) ".
 * A script whose short name lookup yields NULL is rendered as "?" instead of
 * being handed to the string functions (a NULL result from
 * uscript_getShortName() is treated as possible elsewhere in this file).
 *
 * @param scripts script codes to render
 * @param length  number of entries of scripts to use
 * @param s       caller-provided output buffer. No capacity is passed in, so
 *                the caller must size it for all names plus separators.
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    char *tail = s;  /* always points at the terminating NUL of s */
    int32_t i;
    if (length == 0) {
        strcpy(s, " (no scripts) ");
        return;
    }
    *tail = 0;
    for (i = 0; i < length; ++i) {
        const char *name = uscript_getShortName(scripts[i]);
        if (name == NULL) {
            name = "?";
        }
        if (i > 0) {
            strcpy(tail, " ");
            tail += strlen(tail);
        }
        /* Append at the tracked end instead of rescanning s with strcat(). */
        strcpy(tail, name);
        tail += strlen(tail);
    }
}
/**
 * Checks that two script lists render to the same text.
 *
 * If errorCode is a failure, that failure is logged with log_err() and no
 * comparison is made. Otherwise both lists are rendered with
 * scriptsToString() and a mismatch is reported with log_data_err().
 *
 * @param msg       label included in any log output
 * @param scripts1  expected script codes
 * @param length1   number of entries in scripts1
 * @param scripts2  actual script codes
 * @param length2   number of entries in scripts2
 * @param errorCode status of the call that produced scripts2
 */
static void assertEqualScripts(const char *msg,
                               const UScriptCode scripts1[], int32_t length1,
                               const UScriptCode scripts2[], int32_t length2,
                               UErrorCode errorCode) {
    char expectedText[80];
    char actualText[80];

    if (!U_FAILURE(errorCode)) {
        scriptsToString(scripts1, length1, expectedText);
        scriptsToString(scripts2, length2, actualText);
        if (strcmp(expectedText, actualText) != 0) {
            log_data_err(" Failed: %s: expected %s but got %s \n ", msg, expectedText, actualText);
        }
    } else {
        log_err(" Failed: %s - %s \n ", msg, u_errorName(errorCode));
    }
}
2003-05-15 16:28:58 +00:00
void TestUScriptCodeAPI ( ) {
int i = 0 ;
int numErrors = 0 ;
{
const char * testNames [ ] = {
/* test locale */
" en " , " en_US " , " sr " , " ta " , " te_IN " ,
" hi " , " he " , " ar " ,
/* test abbr */
" Hani " , " Hang " , " Hebr " , " Hira " ,
" Knda " , " Kana " , " Khmr " , " Lao " ,
" Latn " , /*"Latf","Latg",*/
" Mlym " , " Mong " ,
/* test names */
" CYRILLIC " , " DESERET " , " DEVANAGARI " , " ETHIOPIC " , " GEORGIAN " ,
" GOTHIC " , " GREEK " , " GUJARATI " , " COMMON " , " INHERITED " ,
/* test lower case names */
" malayalam " , " mongolian " , " myanmar " , " ogham " , " old-italic " ,
" oriya " , " runic " , " sinhala " , " syriac " , " tamil " ,
" telugu " , " thaana " , " thai " , " tibetan " ,
/* test the bounds*/
" tagb " , " arabic " ,
/* test bogus */
" asfdasd " , " 5464 " , " 12235 " ,
/* test the last index */
" zyyy " , " YI " ,
2014-01-10 02:24:54 +00:00
NULL
2003-05-15 16:28:58 +00:00
} ;
UScriptCode expected [ ] = {
/* locales should return */
USCRIPT_LATIN , USCRIPT_LATIN , USCRIPT_CYRILLIC , USCRIPT_TAMIL , USCRIPT_TELUGU ,
USCRIPT_DEVANAGARI , USCRIPT_HEBREW , USCRIPT_ARABIC ,
/* abbr should return */
USCRIPT_HAN , USCRIPT_HANGUL , USCRIPT_HEBREW , USCRIPT_HIRAGANA ,
USCRIPT_KANNADA , USCRIPT_KATAKANA , USCRIPT_KHMER , USCRIPT_LAO ,
USCRIPT_LATIN , /* USCRIPT_LATIN, USCRIPT_LATIN,*/
USCRIPT_MALAYALAM , USCRIPT_MONGOLIAN ,
/* names should return */
USCRIPT_CYRILLIC , USCRIPT_DESERET , USCRIPT_DEVANAGARI , USCRIPT_ETHIOPIC , USCRIPT_GEORGIAN ,
USCRIPT_GOTHIC , USCRIPT_GREEK , USCRIPT_GUJARATI , USCRIPT_COMMON , USCRIPT_INHERITED ,
/* lower case names should return */
USCRIPT_MALAYALAM , USCRIPT_MONGOLIAN , USCRIPT_MYANMAR , USCRIPT_OGHAM , USCRIPT_OLD_ITALIC ,
USCRIPT_ORIYA , USCRIPT_RUNIC , USCRIPT_SINHALA , USCRIPT_SYRIAC , USCRIPT_TAMIL ,
USCRIPT_TELUGU , USCRIPT_THAANA , USCRIPT_THAI , USCRIPT_TIBETAN ,
/* bounds */
USCRIPT_TAGBANWA , USCRIPT_ARABIC ,
/* bogus names should return invalid code */
USCRIPT_INVALID_CODE , USCRIPT_INVALID_CODE , USCRIPT_INVALID_CODE ,
USCRIPT_COMMON , USCRIPT_YI ,
} ;
UErrorCode err = U_ZERO_ERROR ;
const int32_t capacity = 10 ;
2014-01-10 02:24:54 +00:00
for ( ; testNames [ i ] ! = NULL ; i + + ) {
2003-05-15 16:28:58 +00:00
UScriptCode script [ 10 ] = { USCRIPT_INVALID_CODE } ;
uscript_getCode ( testNames [ i ] , script , capacity , & err ) ;
if ( script [ 0 ] ! = expected [ i ] ) {
2009-06-09 21:28:13 +00:00
log_data_err ( " Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?) \n " ,
2003-05-15 16:28:58 +00:00
script [ 0 ] , expected [ i ] , testNames [ i ] ) ;
numErrors + + ;
}
}
if ( numErrors > 0 ) {
log_data_err ( " Errors uchar_getScriptCode() : %i \n " , numErrors ) ;
}
}
{
UErrorCode err = U_ZERO_ERROR ;
int32_t capacity = 0 ;
2007-07-25 18:46:17 +00:00
int32_t j ;
2005-05-19 20:28:20 +00:00
UScriptCode jaCode [ ] = { USCRIPT_KATAKANA , USCRIPT_HIRAGANA , USCRIPT_HAN } ;
2003-05-15 16:28:58 +00:00
UScriptCode script [ 10 ] = { USCRIPT_INVALID_CODE } ;
int32_t num = uscript_getCode ( " ja " , script , capacity , & err ) ;
/* preflight */
if ( err = = U_BUFFER_OVERFLOW_ERROR ) {
err = U_ZERO_ERROR ;
capacity = 10 ;
num = uscript_getCode ( " ja " , script , capacity , & err ) ;
2016-02-24 21:48:56 +00:00
if ( num ! = UPRV_LENGTHOF ( jaCode ) ) {
2007-07-25 18:46:17 +00:00
log_err ( " Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n " ,
2016-02-24 21:48:56 +00:00
num , UPRV_LENGTHOF ( jaCode ) ) ;
2007-07-25 18:46:17 +00:00
}
2016-02-24 21:48:56 +00:00
for ( j = 0 ; j < UPRV_LENGTHOF ( jaCode ) ; j + + ) {
2007-07-25 18:46:17 +00:00
if ( script [ j ] ! = jaCode [ j ] ) {
log_err ( " Japanese locale: code #%d was %d (%s) but expected %d (%s) \n " , j ,
script [ j ] , uscript_getName ( script [ j ] ) ,
jaCode [ j ] , uscript_getName ( jaCode [ j ] ) ) ;
}
2003-05-15 16:28:58 +00:00
}
} else {
log_data_err ( " Errors in uscript_getScriptCode() expected error : %s got: %s \n " ,
" U_BUFFER_OVERFLOW_ERROR " ,
u_errorName ( err ) ) ;
}
}
2014-08-28 23:51:38 +00:00
{
static const UScriptCode LATIN [ 1 ] = { USCRIPT_LATIN } ;
static const UScriptCode CYRILLIC [ 1 ] = { USCRIPT_CYRILLIC } ;
static const UScriptCode DEVANAGARI [ 1 ] = { USCRIPT_DEVANAGARI } ;
static const UScriptCode HAN [ 1 ] = { USCRIPT_HAN } ;
static const UScriptCode JAPANESE [ 3 ] = { USCRIPT_KATAKANA , USCRIPT_HIRAGANA , USCRIPT_HAN } ;
static const UScriptCode KOREAN [ 2 ] = { USCRIPT_HANGUL , USCRIPT_HAN } ;
static const UScriptCode HAN_BOPO [ 2 ] = { USCRIPT_HAN , USCRIPT_BOPOMOFO } ;
UScriptCode scripts [ 5 ] ;
UErrorCode err ;
int32_t num ;
// Should work regardless of whether we have locale data for the language.
err = U_ZERO_ERROR ;
num = uscript_getCode ( " tg " , scripts , UPRV_LENGTHOF ( scripts ) , & err ) ;
assertEqualScripts ( " tg script: Cyrl " , CYRILLIC , 1 , scripts , num , err ) ; // Tajik
err = U_ZERO_ERROR ;
num = uscript_getCode ( " xsr " , scripts , UPRV_LENGTHOF ( scripts ) , & err ) ;
assertEqualScripts ( " xsr script: Deva " , DEVANAGARI , 1 , scripts , num , err ) ; // Sherpa
// Multi-script languages.
err = U_ZERO_ERROR ;
num = uscript_getCode ( " ja " , scripts , UPRV_LENGTHOF ( scripts ) , & err ) ;
assertEqualScripts ( " ja scripts: Kana Hira Hani " ,
JAPANESE , UPRV_LENGTHOF ( JAPANESE ) , scripts , num , err ) ;
err = U_ZERO_ERROR ;
num = uscript_getCode ( " ko " , scripts , UPRV_LENGTHOF ( scripts ) , & err ) ;
assertEqualScripts ( " ko scripts: Hang Hani " ,
KOREAN , UPRV_LENGTHOF ( KOREAN ) , scripts , num , err ) ;
err = U_ZERO_ERROR ;
num = uscript_getCode ( " zh " , scripts , UPRV_LENGTHOF ( scripts ) , & err ) ;
assertEqualScripts ( " zh script: Hani " , HAN , 1 , scripts , num , err ) ;
err = U_ZERO_ERROR ;
num = uscript_getCode ( " zh-Hant " , scripts , UPRV_LENGTHOF ( scripts ) , & err ) ;
assertEqualScripts ( " zh-Hant scripts: Hani Bopo " , HAN_BOPO , 2 , scripts , num , err ) ;
err = U_ZERO_ERROR ;
num = uscript_getCode ( " zh-TW " , scripts , UPRV_LENGTHOF ( scripts ) , & err ) ;
assertEqualScripts ( " zh-TW scripts: Hani Bopo " , HAN_BOPO , 2 , scripts , num , err ) ;
// Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
err = U_ZERO_ERROR ;
num = uscript_getCode ( " ro-RO " , scripts , UPRV_LENGTHOF ( scripts ) , & err ) ;
assertEqualScripts ( " ro-RO script: Latn " , LATIN , 1 , scripts , num , err ) ;
}
2003-05-15 16:28:58 +00:00
{
UScriptCode testAbbr [ ] = {
/* names should return */
USCRIPT_CYRILLIC , USCRIPT_DESERET , USCRIPT_DEVANAGARI , USCRIPT_ETHIOPIC , USCRIPT_GEORGIAN ,
USCRIPT_GOTHIC , USCRIPT_GREEK , USCRIPT_GUJARATI ,
} ;
const char * expectedNames [ ] = {
/* test names */
" Cyrillic " , " Deseret " , " Devanagari " , " Ethiopic " , " Georgian " ,
" Gothic " , " Greek " , " Gujarati " ,
2014-01-10 02:24:54 +00:00
NULL
2003-05-15 16:28:58 +00:00
} ;
i = 0 ;
2016-02-24 21:48:56 +00:00
while ( i < UPRV_LENGTHOF ( testAbbr ) ) {
2003-05-15 16:28:58 +00:00
const char * name = uscript_getName ( testAbbr [ i ] ) ;
if ( name = = NULL ) {
log_data_err ( " Couldn't get script name \n " ) ;
return ;
}
numErrors = 0 ;
if ( strcmp ( expectedNames [ i ] , name ) ! = 0 ) {
log_err ( " Error getting abbreviations Got: %s Expected: %s \n " , name , expectedNames [ i ] ) ;
numErrors + + ;
}
if ( numErrors > 0 ) {
if ( numErrors > 0 ) {
log_err ( " Errors uchar_getScriptAbbr() : %i \n " , numErrors ) ;
}
}
i + + ;
}
}
{
UScriptCode testAbbr [ ] = {
/* abbr should return */
USCRIPT_HAN , USCRIPT_HANGUL , USCRIPT_HEBREW , USCRIPT_HIRAGANA ,
USCRIPT_KANNADA , USCRIPT_KATAKANA , USCRIPT_KHMER , USCRIPT_LAO ,
USCRIPT_LATIN ,
USCRIPT_MALAYALAM , USCRIPT_MONGOLIAN ,
} ;
const char * expectedAbbr [ ] = {
/* test abbr */
" Hani " , " Hang " , " Hebr " , " Hira " ,
" Knda " , " Kana " , " Khmr " , " Laoo " ,
" Latn " ,
" Mlym " , " Mong " ,
2014-01-10 02:24:54 +00:00
NULL
2003-05-15 16:28:58 +00:00
} ;
i = 0 ;
2016-02-24 21:48:56 +00:00
while ( i < UPRV_LENGTHOF ( testAbbr ) ) {
2003-05-15 16:28:58 +00:00
const char * name = uscript_getShortName ( testAbbr [ i ] ) ;
numErrors = 0 ;
if ( strcmp ( expectedAbbr [ i ] , name ) ! = 0 ) {
log_err ( " Error getting abbreviations Got: %s Expected: %s \n " , name , expectedAbbr [ i ] ) ;
numErrors + + ;
}
if ( numErrors > 0 ) {
if ( numErrors > 0 ) {
log_err ( " Errors uchar_getScriptAbbr() : %i \n " , numErrors ) ;
}
}
i + + ;
}
}
/* now test uscript_getScript() API */
{
uint32_t codepoints [ ] = {
0x0000FF9D , /* USCRIPT_KATAKANA*/
0x0000FFBE , /* USCRIPT_HANGUL*/
0x0000FFC7 , /* USCRIPT_HANGUL*/
0x0000FFCF , /* USCRIPT_HANGUL*/
0x0000FFD7 , /* USCRIPT_HANGUL*/
0x0000FFDC , /* USCRIPT_HANGUL*/
0x00010300 , /* USCRIPT_OLD_ITALIC*/
0x00010330 , /* USCRIPT_GOTHIC*/
0x0001034A , /* USCRIPT_GOTHIC*/
0x00010400 , /* USCRIPT_DESERET*/
0x00010428 , /* USCRIPT_DESERET*/
0x0001D167 , /* USCRIPT_INHERITED*/
0x0001D17B , /* USCRIPT_INHERITED*/
0x0001D185 , /* USCRIPT_INHERITED*/
0x0001D1AA , /* USCRIPT_INHERITED*/
0x00020000 , /* USCRIPT_HAN*/
0x00000D02 , /* USCRIPT_MALAYALAM*/
2017-04-26 21:17:13 +00:00
0x00050005 , /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
2003-05-15 16:28:58 +00:00
0x00000000 , /* USCRIPT_COMMON*/
0x0001D169 , /* USCRIPT_INHERITED*/
0x0001D182 , /* USCRIPT_INHERITED*/
0x0001D18B , /* USCRIPT_INHERITED*/
0x0001D1AD , /* USCRIPT_INHERITED*/
} ;
UScriptCode expected [ ] = {
USCRIPT_KATAKANA ,
USCRIPT_HANGUL ,
USCRIPT_HANGUL ,
USCRIPT_HANGUL ,
USCRIPT_HANGUL ,
USCRIPT_HANGUL ,
USCRIPT_OLD_ITALIC ,
USCRIPT_GOTHIC ,
USCRIPT_GOTHIC ,
USCRIPT_DESERET ,
USCRIPT_DESERET ,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
USCRIPT_HAN ,
USCRIPT_MALAYALAM ,
2006-03-03 20:59:01 +00:00
USCRIPT_UNKNOWN ,
2003-05-15 16:28:58 +00:00
USCRIPT_COMMON ,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
} ;
UScriptCode code = USCRIPT_INVALID_CODE ;
UErrorCode status = U_ZERO_ERROR ;
UBool passed = TRUE ;
2014-08-28 22:13:45 +00:00
for ( i = 0 ; i < UPRV_LENGTHOF ( codepoints ) ; + + i ) {
2003-05-15 16:28:58 +00:00
code = uscript_getScript ( codepoints [ i ] , & status ) ;
if ( U_SUCCESS ( status ) ) {
if ( code ! = expected [ i ] | |
code ! = ( UScriptCode ) u_getIntPropertyValue ( codepoints [ i ] , UCHAR_SCRIPT )
) {
log_err ( " uscript_getScript for codepoint \\ U%08X failed \n " , codepoints [ i ] ) ;
passed = FALSE ;
}
} else {
log_err ( " uscript_getScript for codepoint \\ U%08X failed. Error: %s \n " ,
codepoints [ i ] , u_errorName ( status ) ) ;
break ;
}
}
if ( passed = = FALSE ) {
log_err ( " uscript_getScript failed. \n " ) ;
}
}
{
UScriptCode code = USCRIPT_INVALID_CODE ;
UErrorCode status = U_ZERO_ERROR ;
code = uscript_getScript ( 0x001D169 , & status ) ;
if ( code ! = USCRIPT_INHERITED ) {
log_err ( " \\ U001D169 is not contained in USCRIPT_INHERITED " ) ;
}
}
{
UScriptCode code = USCRIPT_INVALID_CODE ;
UErrorCode status = U_ZERO_ERROR ;
int32_t err = 0 ;
for ( i = 0 ; i < = 0x10ffff ; i + + ) {
code = uscript_getScript ( i , & status ) ;
if ( code = = USCRIPT_INVALID_CODE ) {
err + + ;
log_err ( " uscript_getScript for codepoint \\ U%08X failed. \n " , i ) ;
}
}
if ( err > 0 ) {
log_err ( " uscript_getScript failed for %d codepoints \n " , err ) ;
}
}
{
2006-02-16 17:41:18 +00:00
for ( i = 0 ; ( UScriptCode ) i < USCRIPT_CODE_LIMIT ; i + + ) {
2003-05-15 16:28:58 +00:00
const char * name = uscript_getName ( ( UScriptCode ) i ) ;
if ( name = = NULL | | strcmp ( name , " " ) = = 0 ) {
2006-03-03 20:59:01 +00:00
log_err ( " uscript_getName failed for code %i: name is NULL or \" \" \n " , i ) ;
2003-05-15 16:28:58 +00:00
}
}
}
2010-09-21 00:12:49 +00:00
2006-02-09 01:28:31 +00:00
{
2006-03-03 20:59:01 +00:00
/*
* These script codes were originally added to ICU pre - 3.6 , so that ICU would
* have all ISO 15924 script codes . ICU was then based on Unicode 4.1 .
* These script codes were added with only short names because we don ' t
* want to invent long names ourselves .
* Unicode 5 and later encode some of these scripts and give them long names .
* Whenever this happens , the long script names here need to be updated .
*/
static const char * expectedLong [ ] = {
2014-04-14 22:30:09 +00:00
" Balinese " , " Batak " , " Blis " , " Brahmi " , " Cham " , " Cirt " , " Cyrs " ,
" Egyd " , " Egyh " , " Egyptian_Hieroglyphs " ,
2015-05-28 23:16:00 +00:00
" Geok " , " Hans " , " Hant " , " Pahawh_Hmong " , " Old_Hungarian " , " Inds " ,
2014-04-14 22:30:09 +00:00
" Javanese " , " Kayah_Li " , " Latf " , " Latg " ,
" Lepcha " , " Linear_A " , " Mandaic " , " Maya " , " Meroitic_Hieroglyphs " ,
" Nko " , " Old_Turkic " , " Old_Permic " , " Phags_Pa " , " Phoenician " ,
2011-12-08 22:37:24 +00:00
" Miao " , " Roro " , " Sara " , " Syre " , " Syrj " , " Syrn " , " Teng " , " Vai " , " Visp " , " Cuneiform " ,
2006-03-03 20:59:01 +00:00
" Zxxx " , " Unknown " ,
2015-05-28 23:16:00 +00:00
" Carian " , " Jpan " , " Tai_Tham " , " Lycian " , " Lydian " , " Ol_Chiki " , " Rejang " , " Saurashtra " , " SignWriting " , " Sundanese " ,
2009-11-13 19:25:21 +00:00
" Moon " , " Meetei_Mayek " ,
2008-04-04 22:47:43 +00:00
/* new in ICU 4.0 */
2011-12-08 22:37:24 +00:00
" Imperial_Aramaic " , " Avestan " , " Chakma " , " Kore " ,
2014-04-14 22:30:09 +00:00
" Kaithi " , " Manichaean " , " Inscriptional_Pahlavi " , " Psalter_Pahlavi " , " Phlv " ,
" Inscriptional_Parthian " , " Samaritan " , " Tai_Viet " ,
2008-04-04 22:47:43 +00:00
" Zmth " , " Zsym " ,
2009-11-13 19:25:21 +00:00
/* new in ICU 4.4 */
" Bamum " , " Lisu " , " Nkgb " , " Old_South_Arabian " ,
2010-09-21 00:12:49 +00:00
/* new in ICU 4.6 */
2014-04-14 22:30:09 +00:00
" Bassa_Vah " , " Duployan " , " Elbasan " , " Grantha " , " Kpel " ,
" Loma " , " Mende_Kikakui " , " Meroitic_Cursive " ,
" Old_North_Arabian " , " Nabataean " , " Palmyrene " , " Khudawadi " , " Warang_Citi " ,
2011-05-10 20:56:53 +00:00
/* new in ICU 4.8 */
2017-04-26 21:17:13 +00:00
" Afak " , " Jurc " , " Mro " , " Nushu " , " Sharada " , " Sora_Sompeng " , " Takri " , " Tangut " , " Wole " ,
2011-12-08 22:37:24 +00:00
/* new in ICU 49 */
2015-05-28 23:16:00 +00:00
" Anatolian_Hieroglyphs " , " Khojki " , " Tirhuta " ,
2013-05-15 21:51:04 +00:00
/* new in ICU 52 */
2014-04-14 22:30:09 +00:00
" Caucasian_Albanian " , " Mahajani " ,
/* new in ICU 54 */
2016-04-08 22:23:30 +00:00
" Ahom " , " Hatran " , " Modi " , " Multani " , " Pau_Cin_Hau " , " Siddham " ,
// new in ICU 58
2017-04-26 21:17:13 +00:00
" Adlam " , " Bhaiksuki " , " Marchen " , " Newa " , " Osage " , " Hanb " , " Jamo " , " Zsye " ,
// new in ICU 60
2018-04-03 23:09:49 +00:00
" Masaram_Gondi " , " Soyombo " , " Zanabazar_Square " ,
// new in ICU 61
" Dogra " , " Gunjala_Gondi " , " Makasar " , " Medefaidrin " ,
" Hanifi_Rohingya " , " Sogdian " , " Old_Sogdian " ,
2018-10-11 23:04:05 +00:00
// new in ICU 64
" Elymaic " , " Nyiakeng_Puachue_Hmong " , " Nandinagari " , " Wancho " ,
2019-11-21 21:29:18 +00:00
// new in ICU 66
" Chorasmian " , " Dives_Akuru " , " Khitan_Small_Script " , " Yezidi " ,
2006-03-03 20:59:01 +00:00
} ;
static const char * expectedShort [ ] = {
2014-04-14 22:30:09 +00:00
" Bali " , " Batk " , " Blis " , " Brah " , " Cham " , " Cirt " , " Cyrs " , " Egyd " , " Egyh " , " Egyp " ,
" Geok " , " Hans " , " Hant " , " Hmng " , " Hung " , " Inds " , " Java " , " Kali " , " Latf " , " Latg " ,
" Lepc " , " Lina " , " Mand " , " Maya " , " Mero " , " Nkoo " , " Orkh " , " Perm " , " Phag " , " Phnx " ,
" Plrd " , " Roro " , " Sara " , " Syre " , " Syrj " , " Syrn " , " Teng " , " Vaii " , " Visp " , " Xsux " ,
2006-02-09 01:28:31 +00:00
" Zxxx " , " Zzzz " ,
2006-11-08 23:41:46 +00:00
" Cari " , " Jpan " , " Lana " , " Lyci " , " Lydi " , " Olck " , " Rjng " , " Saur " , " Sgnw " , " Sund " ,
2007-03-06 01:05:52 +00:00
" Moon " , " Mtei " ,
2008-04-04 22:47:43 +00:00
/* new in ICU 4.0 */
" Armi " , " Avst " , " Cakm " , " Kore " ,
" Kthi " , " Mani " , " Phli " , " Phlp " , " Phlv " , " Prti " , " Samr " , " Tavt " ,
" Zmth " , " Zsym " ,
2009-11-13 19:25:21 +00:00
/* new in ICU 4.4 */
" Bamu " , " Lisu " , " Nkgb " , " Sarb " ,
2010-09-21 00:12:49 +00:00
/* new in ICU 4.6 */
" Bass " , " Dupl " , " Elba " , " Gran " , " Kpel " , " Loma " , " Mend " , " Merc " ,
" Narb " , " Nbat " , " Palm " , " Sind " , " Wara " ,
2011-05-10 20:56:53 +00:00
/* new in ICU 4.8 */
" Afak " , " Jurc " , " Mroo " , " Nshu " , " Shrd " , " Sora " , " Takr " , " Tang " , " Wole " ,
2011-12-08 22:37:24 +00:00
/* new in ICU 49 */
2012-01-23 19:51:38 +00:00
" Hluw " , " Khoj " , " Tirh " ,
2013-05-15 21:51:04 +00:00
/* new in ICU 52 */
2014-04-14 22:30:09 +00:00
" Aghb " , " Mahj " ,
/* new in ICU 54 */
2016-04-08 22:23:30 +00:00
" Ahom " , " Hatr " , " Modi " , " Mult " , " Pauc " , " Sidd " ,
// new in ICU 58
2017-04-26 21:17:13 +00:00
" Adlm " , " Bhks " , " Marc " , " Newa " , " Osge " , " Hanb " , " Jamo " , " Zsye " ,
// new in ICU 60
2018-04-03 23:09:49 +00:00
" Gonm " , " Soyo " , " Zanb " ,
// new in ICU 61
" Dogr " , " Gong " , " Maka " , " Medf " , " Rohg " , " Sogd " , " Sogo " ,
2018-10-11 23:04:05 +00:00
// new in ICU 64
" Elym " , " Hmnp " , " Nand " , " Wcho " ,
2019-11-21 21:29:18 +00:00
// new in ICU 66
" Chrs " , " Diak " , " Kits " , " Yezi " ,
2006-02-09 01:28:31 +00:00
} ;
int32_t j = 0 ;
2014-08-28 22:13:45 +00:00
if ( UPRV_LENGTHOF ( expectedLong ) ! = ( USCRIPT_CODE_LIMIT - USCRIPT_BALINESE ) ) {
2009-11-13 19:25:21 +00:00
log_err ( " need to add new script codes in cucdapi.c! \n " ) ;
return ;
}
2006-02-09 01:28:31 +00:00
for ( i = USCRIPT_BALINESE ; ( UScriptCode ) i < USCRIPT_CODE_LIMIT ; i + + , j + + ) {
const char * name = uscript_getName ( ( UScriptCode ) i ) ;
2006-03-03 20:59:01 +00:00
if ( name = = NULL | | strcmp ( name , expectedLong [ j ] ) ! = 0 ) {
log_err ( " uscript_getName failed for code %i: %s!=%s \n " , i , name , expectedLong [ j ] ) ;
2006-02-09 01:28:31 +00:00
}
name = uscript_getShortName ( ( UScriptCode ) i ) ;
2006-03-03 20:59:01 +00:00
if ( name = = NULL | | strcmp ( name , expectedShort [ j ] ) ! = 0 ) {
log_err ( " uscript_getShortName failed for code %i: %s!=%s \n " , i , name , expectedShort [ j ] ) ;
2006-02-09 01:28:31 +00:00
}
}
2014-08-28 22:13:45 +00:00
for ( i = 0 ; i < UPRV_LENGTHOF ( expectedLong ) ; i + + ) {
2006-02-09 01:28:31 +00:00
UScriptCode fillIn [ 5 ] = { USCRIPT_INVALID_CODE } ;
UErrorCode status = U_ZERO_ERROR ;
int32_t len = 0 ;
2014-08-28 22:13:45 +00:00
len = uscript_getCode ( expectedShort [ i ] , fillIn , UPRV_LENGTHOF ( fillIn ) , & status ) ;
2006-02-09 01:28:31 +00:00
if ( U_FAILURE ( status ) ) {
2006-03-03 20:59:01 +00:00
log_err ( " uscript_getCode failed for script name %s. Error: %s \n " , expectedShort [ i ] , u_errorName ( status ) ) ;
2006-02-09 01:28:31 +00:00
}
if ( len > 1 ) {
2006-03-03 20:59:01 +00:00
log_err ( " uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i \n " , expectedShort [ i ] , len ) ;
2006-02-09 01:28:31 +00:00
}
if ( fillIn [ 0 ] ! = ( UScriptCode ) ( USCRIPT_BALINESE + i ) ) {
2006-03-03 20:59:01 +00:00
log_err ( " uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i \n " , expectedShort [ i ] , ( USCRIPT_BALINESE + i ) , fillIn [ 0 ] ) ;
2006-02-09 01:28:31 +00:00
}
}
}
2010-09-21 00:12:49 +00:00
{
/* test characters which have Script_Extensions */
UErrorCode errorCode = U_ZERO_ERROR ;
if ( ! (
USCRIPT_COMMON = = uscript_getScript ( 0x0640 , & errorCode ) & &
USCRIPT_INHERITED = = uscript_getScript ( 0x0650 , & errorCode ) & &
USCRIPT_ARABIC = = uscript_getScript ( 0xfdf2 , & errorCode ) ) | |
U_FAILURE ( errorCode )
) {
log_err ( " uscript_getScript(character with Script_Extensions) failed \n " ) ;
}
}
}
void TestHasScript ( ) {
if ( ! (
! uscript_hasScript ( 0x063f , USCRIPT_COMMON ) & &
uscript_hasScript ( 0x063f , USCRIPT_ARABIC ) & & /* main Script value */
! uscript_hasScript ( 0x063f , USCRIPT_SYRIAC ) & &
! uscript_hasScript ( 0x063f , USCRIPT_THAANA ) )
) {
log_err ( " uscript_hasScript(U+063F, ...) is wrong \n " ) ;
}
if ( ! (
2012-02-06 20:11:22 +00:00
! uscript_hasScript ( 0x0640 , USCRIPT_COMMON ) & & /* main Script value */
2010-09-21 00:12:49 +00:00
uscript_hasScript ( 0x0640 , USCRIPT_ARABIC ) & &
uscript_hasScript ( 0x0640 , USCRIPT_SYRIAC ) & &
! uscript_hasScript ( 0x0640 , USCRIPT_THAANA ) )
) {
log_err ( " uscript_hasScript(U+0640, ...) is wrong \n " ) ;
}
if ( ! (
2012-02-06 20:11:22 +00:00
! uscript_hasScript ( 0x0650 , USCRIPT_INHERITED ) & & /* main Script value */
2010-09-21 00:12:49 +00:00
uscript_hasScript ( 0x0650 , USCRIPT_ARABIC ) & &
uscript_hasScript ( 0x0650 , USCRIPT_SYRIAC ) & &
! uscript_hasScript ( 0x0650 , USCRIPT_THAANA ) )
) {
log_err ( " uscript_hasScript(U+0650, ...) is wrong \n " ) ;
}
if ( ! (
2012-02-06 20:11:22 +00:00
! uscript_hasScript ( 0x0660 , USCRIPT_COMMON ) & & /* main Script value */
2010-09-21 00:12:49 +00:00
uscript_hasScript ( 0x0660 , USCRIPT_ARABIC ) & &
! uscript_hasScript ( 0x0660 , USCRIPT_SYRIAC ) & &
uscript_hasScript ( 0x0660 , USCRIPT_THAANA ) )
) {
log_err ( " uscript_hasScript(U+0660, ...) is wrong \n " ) ;
}
if ( ! (
! uscript_hasScript ( 0xfdf2 , USCRIPT_COMMON ) & &
uscript_hasScript ( 0xfdf2 , USCRIPT_ARABIC ) & & /* main Script value */
! uscript_hasScript ( 0xfdf2 , USCRIPT_SYRIAC ) & &
uscript_hasScript ( 0xfdf2 , USCRIPT_THAANA ) )
) {
log_err ( " uscript_hasScript(U+FDF2, ...) is wrong \n " ) ;
}
2012-02-03 23:36:05 +00:00
if ( uscript_hasScript ( 0x0640 , 0xaffe ) ) {
/* An unguarded implementation might go into an infinite loop. */
log_err ( " uscript_hasScript(U+0640, bogus 0xaffe) is wrong \n " ) ;
}
2010-09-21 00:12:49 +00:00
}
2016-09-21 17:27:57 +00:00
/**
 * Reports whether script occurs in scripts[0..length-1], and as a side check
 * logs an error for every element that is not strictly greater than the
 * element before it (i.e. when the list is not in ascending order).
 *
 * @param scripts list of script codes to search
 * @param length  number of entries in scripts
 * @param script  script code to look for
 * @return TRUE if script was found, otherwise FALSE
 */
static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
    UBool contain = FALSE;
    int32_t prev = -1, i;
    for (i = 0; i < length; ++i) {
        int32_t s = scripts[i];
        if (s <= prev) {
            log_err(" uscript_getScriptExtensions() not in sorted order: %d %d \n ", (int)prev, (int)s);
        }
        /* Remember this element so the next one is compared against it;
         * without this the order check only ever compares against -1. */
        prev = s;
        if (s == script) { contain = TRUE; }
    }
    return contain;
}
2010-09-21 00:12:49 +00:00
void TestGetScriptExtensions ( ) {
UScriptCode scripts [ 20 ] ;
int32_t length ;
UErrorCode errorCode ;
/* errors and overflows */
errorCode = U_PARSE_ERROR ;
2014-08-28 22:13:45 +00:00
length = uscript_getScriptExtensions ( 0x0640 , scripts , UPRV_LENGTHOF ( scripts ) , & errorCode ) ;
2010-09-21 00:12:49 +00:00
if ( errorCode ! = U_PARSE_ERROR ) {
2011-12-08 22:37:24 +00:00
log_err ( " uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s \n " ,
2010-09-21 00:12:49 +00:00
u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
2014-08-28 22:13:45 +00:00
length = uscript_getScriptExtensions ( 0x0640 , NULL , UPRV_LENGTHOF ( scripts ) , & errorCode ) ;
2010-09-21 00:12:49 +00:00
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR ) {
2011-12-08 22:37:24 +00:00
log_err ( " uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s \n " ,
2010-09-21 00:12:49 +00:00
u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
length = uscript_getScriptExtensions ( 0x0640 , scripts , - 1 , & errorCode ) ;
if ( errorCode ! = U_ILLEGAL_ARGUMENT_ERROR ) {
2011-12-08 22:37:24 +00:00
log_err ( " uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s \n " ,
2010-09-21 00:12:49 +00:00
u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
length = uscript_getScriptExtensions ( 0x0640 , scripts , 0 , & errorCode ) ;
2014-06-09 21:15:23 +00:00
if ( errorCode ! = U_BUFFER_OVERFLOW_ERROR | | length < 3 ) {
log_err ( " uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s \n " ,
2010-09-21 00:12:49 +00:00
( int ) length , u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
length = uscript_getScriptExtensions ( 0x0640 , scripts , 1 , & errorCode ) ;
2014-06-09 21:15:23 +00:00
if ( errorCode ! = U_BUFFER_OVERFLOW_ERROR | | length < 3 ) {
log_err ( " uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s \n " ,
2010-09-21 00:12:49 +00:00
( int ) length , u_errorName ( errorCode ) ) ;
}
2012-02-03 23:36:05 +00:00
/* U+063F has only a Script code, no Script_Extensions. */
errorCode = U_ZERO_ERROR ;
length = uscript_getScriptExtensions ( 0x063f , scripts , 0 , & errorCode ) ;
if ( errorCode ! = U_BUFFER_OVERFLOW_ERROR | | length ! = 1 ) {
log_err ( " uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s \n " ,
( int ) length , u_errorName ( errorCode ) ) ;
}
/* invalid code points */
errorCode = U_ZERO_ERROR ;
2014-08-28 22:13:45 +00:00
length = uscript_getScriptExtensions ( - 1 , scripts , UPRV_LENGTHOF ( scripts ) , & errorCode ) ;
2012-02-03 23:36:05 +00:00
if ( U_FAILURE ( errorCode ) | | length ! = 1 | | scripts [ 0 ] ! = USCRIPT_UNKNOWN ) {
log_err ( " uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s \n " ,
( int ) length , u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
2014-08-28 22:13:45 +00:00
length = uscript_getScriptExtensions ( 0x110000 , scripts , UPRV_LENGTHOF ( scripts ) , & errorCode ) ;
2012-02-03 23:36:05 +00:00
if ( U_FAILURE ( errorCode ) | | length ! = 1 | | scripts [ 0 ] ! = USCRIPT_UNKNOWN ) {
log_err ( " uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s \n " ,
( int ) length , u_errorName ( errorCode ) ) ;
}
2010-09-21 00:12:49 +00:00
/* normal usage */
errorCode = U_ZERO_ERROR ;
2012-02-03 23:36:05 +00:00
length = uscript_getScriptExtensions ( 0x063f , scripts , 1 , & errorCode ) ;
if ( U_FAILURE ( errorCode ) | | length ! = 1 | | scripts [ 0 ] ! = USCRIPT_ARABIC ) {
log_err ( " uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s \n " ,
2010-09-21 00:12:49 +00:00
( int ) length , u_errorName ( errorCode ) ) ;
}
2012-02-03 23:36:05 +00:00
errorCode = U_ZERO_ERROR ;
2014-08-28 22:13:45 +00:00
length = uscript_getScriptExtensions ( 0x0640 , scripts , UPRV_LENGTHOF ( scripts ) , & errorCode ) ;
2014-06-09 21:15:23 +00:00
if ( U_FAILURE ( errorCode ) | | length < 3 | |
! scriptsContain ( scripts , length , USCRIPT_ARABIC ) | |
! scriptsContain ( scripts , length , USCRIPT_SYRIAC ) | |
! scriptsContain ( scripts , length , USCRIPT_MANDAIC ) ) {
2010-09-21 00:12:49 +00:00
log_err ( " uscript_getScriptExtensions(U+0640)=%d failed - %s \n " ,
( int ) length , u_errorName ( errorCode ) ) ;
}
2012-02-03 23:36:05 +00:00
errorCode = U_ZERO_ERROR ;
2014-08-28 22:13:45 +00:00
length = uscript_getScriptExtensions ( 0xfdf2 , scripts , UPRV_LENGTHOF ( scripts ) , & errorCode ) ;
2010-09-21 00:12:49 +00:00
if ( U_FAILURE ( errorCode ) | | length ! = 2 | | scripts [ 0 ] ! = USCRIPT_ARABIC | | scripts [ 1 ] ! = USCRIPT_THAANA ) {
log_err ( " uscript_getScriptExtensions(U+FDF2)=%d failed - %s \n " ,
( int ) length , u_errorName ( errorCode ) ) ;
}
2012-02-03 23:36:05 +00:00
errorCode = U_ZERO_ERROR ;
2014-08-28 22:13:45 +00:00
length = uscript_getScriptExtensions ( 0xff65 , scripts , UPRV_LENGTHOF ( scripts ) , & errorCode ) ;
2010-09-21 00:12:49 +00:00
if ( U_FAILURE ( errorCode ) | | length ! = 6 | | scripts [ 0 ] ! = USCRIPT_BOPOMOFO | | scripts [ 5 ] ! = USCRIPT_YI ) {
log_err ( " uscript_getScriptExtensions(U+FF65)=%d failed - %s \n " ,
( int ) length , u_errorName ( errorCode ) ) ;
}
2008-04-04 22:47:43 +00:00
}
2013-02-17 00:49:18 +00:00
void TestScriptMetadataAPI ( ) {
/* API & code coverage. More testing in intltest/ucdtest.cpp. */
UErrorCode errorCode = U_ZERO_ERROR ;
UChar sample [ 8 ] ;
2014-08-28 22:13:45 +00:00
if ( uscript_getSampleString ( USCRIPT_LATIN , sample , UPRV_LENGTHOF ( sample ) , & errorCode ) ! = 1 | |
2013-02-17 00:49:18 +00:00
U_FAILURE ( errorCode ) | |
uscript_getScript ( sample [ 0 ] , & errorCode ) ! = USCRIPT_LATIN | |
sample [ 1 ] ! = 0 ) {
log_err ( " uscript_getSampleString(Latn) failed - %s \n " , u_errorName ( errorCode ) ) ;
}
sample [ 0 ] = 0xfffe ;
if ( uscript_getSampleString ( USCRIPT_LATIN , sample , 0 , & errorCode ) ! = 1 | |
errorCode ! = U_BUFFER_OVERFLOW_ERROR | |
sample [ 0 ] ! = 0xfffe ) {
log_err ( " uscript_getSampleString(Latn, capacity=0) failed - %s \n " , u_errorName ( errorCode ) ) ;
}
errorCode = U_ZERO_ERROR ;
2014-08-28 22:13:45 +00:00
if ( uscript_getSampleString ( USCRIPT_INVALID_CODE , sample , UPRV_LENGTHOF ( sample ) , & errorCode ) ! = 0 | |
2013-02-17 00:49:18 +00:00
U_FAILURE ( errorCode ) | |
sample [ 0 ] ! = 0 ) {
log_err ( " uscript_getSampleString(invalid) failed - %s \n " , u_errorName ( errorCode ) ) ;
}
sample [ 0 ] = 0xfffe ;
if ( uscript_getSampleString ( USCRIPT_CODE_LIMIT , sample , 0 , & errorCode ) ! = 0 | |
errorCode ! = U_STRING_NOT_TERMINATED_WARNING | |
sample [ 0 ] ! = 0xfffe ) {
log_err ( " uscript_getSampleString(limit, capacity=0) failed - %s \n " , u_errorName ( errorCode ) ) ;
}
if ( uscript_getUsage ( USCRIPT_LATIN ) ! = USCRIPT_USAGE_RECOMMENDED | |
2017-04-26 21:17:13 +00:00
// Unicode 10 gives up on "aspirational".
uscript_getUsage ( USCRIPT_YI ) ! = USCRIPT_USAGE_LIMITED_USE | |
2013-02-17 00:49:18 +00:00
uscript_getUsage ( USCRIPT_CHEROKEE ) ! = USCRIPT_USAGE_LIMITED_USE | |
uscript_getUsage ( USCRIPT_COPTIC ) ! = USCRIPT_USAGE_EXCLUDED | |
uscript_getUsage ( USCRIPT_CIRTH ) ! = USCRIPT_USAGE_NOT_ENCODED | |
uscript_getUsage ( USCRIPT_INVALID_CODE ) ! = USCRIPT_USAGE_NOT_ENCODED | |
2013-02-17 23:24:47 +00:00
uscript_getUsage ( USCRIPT_CODE_LIMIT ) ! = USCRIPT_USAGE_NOT_ENCODED ) {
2013-02-17 00:49:18 +00:00
log_err ( " uscript_getUsage() failed \n " ) ;
}
if ( uscript_isRightToLeft ( USCRIPT_LATIN ) | |
uscript_isRightToLeft ( USCRIPT_CIRTH ) | |
! uscript_isRightToLeft ( USCRIPT_ARABIC ) | |
! uscript_isRightToLeft ( USCRIPT_HEBREW ) ) {
log_err ( " uscript_isRightToLeft() failed \n " ) ;
}
if ( uscript_breaksBetweenLetters ( USCRIPT_LATIN ) | |
uscript_breaksBetweenLetters ( USCRIPT_CIRTH ) | |
! uscript_breaksBetweenLetters ( USCRIPT_HAN ) | |
! uscript_breaksBetweenLetters ( USCRIPT_THAI ) ) {
log_err ( " uscript_breaksBetweenLetters() failed \n " ) ;
}
if ( uscript_isCased ( USCRIPT_CIRTH ) | |
uscript_isCased ( USCRIPT_HAN ) | |
! uscript_isCased ( USCRIPT_LATIN ) | |
! uscript_isCased ( USCRIPT_GREEK ) ) {
log_err ( " uscript_isCased() failed \n " ) ;
}
}
2008-04-04 22:47:43 +00:00
void TestBinaryValues ( ) {
/*
* Unicode 5.1 explicitly defines binary property value aliases .
* Verify that they are all recognized .
*/
static const char * const falseValues [ ] = { " N " , " No " , " F " , " False " } ;
static const char * const trueValues [ ] = { " Y " , " Yes " , " T " , " True " } ;
int32_t i ;
2014-08-28 22:13:45 +00:00
for ( i = 0 ; i < UPRV_LENGTHOF ( falseValues ) ; + + i ) {
2008-04-04 22:47:43 +00:00
if ( FALSE ! = u_getPropertyValueEnum ( UCHAR_ALPHABETIC , falseValues [ i ] ) ) {
2009-06-09 21:28:13 +00:00
log_data_err ( " u_getPropertyValueEnum(UCHAR_ALPHABETIC, \" %s \" )!=FALSE (Are you missing data?) \n " , falseValues [ i ] ) ;
2008-04-04 22:47:43 +00:00
}
}
2014-08-28 22:13:45 +00:00
for ( i = 0 ; i < UPRV_LENGTHOF ( trueValues ) ; + + i ) {
2008-04-04 22:47:43 +00:00
if ( TRUE ! = u_getPropertyValueEnum ( UCHAR_ALPHABETIC , trueValues [ i ] ) ) {
2009-06-09 21:28:13 +00:00
log_data_err ( " u_getPropertyValueEnum(UCHAR_ALPHABETIC, \" %s \" )!=TRUE (Are you missing data?) \n " , trueValues [ i ] ) ;
2008-04-04 22:47:43 +00:00
}
}
}