2003-05-15 16:28:58 +00:00
/********************************************************************
2009-06-09 21:28:13 +00:00
* Copyright (c) 1997-2009, International Business Machines
2007-07-27 03:12:12 +00:00
* Corporation and others. All Rights Reserved.
2003-05-15 16:28:58 +00:00
********************************************************************/
# include <string.h>
# include "unicode/utypes.h"
# include "unicode/uscript.h"
# include "unicode/uchar.h"
# include "cintltst.h"
2003-12-05 22:48:48 +00:00
# include "cucdapi.h"
2003-05-15 16:28:58 +00:00
2008-04-04 22:47:43 +00:00
/* Number of elements in a true array (not a pointer), as int32_t.
 * The whole expansion and the argument are parenthesized so the macro
 * composes safely with surrounding operators (e.g. sizeof, postfix). */
#define LENGTHOF(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
2006-02-09 01:28:31 +00:00
2003-05-15 16:28:58 +00:00
/*
 * Exercises the uscript_* script-code API. In order:
 *   1. uscript_getCode() for every entry of testNames against expected[];
 *   2. uscript_getCode() preflight (capacity 0) followed by the real call;
 *   3. uscript_getName() and uscript_getShortName() for selected codes;
 *   4. uscript_getScript() for selected code points, for 0x1D169, and for
 *      every code point from 0 through 0x10ffff;
 *   5. uscript_getName()/uscript_getShortName()/uscript_getCode() for every
 *      code from USCRIPT_BALINESE up to (not including) USCRIPT_CODE_LIMIT.
 * Takes no arguments and returns nothing; all findings are reported through
 * log_err()/log_data_err(). Returns early when a name lookup yields NULL or
 * when the expected-name tables do not cover the script-code range.
 *
 * NOTE(review): string literals are kept exactly as they appear in the file
 * under review (including the blanks inside the quotes) - confirm against the
 * upstream source before relying on them.
 */
void TestUScriptCodeAPI(void) {
    int i = 0;
    int numErrors = 0;

    /* 1. Name / abbreviation / locale -> script code. */
    {
        /* NULL-terminated; expected[] below is index-aligned with this list. */
        const char *testNames[] = {
            /* test locale */
            " en ", " en_US ", " sr ", " ta ", " te_IN ",
            " hi ", " he ", " ar ",
            /* test abbr */
            " Hani ", " Hang ", " Hebr ", " Hira ",
            " Knda ", " Kana ", " Khmr ", " Lao ",
            " Latn ",
            " Mlym ", " Mong ",
            /* test names */
            " CYRILLIC ", " DESERET ", " DEVANAGARI ", " ETHIOPIC ", " GEORGIAN ",
            " GOTHIC ", " GREEK ", " GUJARATI ", " COMMON ", " INHERITED ",
            /* test lower case names */
            " malayalam ", " mongolian ", " myanmar ", " ogham ", " old-italic ",
            " oriya ", " runic ", " sinhala ", " syriac ", " tamil ",
            " telugu ", " thaana ", " thai ", " tibetan ",
            /* test the bounds */
            " tagb ", " arabic ",
            /* test bogus */
            " asfdasd ", " 5464 ", " 12235 ",
            /* test the last index */
            " zyyy ", " YI ",
            NULL
        };
        UScriptCode expected[] = {
            /* locales should return */
            USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
            USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
            /* abbr should return */
            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
            USCRIPT_LATIN,
            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
            /* names should return */
            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
            /* lower case names should return */
            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
            USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
            USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
            /* bounds */
            USCRIPT_TAGBANWA, USCRIPT_ARABIC,
            /* bogus names should return invalid code */
            USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
            /* last index */
            USCRIPT_COMMON, USCRIPT_YI,
        };

        for (i = 0; testNames[i] != NULL; i++) {
            UScriptCode script[10] = { USCRIPT_INVALID_CODE };
            /* Fresh status per name so one lookup's outcome cannot leak into the next. */
            UErrorCode err = U_ZERO_ERROR;

            uscript_getCode(testNames[i], script, LENGTHOF(script), &err);
            if (script[0] != expected[i]) {
                log_data_err(" Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?) \n ",
                             script[0], expected[i], testNames[i]);
                numErrors++;
            }
        }
        if (numErrors > 0) {
            log_data_err(" Errors uchar_getScriptCode() : %i \n ", numErrors);
        }
    }

    /* 2. Preflight with capacity 0, then fetch the codes for real. */
    {
        UErrorCode err = U_ZERO_ERROR;
        int32_t j;
        UScriptCode jaCode[] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
        UScriptCode script[10] = { USCRIPT_INVALID_CODE };
        int32_t num = uscript_getCode(" ja ", script, 0, &err);

        /* preflight */
        if (err == U_BUFFER_OVERFLOW_ERROR) {
            err = U_ZERO_ERROR;
            num = uscript_getCode(" ja ", script, LENGTHOF(script), &err);
            /* LENGTHOF yields int32_t, matching both num and the %d conversions. */
            if (num != LENGTHOF(jaCode)) {
                log_err(" Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n ",
                        num, LENGTHOF(jaCode));
            }
            for (j = 0; j < LENGTHOF(jaCode); j++) {
                if (script[j] != jaCode[j]) {
                    log_err(" Japanese locale: code #%d was %d (%s) but expected %d (%s) \n ", j,
                            script[j], uscript_getName(script[j]),
                            jaCode[j], uscript_getName(jaCode[j]));
                }
            }
        } else {
            log_data_err(" Errors in uscript_getScriptCode() expected error : %s got: %s \n ",
                         " U_BUFFER_OVERFLOW_ERROR ",
                         u_errorName(err));
        }
    }

    /* 3a. Script code -> long name. */
    {
        UScriptCode testAbbr[] = {
            /* names should return */
            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
        };
        const char *expectedNames[] = {
            /* test names */
            " Cyrillic ", " Deseret ", " Devanagari ", " Ethiopic ", " Georgian ",
            " Gothic ", " Greek ", " Gujarati ",
            NULL
        };

        numErrors = 0;
        for (i = 0; i < LENGTHOF(testAbbr); i++) {
            const char *name = uscript_getName(testAbbr[i]);
            if (name == NULL) {
                log_data_err(" Couldn't get script name \n ");
                return;
            }
            if (strcmp(expectedNames[i], name) != 0) {
                log_err(" Error getting abbreviations Got: %s Expected: %s \n ", name, expectedNames[i]);
                numErrors++;
            }
        }
        /* Report the total once, after all names have been compared. */
        if (numErrors > 0) {
            log_err(" Errors uchar_getScriptAbbr() : %i \n ", numErrors);
        }
    }

    /* 3b. Script code -> short name. */
    {
        UScriptCode testAbbr[] = {
            /* abbr should return */
            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
            USCRIPT_LATIN,
            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
        };
        const char *expectedAbbr[] = {
            /* test abbr */
            " Hani ", " Hang ", " Hebr ", " Hira ",
            " Knda ", " Kana ", " Khmr ", " Laoo ",
            " Latn ",
            " Mlym ", " Mong ",
            NULL
        };

        numErrors = 0;
        for (i = 0; i < LENGTHOF(testAbbr); i++) {
            const char *name = uscript_getShortName(testAbbr[i]);
            /* Same guard as the long-name block: never hand NULL to strcmp(). */
            if (name == NULL) {
                log_data_err(" Couldn't get script name \n ");
                return;
            }
            if (strcmp(expectedAbbr[i], name) != 0) {
                log_err(" Error getting abbreviations Got: %s Expected: %s \n ", name, expectedAbbr[i]);
                numErrors++;
            }
        }
        if (numErrors > 0) {
            log_err(" Errors uchar_getScriptAbbr() : %i \n ", numErrors);
        }
    }

    /* 4a. now test uscript_getScript() API */
    {
        /* expected[] below is index-aligned with codepoints[]. */
        uint32_t codepoints[] = {
            0x0000FF9D, /* USCRIPT_KATAKANA */
            0x0000FFBE, /* USCRIPT_HANGUL */
            0x0000FFC7, /* USCRIPT_HANGUL */
            0x0000FFCF, /* USCRIPT_HANGUL */
            0x0000FFD7, /* USCRIPT_HANGUL */
            0x0000FFDC, /* USCRIPT_HANGUL */
            0x00010300, /* USCRIPT_OLD_ITALIC */
            0x00010330, /* USCRIPT_GOTHIC */
            0x0001034A, /* USCRIPT_GOTHIC */
            0x00010400, /* USCRIPT_DESERET */
            0x00010428, /* USCRIPT_DESERET */
            0x0001D167, /* USCRIPT_INHERITED */
            0x0001D17B, /* USCRIPT_INHERITED */
            0x0001D185, /* USCRIPT_INHERITED */
            0x0001D1AA, /* USCRIPT_INHERITED */
            0x00020000, /* USCRIPT_HAN */
            0x00000D02, /* USCRIPT_MALAYALAM */
            0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
            0x00000000, /* USCRIPT_COMMON */
            0x0001D169, /* USCRIPT_INHERITED */
            0x0001D182, /* USCRIPT_INHERITED */
            0x0001D18B, /* USCRIPT_INHERITED */
            0x0001D1AD, /* USCRIPT_INHERITED */
        };
        UScriptCode expected[] = {
            USCRIPT_KATAKANA,
            USCRIPT_HANGUL,
            USCRIPT_HANGUL,
            USCRIPT_HANGUL,
            USCRIPT_HANGUL,
            USCRIPT_HANGUL,
            USCRIPT_OLD_ITALIC,
            USCRIPT_GOTHIC,
            USCRIPT_GOTHIC,
            USCRIPT_DESERET,
            USCRIPT_DESERET,
            USCRIPT_INHERITED,
            USCRIPT_INHERITED,
            USCRIPT_INHERITED,
            USCRIPT_INHERITED,
            USCRIPT_HAN,
            USCRIPT_MALAYALAM,
            USCRIPT_UNKNOWN,
            USCRIPT_COMMON,
            USCRIPT_INHERITED,
            USCRIPT_INHERITED,
            USCRIPT_INHERITED,
            USCRIPT_INHERITED,
        };
        UScriptCode code = USCRIPT_INVALID_CODE;
        UErrorCode status = U_ZERO_ERROR;
        UBool passed = TRUE;

        for (i = 0; i < LENGTHOF(codepoints); ++i) {
            code = uscript_getScript(codepoints[i], &status);
            if (U_FAILURE(status)) {
                /* %X expects unsigned int; cast the uint32_t explicitly. */
                log_err(" uscript_getScript for codepoint \\ U%08X failed. Error: %s \n ",
                        (unsigned int)codepoints[i], u_errorName(status));
                break;
            }
            /* The result must agree with the table and with the UCHAR_SCRIPT property. */
            if (code != expected[i] ||
                code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)) {
                log_err(" uscript_getScript for codepoint \\ U%08X failed \n ",
                        (unsigned int)codepoints[i]);
                passed = FALSE;
            }
        }
        if (passed == FALSE) {
            log_err(" uscript_getScript failed. \n ");
        }
    }

    /* 4b. Single lookup of 0x1D169; a failed status counts as a miss as well. */
    {
        UErrorCode status = U_ZERO_ERROR;
        UScriptCode code = uscript_getScript(0x001D169, &status);
        if (U_FAILURE(status) || code != USCRIPT_INHERITED) {
            log_err(" \\ U001D169 is not contained in USCRIPT_INHERITED ");
        }
    }

    /* 4c. No code point in 0..0x10ffff may map to USCRIPT_INVALID_CODE. */
    {
        UScriptCode code = USCRIPT_INVALID_CODE;
        UErrorCode status = U_ZERO_ERROR;
        int32_t err = 0;

        for (i = 0; i <= 0x10ffff; i++) {
            code = uscript_getScript(i, &status);
            if (code == USCRIPT_INVALID_CODE) {
                err++;
                log_err(" uscript_getScript for codepoint \\ U%08X failed. \n ", (unsigned int)i);
            }
        }
        if (err > 0) {
            log_err(" uscript_getScript failed for %d codepoints \n ", err);
        }
    }

    /* 5a. Every script code below USCRIPT_CODE_LIMIT must have a name. */
    {
        for (i = 0; (UScriptCode)i < USCRIPT_CODE_LIMIT; i++) {
            const char *name = uscript_getName((UScriptCode)i);
            if (name == NULL || strcmp(name, " ") == 0) {
                log_err(" uscript_getName failed for code %i: name is NULL or \" \" \n ", i);
            }
        }
    }

    /* 5b. Long/short names and reverse lookup for USCRIPT_BALINESE and up. */
    {
        /*
         * These script codes were originally added to ICU pre-3.6, so that ICU would
         * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
         * These script codes were added with only short names because we don't
         * want to invent long names ourselves.
         * Unicode 5 and later encode some of these scripts and give them long names.
         * Whenever this happens, the long script names here need to be updated.
         */
        static const char *expectedLong[] = {
            " Balinese ", " Batk ", " Blis ", " Brah ", " Cham ", " Cirt ", " Cyrs ", " Egyd ", " Egyh ", " Egyptian_Hieroglyphs ",
            " Geok ", " Hans ", " Hant ", " Hmng ", " Hung ", " Inds ", " Javanese ", " Kayah_Li ", " Latf ", " Latg ",
            " Lepcha ", " Lina ", " Mand ", " Maya ", " Mero ", " Nko ", " Old_Turkic ", " Perm ", " Phags_Pa ", " Phoenician ",
            " Plrd ", " Roro ", " Sara ", " Syre ", " Syrj ", " Syrn ", " Teng ", " Vai ", " Visp ", " Cuneiform ",
            " Zxxx ", " Unknown ",
            " Carian ", " Jpan ", " Tai_Tham ", " Lycian ", " Lydian ", " Ol_Chiki ", " Rejang ", " Saurashtra ", " Sgnw ", " Sundanese ",
            " Moon ", " Meetei_Mayek ",
            /* new in ICU 4.0 */
            " Imperial_Aramaic ", " Avestan ", " Cakm ", " Kore ",
            " Kaithi ", " Mani ", " Inscriptional_Pahlavi ", " Phlp ", " Phlv ", " Inscriptional_Parthian ", " Samaritan ", " Tai_Viet ",
            " Zmth ", " Zsym ",
            /* new in ICU 4.4 */
            " Bamum ", " Lisu ", " Nkgb ", " Old_South_Arabian ",
        };
        static const char *expectedShort[] = {
            " Bali ", " Batk ", " Blis ", " Brah ", " Cham ", " Cirt ", " Cyrs ", " Egyd ", " Egyh ", " Egyp ",
            " Geok ", " Hans ", " Hant ", " Hmng ", " Hung ", " Inds ", " Java ", " Kali ", " Latf ", " Latg ",
            " Lepc ", " Lina ", " Mand ", " Maya ", " Mero ", " Nkoo ", " Orkh ", " Perm ", " Phag ", " Phnx ",
            " Plrd ", " Roro ", " Sara ", " Syre ", " Syrj ", " Syrn ", " Teng ", " Vaii ", " Visp ", " Xsux ",
            " Zxxx ", " Zzzz ",
            " Cari ", " Jpan ", " Lana ", " Lyci ", " Lydi ", " Olck ", " Rjng ", " Saur ", " Sgnw ", " Sund ",
            " Moon ", " Mtei ",
            /* new in ICU 4.0 */
            " Armi ", " Avst ", " Cakm ", " Kore ",
            " Kthi ", " Mani ", " Phli ", " Phlp ", " Phlv ", " Prti ", " Samr ", " Tavt ",
            " Zmth ", " Zsym ",
            /* new in ICU 4.4 */
            " Bamu ", " Lisu ", " Nkgb ", " Sarb ",
        };
        int32_t j = 0;

        /* Both tables are indexed in parallel below, so both must span the whole range. */
        if (LENGTHOF(expectedLong) != (USCRIPT_CODE_LIMIT - USCRIPT_BALINESE) ||
            LENGTHOF(expectedShort) != LENGTHOF(expectedLong)) {
            log_err(" need to add new script codes in cucdapi.c! \n ");
            return;
        }

        for (i = USCRIPT_BALINESE; (UScriptCode)i < USCRIPT_CODE_LIMIT; i++, j++) {
            const char *name = uscript_getName((UScriptCode)i);
            if (name == NULL || strcmp(name, expectedLong[j]) != 0) {
                /* Never pass a NULL pointer to %s. */
                log_err(" uscript_getName failed for code %i: %s!=%s \n ", i,
                        name != NULL ? name : "(null)", expectedLong[j]);
            }
            name = uscript_getShortName((UScriptCode)i);
            if (name == NULL || strcmp(name, expectedShort[j]) != 0) {
                log_err(" uscript_getShortName failed for code %i: %s!=%s \n ", i,
                        name != NULL ? name : "(null)", expectedShort[j]);
            }
        }

        /* Reverse direction: each short name must map back to its own code. */
        for (i = 0; i < LENGTHOF(expectedLong); i++) {
            UScriptCode fillIn[5] = { USCRIPT_INVALID_CODE };
            UErrorCode status = U_ZERO_ERROR;
            int32_t len = 0;

            len = uscript_getCode(expectedShort[i], fillIn, LENGTHOF(fillIn), &status);
            if (U_FAILURE(status)) {
                log_err(" uscript_getCode failed for script name %s. Error: %s \n ", expectedShort[i], u_errorName(status));
            }
            if (len > 1) {
                log_err(" uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i \n ", expectedShort[i], len);
            }
            if (fillIn[0] != (UScriptCode)(USCRIPT_BALINESE + i)) {
                log_err(" uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i \n ", expectedShort[i], (USCRIPT_BALINESE + i), fillIn[0]);
            }
        }
    }
}
void TestBinaryValues ( ) {
/*
* Unicode 5.1 explicitly defines binary property value aliases .
* Verify that they are all recognized .
*/
static const char * const falseValues [ ] = { " N " , " No " , " F " , " False " } ;
static const char * const trueValues [ ] = { " Y " , " Yes " , " T " , " True " } ;
int32_t i ;
for ( i = 0 ; i < LENGTHOF ( falseValues ) ; + + i ) {
if ( FALSE ! = u_getPropertyValueEnum ( UCHAR_ALPHABETIC , falseValues [ i ] ) ) {
2009-06-09 21:28:13 +00:00
log_data_err ( " u_getPropertyValueEnum(UCHAR_ALPHABETIC, \" %s \" )!=FALSE (Are you missing data?) \n " , falseValues [ i ] ) ;
2008-04-04 22:47:43 +00:00
}
}
for ( i = 0 ; i < LENGTHOF ( trueValues ) ; + + i ) {
if ( TRUE ! = u_getPropertyValueEnum ( UCHAR_ALPHABETIC , trueValues [ i ] ) ) {
2009-06-09 21:28:13 +00:00
log_data_err ( " u_getPropertyValueEnum(UCHAR_ALPHABETIC, \" %s \" )!=TRUE (Are you missing data?) \n " , trueValues [ i ] ) ;
2008-04-04 22:47:43 +00:00
}
}
}