2010-06-14 21:27:44 +00:00
2001-03-02 00:42:43 +00:00
/********************************************************************
2001-05-17 23:09:35 +00:00
* COPYRIGHT :
2014-01-10 02:24:54 +00:00
* Copyright ( c ) 2001 - 2014 , International Business Machines Corporation and
2001-03-02 00:42:43 +00:00
* others . All Rights Reserved .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-03-22 19:33:43 +00:00
/*******************************************************************************
2001-03-02 00:42:43 +00:00
*
* File cmsccoll . C
*
2001-03-22 19:33:43 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-03-02 00:42:43 +00:00
/**
 * These are the tests specific to ICU 1.8 and above, that I didn't know where
 * to fit.
 */
2001-03-17 00:59:44 +00:00
# include <stdio.h>
2002-09-20 17:54:45 +00:00
2001-03-02 00:42:43 +00:00
# include "unicode/utypes.h"
2002-09-20 17:54:45 +00:00
# if !UCONFIG_NO_COLLATION
2001-03-02 00:42:43 +00:00
# include "unicode/ucol.h"
2001-03-15 22:29:33 +00:00
# include "unicode/ucoleitr.h"
2001-03-02 00:42:43 +00:00
# include "unicode/uloc.h"
# include "cintltst.h"
# include "ccolltst.h"
2001-03-06 03:42:35 +00:00
# include "callcoll.h"
2001-03-02 00:42:43 +00:00
# include "unicode/ustring.h"
# include "string.h"
2001-03-20 00:56:37 +00:00
# include "ucol_imp.h"
2001-03-20 07:22:33 +00:00
# include "cmemory.h"
2001-05-25 22:00:24 +00:00
# include "cstring.h"
2003-12-29 03:43:38 +00:00
# include "uassert.h"
2001-08-16 00:58:53 +00:00
# include "unicode/parseerr.h"
2003-02-20 01:13:36 +00:00
# include "unicode/ucnv.h"
2008-01-05 01:27:56 +00:00
# include "unicode/ures.h"
2010-11-04 20:12:39 +00:00
# include "unicode/uscript.h"
2011-07-27 05:53:56 +00:00
# include "unicode/utf16.h"
2003-04-30 23:26:55 +00:00
# include "uparse.h"
2008-04-04 22:47:43 +00:00
# include "putilimp.h"
2001-03-02 00:42:43 +00:00
2004-04-28 05:31:19 +00:00
/*
 * Element count of a true array (never a pointer). The argument is
 * parenthesized before indexing so that any array-valued expression can be
 * passed safely.
 */
#define LEN(a) (sizeof(a) / sizeof((a)[0]))
2001-03-30 03:49:29 +00:00
/* Row capacity, in UChar units, of the fixed-width UChar test tables in this file. */
# define MAX_TOKEN_LEN 16
2003-05-15 17:38:42 +00:00
2007-07-19 00:19:21 +00:00
typedef UCollationResult tst_strcoll ( void * collator , const int object ,
2001-05-17 23:09:35 +00:00
const UChar * source , const int sLen ,
2001-04-13 00:05:42 +00:00
const UChar * target , const int tLen ) ;
2001-03-30 03:49:29 +00:00
2001-03-06 00:57:48 +00:00
2001-03-15 22:29:33 +00:00
2001-04-30 19:11:32 +00:00
/*
 * Strings for the first half of IncompleteCntTest, listed in the order the
 * tailored collator is expected to sort them (each entry before all later
 * ones).
 * NOTE(review): every literal carries a leading and a trailing space inside
 * the quotes; this looks like an extraction artifact rather than intended
 * test data -- confirm against the upstream file.
 */
static const char cnt1[][10] = {
    " AA ",
    " AC ",
    " AZ ",
    " AQ ",
    " AB ",
    " ABZ ",
    " ABQ ",
    " Z ",
    " ABC ",
    " Q ",
    " B "
};
/*
 * Strings for the second half of IncompleteCntTest, in expected ascending
 * collation order.
 */
static const char cnt2[][10] = {
    " DA ", " DAD ", " DAZ ",
    " MAR ",
    " Z ",
    " DAVIS ",
    " MARK ",
    " DAV ", " DAVI "
};
2002-03-28 18:26:25 +00:00
static void IncompleteCntTest ( void )
2001-03-14 18:55:56 +00:00
{
UErrorCode status = U_ZERO_ERROR ;
2001-10-17 02:19:48 +00:00
UChar temp [ 90 ] ;
UChar t1 [ 90 ] ;
UChar t2 [ 90 ] ;
2001-03-14 18:55:56 +00:00
UCollator * coll = NULL ;
uint32_t i = 0 , j = 0 ;
uint32_t size = 0 ;
2001-05-17 23:09:35 +00:00
2001-03-14 18:55:56 +00:00
u_uastrcpy ( temp , " & Z < ABC < Q < B " ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( temp , u_strlen ( temp ) , UCOL_OFF , UCOL_DEFAULT_STRENGTH , NULL , & status ) ;
2001-03-15 02:35:49 +00:00
2001-03-14 18:55:56 +00:00
if ( U_SUCCESS ( status ) ) {
size = sizeof ( cnt1 ) / sizeof ( cnt1 [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
2001-03-15 22:29:33 +00:00
UCollationElements * iter ;
2001-03-14 18:55:56 +00:00
u_uastrcpy ( t1 , cnt1 [ i ] ) ;
u_uastrcpy ( t2 , cnt1 [ j ] ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
2001-03-15 22:29:33 +00:00
/* synwee : added collation element iterator test */
iter = ucol_openElements ( coll , t2 , u_strlen ( t2 ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Creation of iterator failed \n " ) ;
break ;
}
backAndForth ( iter ) ;
2002-07-29 21:04:18 +00:00
ucol_closeElements ( iter ) ;
2001-03-14 18:55:56 +00:00
}
}
2001-05-17 23:09:35 +00:00
}
2001-03-02 00:42:43 +00:00
2001-03-14 18:55:56 +00:00
ucol_close ( coll ) ;
2001-03-02 00:42:43 +00:00
2001-03-14 18:55:56 +00:00
u_uastrcpy ( temp , " & Z < DAVIS < MARK <DAV " ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( temp , u_strlen ( temp ) , UCOL_OFF , UCOL_DEFAULT_STRENGTH , NULL , & status ) ;
2001-03-15 02:35:49 +00:00
2001-03-14 18:55:56 +00:00
if ( U_SUCCESS ( status ) ) {
size = sizeof ( cnt2 ) / sizeof ( cnt2 [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
2001-03-15 22:29:33 +00:00
UCollationElements * iter ;
2001-03-14 18:55:56 +00:00
u_uastrcpy ( t1 , cnt2 [ i ] ) ;
u_uastrcpy ( t2 , cnt2 [ j ] ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
2001-03-15 22:29:33 +00:00
/* synwee : added collation element iterator test */
iter = ucol_openElements ( coll , t2 , u_strlen ( t2 ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Creation of iterator failed \n " ) ;
break ;
}
backAndForth ( iter ) ;
2002-07-29 21:04:18 +00:00
ucol_closeElements ( iter ) ;
2001-03-14 18:55:56 +00:00
}
}
2001-05-17 23:09:35 +00:00
}
2001-03-14 18:55:56 +00:00
ucol_close ( coll ) ;
2001-03-02 00:42:43 +00:00
}
2001-03-14 18:55:56 +00:00
2001-03-15 02:35:49 +00:00
/*
 * "black bird" variants used by BlackBirdTest while alternate handling is
 * set to UCOL_SHIFTED; listed in the order the test expects them to sort.
 */
static const char shifted[][20] = {
    " black bird ",  " black-bird ",  " blackbird ",
    " black Bird ",  " black-Bird ",  " blackBird ",
    " black birds ", " black-birds ", " blackbirds "
};
const static UCollationResult shiftedTert [ ] = {
2007-07-19 00:19:21 +00:00
UCOL_EQUAL ,
2001-03-15 02:35:49 +00:00
UCOL_EQUAL ,
UCOL_EQUAL ,
UCOL_LESS ,
UCOL_EQUAL ,
UCOL_EQUAL ,
UCOL_LESS ,
UCOL_EQUAL ,
UCOL_EQUAL
} ;
/*
 * "black bird" variants used by BlackBirdTest while alternate handling is
 * set to UCOL_NON_IGNORABLE; listed in the order the test expects them to
 * sort.
 */
static const char nonignorable[][20] = {
    " black bird ", " black Bird ", " black birds ",
    " black-bird ", " black-Bird ", " black-birds ",
    " blackbird ",  " blackBird ",  " blackbirds "
};
2002-03-28 18:26:25 +00:00
static void BlackBirdTest ( void ) {
2001-03-15 02:35:49 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-10-17 02:19:48 +00:00
UChar t1 [ 90 ] ;
UChar t2 [ 90 ] ;
2001-03-15 02:35:49 +00:00
uint32_t i = 0 , j = 0 ;
uint32_t size = 0 ;
2001-03-20 20:11:48 +00:00
UCollator * coll = ucol_open ( " en_US " , & status ) ;
2001-03-15 02:35:49 +00:00
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_OFF , & status ) ;
ucol_setAttribute ( coll , UCOL_ALTERNATE_HANDLING , UCOL_NON_IGNORABLE , & status ) ;
if ( U_SUCCESS ( status ) ) {
size = sizeof ( nonignorable ) / sizeof ( nonignorable [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
u_uastrcpy ( t1 , nonignorable [ i ] ) ;
u_uastrcpy ( t2 , nonignorable [ j ] ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
}
}
2001-05-17 23:09:35 +00:00
}
2001-03-15 02:35:49 +00:00
ucol_setAttribute ( coll , UCOL_ALTERNATE_HANDLING , UCOL_SHIFTED , & status ) ;
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_QUATERNARY , & status ) ;
if ( U_SUCCESS ( status ) ) {
size = sizeof ( shifted ) / sizeof ( shifted [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
u_uastrcpy ( t1 , shifted [ i ] ) ;
u_uastrcpy ( t2 , shifted [ j ] ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
}
}
2001-05-17 23:09:35 +00:00
}
2001-03-15 02:35:49 +00:00
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_TERTIARY , & status ) ;
if ( U_SUCCESS ( status ) ) {
size = sizeof ( shifted ) / sizeof ( shifted [ 0 ] ) ;
for ( i = 1 ; i < size ; i + + ) {
u_uastrcpy ( t1 , shifted [ i - 1 ] ) ;
u_uastrcpy ( t2 , shifted [ i ] ) ;
doTest ( coll , t1 , t2 , shiftedTert [ i ] ) ;
}
2001-05-17 23:09:35 +00:00
}
2001-03-15 02:35:49 +00:00
ucol_close ( coll ) ;
}
2001-03-16 19:03:53 +00:00
/*
 * Parallel tables for FunkyATest: row i of testSourceCases is compared with
 * row i of testTargetCases and the outcome must equal results[i]. All rows
 * are NUL-terminated UChar strings built from 'A' / U+00C0 plus combining
 * marks.
 */
static const UChar testSourceCases[][MAX_TOKEN_LEN] = {
    { 0x0041 /*'A'*/, 0x0300, 0x0301, 0x0000 },
    { 0x0041 /*'A'*/, 0x0300, 0x0316, 0x0000 },
    { 0x0041 /*'A'*/, 0x0300, 0x0000 },
    { 0x00C0, 0x0301, 0x0000 },
    /* this would work with forced normalization */
    { 0x00C0, 0x0316, 0x0000 }
};

static const UChar testTargetCases[][MAX_TOKEN_LEN] = {
    { 0x0041 /*'A'*/, 0x0301, 0x0300, 0x0000 },
    { 0x0041 /*'A'*/, 0x0316, 0x0300, 0x0000 },
    { 0x00C0, 0 },
    { 0x0041 /*'A'*/, 0x0301, 0x0300, 0x0000 },
    /* this would work with forced normalization */
    { 0x0041 /*'A'*/, 0x0316, 0x0300, 0x0000 }
};

static const UCollationResult results[] = {
    UCOL_GREATER,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_GREATER,
    UCOL_EQUAL
};
2002-03-28 18:26:25 +00:00
static void FunkyATest ( void )
2001-03-16 19:03:53 +00:00
{
2001-05-17 23:09:35 +00:00
2001-03-16 19:03:53 +00:00
int32_t i ;
UErrorCode status = U_ZERO_ERROR ;
2001-05-04 00:02:24 +00:00
UCollator * myCollation ;
2001-03-20 20:11:48 +00:00
myCollation = ucol_open ( " en_US " , & status ) ;
2001-03-16 19:03:53 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
2001-03-30 03:49:29 +00:00
return ;
2001-03-16 19:03:53 +00:00
}
log_verbose ( " Testing some A letters, for some reason \n " ) ;
ucol_setAttribute ( myCollation , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
for ( i = 0 ; i < 4 ; i + + )
{
doTest ( myCollation , testSourceCases [ i ] , testTargetCases [ i ] , results [ i ] ) ;
}
ucol_close ( myCollation ) ;
}
2001-03-20 00:56:37 +00:00
UColAttributeValue caseFirst [ ] = {
UCOL_OFF ,
UCOL_LOWER_FIRST ,
UCOL_UPPER_FIRST
} ;
2001-03-17 00:59:44 +00:00
UColAttributeValue alternateHandling [ ] = {
2001-03-20 00:56:37 +00:00
UCOL_NON_IGNORABLE ,
2001-03-17 00:59:44 +00:00
UCOL_SHIFTED
} ;
UColAttributeValue caseLevel [ ] = {
2001-03-20 00:56:37 +00:00
UCOL_OFF ,
2001-03-17 00:59:44 +00:00
UCOL_ON
} ;
UColAttributeValue strengths [ ] = {
2001-03-20 00:56:37 +00:00
UCOL_PRIMARY ,
2001-03-17 00:59:44 +00:00
UCOL_SECONDARY ,
UCOL_TERTIARY ,
UCOL_QUATERNARY ,
UCOL_IDENTICAL
} ;
2003-05-12 16:45:40 +00:00
#if 0
/* Printable names, index-aligned with the strengths[] table. */
static const char *strengthsC[] = {
    " UCOL_PRIMARY ",
    " UCOL_SECONDARY ",
    " UCOL_TERTIARY ",
    " UCOL_QUATERNARY ",
    " UCOL_IDENTICAL "
};

/* Printable names, index-aligned with the caseFirst[] table. */
static const char *caseFirstC[] = {
    " UCOL_OFF ",
    " UCOL_LOWER_FIRST ",
    " UCOL_UPPER_FIRST "
};

/* Printable names, index-aligned with the alternateHandling[] table. */
static const char *alternateHandlingC[] = {
    " UCOL_NON_IGNORABLE ",
    " UCOL_SHIFTED "
};

/* Printable names, index-aligned with the caseLevel[] table. */
static const char *caseLevelC[] = {
    " UCOL_OFF ",
    " UCOL_ON "
};

/* not used currently - does not test only prints */
/*
 * Dumps to stderr the sort key of one sample string under every combination
 * of case-first, alternate-handling, case-level and strength settings.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open(" en_US ", &status);
    uint32_t h, i, j, k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose(" PrintMarkDavis ");

    u_uastrcpy(m, " Mark Davis ");
    sizem = u_strlen(m);

    m[1] = 0xe4;

    for (i = 0; i < sizem; i++) {
        fprintf(stderr, " \\ u%04X ", m[i]);
    }
    fprintf(stderr, " \n ");

    for (h = 0; h < sizeof(caseFirst) / sizeof(caseFirst[0]); h++) {
        /* Index with h: i still holds sizem from the loop above and would read past the table. */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, " caseFirst: %s \n ", caseFirstC[h]);

        for (i = 0; i < sizeof(alternateHandling) / sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s \n ", alternateHandlingC[i]);

            for (j = 0; j < sizeof(caseLevel) / sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s \n ", caseLevelC[j]);

                for (k = 0; k < sizeof(strengths) / sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s \n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, " %s \n ", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }
            }
        }
    }
}
#endif
2001-03-17 00:59:44 +00:00
2002-03-28 18:26:25 +00:00
static void BillFairmanTest ( void ) {
2001-03-21 00:24:48 +00:00
/*
* * check for actual locale via ICU resource bundles
* *
* * lp points to the original locale ( " fr_FR_.... " )
*/
2001-03-27 00:40:24 +00:00
UResourceBundle * lr , * cr ;
UErrorCode lec = U_ZERO_ERROR ;
const char * lp = " fr_FR_you_ll_never_find_this_locale " ;
log_verbose ( " BillFairmanTest \n " ) ;
lr = ures_open ( NULL , lp , & lec ) ;
if ( lr ) {
2003-11-11 21:41:47 +00:00
cr = ures_getByKey ( lr , " collations " , 0 , & lec ) ;
2001-03-27 00:40:24 +00:00
if ( cr ) {
2009-04-23 00:23:57 +00:00
lp = ures_getLocaleByType ( cr , ULOC_ACTUAL_LOCALE , & lec ) ;
2001-03-27 00:40:24 +00:00
if ( lp ) {
if ( U_SUCCESS ( lec ) ) {
if ( strcmp ( lp , " fr " ) ! = 0 ) {
log_err ( " Wrong locale for French Collation Data, expected \" fr \" got %s " , lp ) ;
}
}
}
ures_close ( cr ) ;
2001-03-21 22:05:42 +00:00
}
2001-03-27 00:40:24 +00:00
ures_close ( lr ) ;
2001-03-21 22:05:42 +00:00
}
2001-03-20 00:56:37 +00:00
}
2001-04-13 00:05:42 +00:00
/*
 * Escaped strings for TestChMove, listed in the order the "cs" collator is
 * expected to sort them (note the "ch" group placed after the "h" group).
 * NOTE(review): the literals carry padding spaces inside the quotes and a
 * space between the backslash and "u030C"; as written u_unescape() will not
 * see a \u escape. This looks like an extraction artifact -- confirm against
 * the upstream file.
 */
static const char chTest[][20] = {
    " c ",
    " C ",
    " ca ", " cb ", " cx ", " cy ", " CZ ",
    " c \\ u030C ", " C \\ u030C ",
    " h ",
    " H ",
    " ha ", " Ha ", " harly ", " hb ", " HB ", " hx ", " HX ", " hy ", " HY ",
    " ch ", " cH ", " Ch ", " CH ",
    " cha ", " charly ", " che ", " chh ", " chch ", " chr ",
    " i ", " I ", " iarly ",
    " r ", " R ",
    " r \\ u030C ", " R \\ u030C ",
    " s ",
    " S ",
    " s \\ u030C ", " S \\ u030C ",
    " z ", " Z ",
    " z \\ u030C ", " Z \\ u030C "
};
static void TestChMove ( void ) {
2008-02-19 07:09:38 +00:00
UChar t1 [ 256 ] = { 0 } ;
UChar t2 [ 256 ] = { 0 } ;
2001-04-13 00:05:42 +00:00
2008-02-19 07:09:38 +00:00
uint32_t i = 0 , j = 0 ;
uint32_t size = 0 ;
UErrorCode status = U_ZERO_ERROR ;
2001-04-13 00:05:42 +00:00
2008-02-19 07:09:38 +00:00
UCollator * coll = ucol_open ( " cs " , & status ) ;
2001-04-13 00:05:42 +00:00
2008-02-19 07:09:38 +00:00
if ( U_SUCCESS ( status ) ) {
size = sizeof ( chTest ) / sizeof ( chTest [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
u_unescape ( chTest [ i ] , t1 , 256 ) ;
u_unescape ( chTest [ j ] , t2 , 256 ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
}
}
2001-04-13 00:05:42 +00:00
}
2008-02-19 07:09:38 +00:00
else {
2010-07-14 16:09:03 +00:00
log_data_err ( " Can't open collator " ) ;
2008-02-19 07:09:38 +00:00
}
ucol_close ( coll ) ;
2001-04-13 00:05:42 +00:00
}
2004-01-16 23:44:58 +00:00
2014-01-10 02:24:54 +00:00
/*
2001-04-23 03:50:15 +00:00
const static char impTest [ ] [ 20 ] = {
" \\ u4e00 " ,
" a " ,
" A " ,
" b " ,
" B " ,
" \\ u4e01 "
} ;
2014-01-10 02:24:54 +00:00
*/
2001-04-23 03:50:15 +00:00
/*
 * Runs a table of tailorings anchored on CJK ideographs (U+4E00 and
 * neighbours). Each row supplies a rule string, the strings in their
 * expected order, and how many of the data slots are in use; the row is
 * handed to genericRulesStarter().
 * NOTE(review): the literals carry padding spaces and a space after each
 * backslash pair ("\\ u4e00"), which looks like an extraction artifact --
 * confirm against the upstream file.
 */
static void TestImplicitTailoring(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        {
            /* Tailor b and c before U+4E00. */
            " &[before 1] \\ u4e00 < b < c "
            /* Now, before U+4E00 is c; put d and e after that. */
            " &[before 1] \\ u4e00 < d < e ",
            { " b ", " c ", " d ", " e ", " \\ u4e00 " }, 5 },
        { " & \\ u4e00 < a <<< A < b <<< B ", { " \\ u4e00 ", " a ", " A ", " b ", " B ", " \\ u4e01 " }, 6 },
        { " &[before 1] \\ u4e00 < \\ u4e01 < \\ u4e02 ", { " \\ u4e01 ", " \\ u4e02 ", " \\ u4e00 " }, 3 },
        { " &[before 1] \\ u4e01 < \\ u4e02 < \\ u4e03 ", { " \\ u4e02 ", " \\ u4e03 ", " \\ u4e01 " }, 3 }
    };

    /* Unsigned index: the bound is a sizeof expression. */
    uint32_t i = 0;

    for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
        genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
    }
}
static void TestFCDProblem ( void ) {
UChar t1 [ 256 ] = { 0 } ;
UChar t2 [ 256 ] = { 0 } ;
2001-05-04 00:02:24 +00:00
const char * s1 = " \\ u0430 \\ u0306 \\ u0325 " ;
const char * s2 = " \\ u04D1 \\ u0325 " ;
2001-04-23 03:50:15 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " " , & status ) ;
u_unescape ( s1 , t1 , 256 ) ;
u_unescape ( s2 , t2 , 256 ) ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_OFF , & status ) ;
doTest ( coll , t1 , t2 , UCOL_EQUAL ) ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
doTest ( coll , t1 , t2 , UCOL_EQUAL ) ;
2001-10-17 02:19:48 +00:00
ucol_close ( coll ) ;
2001-04-23 03:50:15 +00:00
}
2007-12-15 10:01:09 +00:00
/*
The largest normalization form is 18 for NFKC / NFKD , 4 for NFD and 3 for NFC
We ' re only using NFC / NFD in this test .
*/
# define NORM_BUFFER_TEST_LEN 18
2001-05-08 23:38:16 +00:00
typedef struct {
2001-11-10 06:54:28 +00:00
UChar32 u ;
2001-05-08 23:38:16 +00:00
UChar NFC [ NORM_BUFFER_TEST_LEN ] ;
UChar NFD [ NORM_BUFFER_TEST_LEN ] ;
} tester ;
2001-04-19 19:01:39 +00:00
static void TestComposeDecompose ( void ) {
2007-12-15 10:01:09 +00:00
/* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
static const UChar UNICODESET_STR [ ] = {
0x5B , 0x5B , 0x3A , 0x4E , 0x46 , 0x44 , 0x5F , 0x49 , 0x6E , 0x65 , 0x72 , 0x74 , 0x3D , 0x66 , 0x61 ,
0x6C , 0x73 , 0x65 , 0x3A , 0x5D , 0x5B , 0x3A , 0x4E , 0x46 , 0x43 , 0x5F , 0x49 , 0x6E , 0x65 , 0x72 ,
0x74 , 0x3D , 0x66 , 0x61 , 0x6C , 0x73 , 0x65 , 0x3A , 0x5D , 0x5D , 0
} ;
2002-05-31 01:18:14 +00:00
int32_t noOfLoc ;
2001-05-08 23:38:16 +00:00
int32_t i = 0 , j = 0 ;
2001-05-17 23:09:35 +00:00
2001-05-08 23:38:16 +00:00
UErrorCode status = U_ZERO_ERROR ;
const char * locName = NULL ;
uint32_t nfcSize ;
uint32_t nfdSize ;
2002-05-31 01:18:14 +00:00
tester * * t ;
2001-05-08 23:38:16 +00:00
uint32_t noCases = 0 ;
UCollator * coll = NULL ;
2001-11-10 06:54:28 +00:00
UChar32 u = 0 ;
UChar comp [ NORM_BUFFER_TEST_LEN ] ;
uint32_t len = 0 ;
2003-07-24 23:23:19 +00:00
UCollationElements * iter ;
2007-12-15 10:01:09 +00:00
USet * charsToTest = uset_openPattern ( UNICODESET_STR , - 1 , & status ) ;
2008-02-08 03:39:40 +00:00
int32_t charsToTestSize ;
2001-05-17 23:09:35 +00:00
2002-05-31 01:18:14 +00:00
noOfLoc = uloc_countAvailable ( ) ;
2003-06-04 19:02:41 +00:00
coll = ucol_open ( " " , & status ) ;
2009-06-09 21:28:13 +00:00
if ( U_FAILURE ( status ) ) {
log_data_err ( " Error opening collator -> %s (Are you missing data?) \n " , u_errorName ( status ) ) ;
2007-12-15 10:01:09 +00:00
return ;
2003-06-04 19:02:41 +00:00
}
2008-02-08 03:39:40 +00:00
charsToTestSize = uset_size ( charsToTest ) ;
2008-02-10 20:17:14 +00:00
if ( charsToTestSize < = 0 ) {
log_err ( " Set was zero. Missing data? \n " ) ;
return ;
}
2011-06-10 18:56:08 +00:00
t = ( tester * * ) malloc ( charsToTestSize * sizeof ( tester * ) ) ;
2008-02-08 03:39:40 +00:00
t [ 0 ] = ( tester * ) malloc ( sizeof ( tester ) ) ;
log_verbose ( " Testing UCA extensively for %d characters \n " , charsToTestSize ) ;
2001-05-17 23:09:35 +00:00
2007-12-15 10:01:09 +00:00
for ( u = 0 ; u < charsToTestSize ; u + + ) {
UChar32 ch = uset_charAt ( charsToTest , u ) ;
len = 0 ;
2011-07-27 05:53:56 +00:00
U16_APPEND_UNSAFE ( comp , len , ch ) ;
2001-11-10 06:54:28 +00:00
nfcSize = unorm_normalize ( comp , len , UNORM_NFC , 0 , t [ noCases ] - > NFC , NORM_BUFFER_TEST_LEN , & status ) ;
nfdSize = unorm_normalize ( comp , len , UNORM_NFD , 0 , t [ noCases ] - > NFD , NORM_BUFFER_TEST_LEN , & status ) ;
2001-05-17 23:09:35 +00:00
2004-11-11 23:34:58 +00:00
if ( nfcSize ! = nfdSize | | ( uprv_memcmp ( t [ noCases ] - > NFC , t [ noCases ] - > NFD , nfcSize * sizeof ( UChar ) ) ! = 0 )
2001-11-10 06:54:28 +00:00
| | ( len ! = nfdSize | | ( uprv_memcmp ( comp , t [ noCases ] - > NFD , nfdSize * sizeof ( UChar ) ) ! = 0 ) ) ) {
2007-12-15 10:01:09 +00:00
t [ noCases ] - > u = ch ;
2001-11-10 06:54:28 +00:00
if ( len ! = nfdSize | | ( uprv_memcmp ( comp , t [ noCases ] - > NFD , nfdSize * sizeof ( UChar ) ) ! = 0 ) ) {
2007-12-15 10:01:09 +00:00
u_strncpy ( t [ noCases ] - > NFC , comp , len ) ;
t [ noCases ] - > NFC [ len ] = 0 ;
2001-11-10 06:54:28 +00:00
}
2001-05-08 23:38:16 +00:00
noCases + + ;
2002-07-29 21:04:18 +00:00
t [ noCases ] = ( tester * ) malloc ( sizeof ( tester ) ) ;
2001-06-22 18:35:01 +00:00
uprv_memset ( t [ noCases ] , 0 , sizeof ( tester ) ) ;
2004-11-11 23:34:58 +00:00
}
2001-05-08 23:38:16 +00:00
}
2007-12-15 10:01:09 +00:00
log_verbose ( " Testing %d/%d of possible test cases \n " , noCases , charsToTestSize ) ;
uset_close ( charsToTest ) ;
charsToTest = NULL ;
2001-05-17 23:09:35 +00:00
2002-09-20 16:02:16 +00:00
for ( u = 0 ; u < ( UChar32 ) noCases ; u + + ) {
2007-12-15 10:01:09 +00:00
if ( ! ucol_equal ( coll , t [ u ] - > NFC , - 1 , t [ u ] - > NFD , - 1 ) ) {
log_err ( " Failure: codePoint %05X fails TestComposeDecompose in the UCA \n " , t [ u ] - > u ) ;
doTest ( coll , t [ u ] - > NFC , t [ u ] - > NFD , UCOL_EQUAL ) ;
}
2001-06-22 18:35:01 +00:00
}
2001-11-10 06:54:28 +00:00
/*
2007-12-15 10:01:09 +00:00
for ( u = 0 ; u < charsToTestSize ; u + + ) {
2001-11-10 06:54:28 +00:00
if ( ! ( u & 0xFFFF ) ) {
log_verbose ( " %08X " , u ) ;
}
uprv_memset ( t [ noCases ] , 0 , sizeof ( tester ) ) ;
t [ noCases ] - > u = u ;
len = 0 ;
2011-07-27 05:53:56 +00:00
U16_APPEND_UNSAFE ( comp , len , u ) ;
2001-11-10 06:54:28 +00:00
comp [ len ] = 0 ;
nfcSize = unorm_normalize ( comp , len , UNORM_NFC , 0 , t [ noCases ] - > NFC , NORM_BUFFER_TEST_LEN , & status ) ;
nfdSize = unorm_normalize ( comp , len , UNORM_NFD , 0 , t [ noCases ] - > NFD , NORM_BUFFER_TEST_LEN , & status ) ;
doTest ( coll , comp , t [ noCases ] - > NFD , UCOL_EQUAL ) ;
doTest ( coll , comp , t [ noCases ] - > NFC , UCOL_EQUAL ) ;
}
*/
2001-06-22 18:35:01 +00:00
ucol_close ( coll ) ;
2001-11-10 06:54:28 +00:00
log_verbose ( " Testing locales, number of cases = %i \n " , noCases ) ;
2001-05-08 23:38:16 +00:00
for ( i = 0 ; i < noOfLoc ; i + + ) {
status = U_ZERO_ERROR ;
locName = uloc_getAvailable ( i ) ;
if ( hasCollationElements ( locName ) ) {
2001-06-22 18:35:01 +00:00
char cName [ 256 ] ;
UChar name [ 256 ] ;
int32_t nameSize = uloc_getDisplayName ( locName , NULL , name , sizeof ( cName ) , & status ) ;
2001-05-08 23:38:16 +00:00
2001-06-22 18:35:01 +00:00
for ( j = 0 ; j < nameSize ; j + + ) {
cName [ j ] = ( char ) name [ j ] ;
2001-05-08 23:38:16 +00:00
}
2001-06-22 18:35:01 +00:00
cName [ nameSize ] = 0 ;
log_verbose ( " \n Testing locale %s (%s) \n " , locName , cName ) ;
2001-05-17 23:09:35 +00:00
2001-05-08 23:38:16 +00:00
coll = ucol_open ( locName , & status ) ;
2001-10-31 23:59:35 +00:00
ucol_setStrength ( coll , UCOL_IDENTICAL ) ;
2003-12-10 23:56:55 +00:00
iter = ucol_openElements ( coll , t [ u ] - > NFD , u_strlen ( t [ u ] - > NFD ) , & status ) ;
2001-05-17 23:09:35 +00:00
2002-09-20 16:02:16 +00:00
for ( u = 0 ; u < ( UChar32 ) noCases ; u + + ) {
2007-12-15 10:01:09 +00:00
if ( ! ucol_equal ( coll , t [ u ] - > NFC , - 1 , t [ u ] - > NFD , - 1 ) ) {
log_err ( " Failure: codePoint %05X fails TestComposeDecompose for locale %s \n " , t [ u ] - > u , cName ) ;
doTest ( coll , t [ u ] - > NFC , t [ u ] - > NFD , UCOL_EQUAL ) ;
log_verbose ( " Testing NFC \n " ) ;
ucol_setText ( iter , t [ u ] - > NFC , u_strlen ( t [ u ] - > NFC ) , & status ) ;
backAndForth ( iter ) ;
log_verbose ( " Testing NFD \n " ) ;
ucol_setText ( iter , t [ u ] - > NFD , u_strlen ( t [ u ] - > NFD ) , & status ) ;
backAndForth ( iter ) ;
}
2001-05-08 23:38:16 +00:00
}
2003-12-10 23:56:55 +00:00
ucol_closeElements ( iter ) ;
2001-05-08 23:38:16 +00:00
ucol_close ( coll ) ;
2001-04-19 19:01:39 +00:00
}
}
2002-09-20 16:02:16 +00:00
for ( u = 0 ; u < = ( UChar32 ) noCases ; u + + ) {
2002-07-29 21:04:18 +00:00
free ( t [ u ] ) ;
2001-05-08 23:38:16 +00:00
}
2002-07-29 21:04:18 +00:00
free ( t ) ;
2001-04-19 19:01:39 +00:00
}
2002-03-28 18:26:25 +00:00
static void TestEmptyRule ( void ) {
2001-04-23 03:50:15 +00:00
UErrorCode status = U_ZERO_ERROR ;
UChar rulez [ ] = { 0 } ;
2001-09-22 01:24:03 +00:00
UCollator * coll = ucol_openRules ( rulez , 0 , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
2001-04-23 03:50:15 +00:00
ucol_close ( coll ) ;
}
2001-04-30 19:11:32 +00:00
2002-03-28 18:26:25 +00:00
static void TestUCARules ( void ) {
2001-04-30 19:11:32 +00:00
UErrorCode status = U_ZERO_ERROR ;
UChar b [ 256 ] ;
2001-05-31 06:07:06 +00:00
UChar * rules = b ;
2003-06-04 19:02:41 +00:00
uint32_t ruleLen = 0 ;
2001-05-31 06:07:06 +00:00
UCollator * UCAfromRules = NULL ;
2001-04-30 19:11:32 +00:00
UCollator * coll = ucol_open ( " " , & status ) ;
2003-06-04 19:02:41 +00:00
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
ruleLen = ucol_getRulesEx ( coll , UCOL_FULL_RULES , rules , 256 ) ;
2001-05-31 17:27:44 +00:00
log_verbose ( " TestUCARules \n " ) ;
2001-05-31 06:07:06 +00:00
if ( ruleLen > 256 ) {
rules = ( UChar * ) malloc ( ( ruleLen + 1 ) * sizeof ( UChar ) ) ;
ruleLen = ucol_getRulesEx ( coll , UCOL_FULL_RULES , rules , ruleLen ) ;
}
log_verbose ( " Rules length is %d \n " , ruleLen ) ;
2001-09-22 01:24:03 +00:00
UCAfromRules = ucol_openRules ( rules , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
2001-05-31 06:07:06 +00:00
if ( U_SUCCESS ( status ) ) {
ucol_close ( UCAfromRules ) ;
} else {
log_verbose ( " Unable to create a collator from UCARules! \n " ) ;
}
/*
2001-04-30 19:11:32 +00:00
u_unescape ( blah , b , 256 ) ;
ucol_getSortKey ( coll , b , 1 , res , 256 ) ;
2001-05-31 06:07:06 +00:00
*/
2001-04-30 19:11:32 +00:00
ucol_close ( coll ) ;
2001-09-28 16:34:05 +00:00
if ( rules ! = b ) {
2001-05-31 06:07:06 +00:00
free ( rules ) ;
}
2001-04-30 19:11:32 +00:00
}
/* Pinyin tonal order */
/*
    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
.. (\u00fc)

However, in testing we got the following order:
    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
.. (\u0113)
    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
.. (\u01d8)
      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
*/
2001-05-02 05:05:06 +00:00
2002-03-28 18:26:25 +00:00
/*
 * Tailors four accented forms of each of a, e, i, o, u with "[before 1]"
 * rules, adds five more letters after u, and hands the rule string plus the
 * expected ordering in `data` to genericRulesStarter().
 * NOTE(review): the literals carry padding spaces and a space after each
 * backslash pair ("\\ u0101"), which looks like an extraction artifact --
 * confirm against the upstream file.
 */
static void TestBefore(void) {
    static const char *data[] = {
        " \\ u0101 ", " \\ u00e1 ", " \\ u01ce ", " \\ u00e0 ", " A ",
        " \\ u0113 ", " \\ u00e9 ", " \\ u011b ", " \\ u00e8 ", " E ",
        " \\ u012b ", " \\ u00ed ", " \\ u01d0 ", " \\ u00ec ", " I ",
        " \\ u014d ", " \\ u00f3 ", " \\ u01d2 ", " \\ u00f2 ", " O ",
        " \\ u016b ", " \\ u00fa ", " \\ u01d4 ", " \\ u00f9 ", " U ",
        " \\ u01d6 ", " \\ u01d8 ", " \\ u01da ", " \\ u01dc ", " \\ u00fc "
    };

    genericRulesStarter(
        " &[before 1]a< \\ u0101< \\ u00e1< \\ u01ce< \\ u00e0 "
        " &[before 1]e< \\ u0113< \\ u00e9< \\ u011b< \\ u00e8 "
        " &[before 1]i< \\ u012b< \\ u00ed< \\ u01d0< \\ u00ec "
        " &[before 1]o< \\ u014d< \\ u00f3< \\ u01d2< \\ u00f2 "
        " &[before 1]u< \\ u016b< \\ u00fa< \\ u01d4< \\ u00f9 "
        " &u< \\ u01d6< \\ u01d8< \\ u01da< \\ u01dc< \\ u00fc ",
        data, sizeof(data) / sizeof(data[0]));
}
2004-06-03 22:08:39 +00:00
#if 0
/* superseded by TestBeforePinyin */
/*
 * Passes the accented-vowel list below, in its expected order, to
 * genericLocaleStarter() for the "zh" locale. Disabled.
 */
static void TestJ784(void) {
    static const char *data[] = {
        " A ", " \\ u0101 ", " \\ u00e1 ", " \\ u01ce ", " \\ u00e0 ",
        " E ", " \\ u0113 ", " \\ u00e9 ", " \\ u011b ", " \\ u00e8 ",
        " I ", " \\ u012b ", " \\ u00ed ", " \\ u01d0 ", " \\ u00ec ",
        " O ", " \\ u014d ", " \\ u00f3 ", " \\ u01d2 ", " \\ u00f2 ",
        " U ", " \\ u016b ", " \\ u00fa ", " \\ u01d4 ", " \\ u00f9 ",
        " \\ u00fc ",
        " \\ u01d6 ", " \\ u01d8 ", " \\ u01da ", " \\ u01dc "
    };

    genericLocaleStarter(" zh ", data, sizeof(data) / sizeof(data[0]));
}
#endif
2001-04-30 19:11:32 +00:00
2004-06-03 22:08:39 +00:00
#if 0
/* superseded by the changes to the lv locale */
/*
 * Passes the four-letter list below, in its expected order, to
 * genericLocaleStarter() for the "lv" locale. Disabled.
 */
static void TestJ831(void) {
    static const char *data[] = {
        " I ",
        " i ",
        " Y ",
        " y "
    };

    genericLocaleStarter(" lv ", data, sizeof(data) / sizeof(data[0]));
}
#endif
2001-04-30 19:11:32 +00:00
2002-03-28 18:26:25 +00:00
/*
 * Checks the ordering of the ae ligature relative to "ae"/"af" style
 * strings twice over the same data: once through the "fr" locale and once
 * through an explicit "[backwards 2]" rule string.
 */
static void TestJ815(void) {
    static const char *data[] = {
        " aa ",
        " Aa ",
        " ab ",
        " Ab ",
        " ad ",
        " Ad ",
        " ae ",
        " Ae ",
        " \\ u00e6 ",
        " \\ u00c6 ",
        " af ",
        " Af ",
        " b ",
        " B "
    };

    genericLocaleStarter(" fr ", data, sizeof(data) / sizeof(data[0]));
    genericRulesStarter(" [backwards 2]&A<< \\ u00e6/e<<< \\ u00c6/E ", data, sizeof(data) / sizeof(data[0]));
}
2002-03-28 18:26:25 +00:00
static void TestCase ( void )
2001-05-17 23:16:34 +00:00
{
const static UChar gRules [ MAX_TOKEN_LEN ] =
/*" & 0 < 1,\u2461<a,A"*/
{ 0x0026 , 0x0030 , 0x003C , 0x0031 , 0x002C , 0x2460 , 0x003C , 0x0061 , 0x002C , 0x0041 , 0x0000 } ;
const static UChar testCase [ ] [ MAX_TOKEN_LEN ] =
{
/*0*/ { 0x0031 /*'1'*/ , 0x0061 /*'a'*/ , 0x0000 } ,
/*1*/ { 0x0031 /*'1'*/ , 0x0041 /*'A'*/ , 0x0000 } ,
/*2*/ { 0x2460 /*circ'1'*/ , 0x0061 /*'a'*/ , 0x0000 } ,
/*3*/ { 0x2460 /*circ'1'*/ , 0x0041 /*'A'*/ , 0x0000 }
} ;
const static UCollationResult caseTestResults [ ] [ 9 ] =
{
2007-07-19 00:19:21 +00:00
{ UCOL_LESS , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_EQUAL , UCOL_LESS } ,
{ UCOL_GREATER , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_EQUAL , UCOL_GREATER } ,
{ UCOL_LESS , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_GREATER , UCOL_LESS , UCOL_EQUAL , UCOL_EQUAL , UCOL_LESS } ,
{ UCOL_GREATER , UCOL_LESS , UCOL_GREATER , UCOL_EQUAL , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_EQUAL , UCOL_GREATER }
2001-05-17 23:16:34 +00:00
} ;
const static UColAttributeValue caseTestAttributes [ ] [ 2 ] =
{
2007-07-19 00:19:21 +00:00
{ UCOL_LOWER_FIRST , UCOL_OFF } ,
{ UCOL_UPPER_FIRST , UCOL_OFF } ,
{ UCOL_LOWER_FIRST , UCOL_ON } ,
{ UCOL_UPPER_FIRST , UCOL_ON }
2001-05-17 23:16:34 +00:00
} ;
int32_t i , j , k ;
UErrorCode status = U_ZERO_ERROR ;
2003-11-12 20:45:53 +00:00
UCollationElements * iter ;
2001-05-17 23:16:34 +00:00
UCollator * myCollation ;
myCollation = ucol_open ( " en_US " , & status ) ;
2003-11-12 20:45:53 +00:00
2001-05-17 23:16:34 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
2001-05-17 23:16:34 +00:00
return ;
}
log_verbose ( " Testing different case settings \n " ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
for ( k = 0 ; k < 4 ; k + + ) {
ucol_setAttribute ( myCollation , UCOL_CASE_FIRST , caseTestAttributes [ k ] [ 0 ] , & status ) ;
ucol_setAttribute ( myCollation , UCOL_CASE_LEVEL , caseTestAttributes [ k ] [ 1 ] , & status ) ;
log_verbose ( " Case first = %d, Case level = %d \n " , caseTestAttributes [ k ] [ 0 ] , caseTestAttributes [ k ] [ 1 ] ) ;
for ( i = 0 ; i < 3 ; i + + ) {
for ( j = i + 1 ; j < 4 ; j + + ) {
doTest ( myCollation , testCase [ i ] , testCase [ j ] , caseTestResults [ k ] [ 3 * i + j - 1 ] ) ;
}
}
}
ucol_close ( myCollation ) ;
2001-09-22 01:24:03 +00:00
myCollation = ucol_openRules ( gRules , u_strlen ( gRules ) , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
2001-05-17 23:16:34 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
log_verbose ( " Testing different case settings with custom rules \n " ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
2004-11-11 23:34:58 +00:00
2001-05-17 23:16:34 +00:00
for ( k = 0 ; k < 4 ; k + + ) {
ucol_setAttribute ( myCollation , UCOL_CASE_FIRST , caseTestAttributes [ k ] [ 0 ] , & status ) ;
ucol_setAttribute ( myCollation , UCOL_CASE_LEVEL , caseTestAttributes [ k ] [ 1 ] , & status ) ;
for ( i = 0 ; i < 3 ; i + + ) {
for ( j = i + 1 ; j < 4 ; j + + ) {
2001-09-21 21:22:44 +00:00
log_verbose ( " k:%d, i:%d, j:%d \n " , k , i , j ) ;
2001-05-17 23:16:34 +00:00
doTest ( myCollation , testCase [ i ] , testCase [ j ] , caseTestResults [ k ] [ 3 * i + j - 1 ] ) ;
2003-11-12 20:45:53 +00:00
iter = ucol_openElements ( myCollation , testCase [ i ] , u_strlen ( testCase [ i ] ) , & status ) ;
backAndForth ( iter ) ;
ucol_closeElements ( iter ) ;
iter = ucol_openElements ( myCollation , testCase [ j ] , u_strlen ( testCase [ j ] ) , & status ) ;
backAndForth ( iter ) ;
ucol_closeElements ( iter ) ;
2001-05-17 23:16:34 +00:00
}
}
}
2001-09-28 16:34:05 +00:00
ucol_close ( myCollation ) ;
2001-05-17 23:16:34 +00:00
{
const static char * lowerFirst [ ] = {
" h " ,
" H " ,
" ch " ,
" Ch " ,
" CH " ,
" cha " ,
" chA " ,
" Cha " ,
" ChA " ,
" CHa " ,
" CHA " ,
" i " ,
" I "
} ;
const static char * upperFirst [ ] = {
" H " ,
" h " ,
" CH " ,
" Ch " ,
" ch " ,
" CHA " ,
" CHa " ,
" ChA " ,
" Cha " ,
" chA " ,
" cha " ,
" I " ,
" i "
} ;
log_verbose ( " mixed case test \n " ) ;
log_verbose ( " lower first, case level off \n " ) ;
2014-02-25 21:21:49 +00:00
genericRulesStarter ( " [caseFirst lower]&H<ch<<<Ch<<<CH " , lowerFirst , sizeof ( lowerFirst ) / sizeof ( lowerFirst [ 0 ] ) ) ;
2001-05-17 23:16:34 +00:00
log_verbose ( " upper first, case level off \n " ) ;
2014-02-25 21:21:49 +00:00
genericRulesStarter ( " [caseFirst upper]&H<ch<<<Ch<<<CH " , upperFirst , sizeof ( upperFirst ) / sizeof ( upperFirst [ 0 ] ) ) ;
2001-05-17 23:16:34 +00:00
log_verbose ( " lower first, case level on \n " ) ;
2014-02-25 21:21:49 +00:00
genericRulesStarter ( " [caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH " , lowerFirst , sizeof ( lowerFirst ) / sizeof ( lowerFirst [ 0 ] ) ) ;
2001-05-17 23:16:34 +00:00
log_verbose ( " upper first, case level on \n " ) ;
2014-02-25 21:21:49 +00:00
genericRulesStarter ( " [caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH " , upperFirst , sizeof ( upperFirst ) / sizeof ( upperFirst [ 0 ] ) ) ;
2001-05-17 23:16:34 +00:00
}
}
2002-03-28 18:26:25 +00:00
static void TestIncrementalNormalize ( void ) {
2001-05-17 23:09:35 +00:00
2003-01-20 07:42:58 +00:00
/*UChar baseA =0x61;*/
2001-05-17 23:09:35 +00:00
UChar baseA = 0x41 ;
2001-05-18 20:53:01 +00:00
/* UChar baseB = 0x42;*/
2006-10-11 08:31:55 +00:00
static const UChar ccMix [ ] = { 0x316 , 0x321 , 0x300 } ;
2003-01-20 07:42:58 +00:00
/*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2001-05-18 20:53:01 +00:00
/*
0x316 is combining grave accent below , cc = 220
0x321 is combining palatalized hook below , cc = 202
0x300 is combining grave accent , cc = 230
*/
2001-05-17 23:09:35 +00:00
2008-02-17 19:32:34 +00:00
# define MAXSLEN 2000
2006-10-11 08:31:55 +00:00
/*int maxSLen = 64000;*/
2001-05-17 23:09:35 +00:00
int sLen ;
int i ;
2001-05-25 19:30:01 +00:00
UCollator * coll ;
UErrorCode status = U_ZERO_ERROR ;
UCollationResult result ;
2010-04-07 16:18:38 +00:00
int32_t myQ = getTestOption ( QUICK_OPTION ) ;
2003-02-20 01:13:36 +00:00
2010-04-07 16:18:38 +00:00
if ( getTestOption ( QUICK_OPTION ) < 0 ) {
setTestOption ( QUICK_OPTION , 1 ) ;
2003-02-20 01:13:36 +00:00
}
2001-05-25 19:30:01 +00:00
{
2001-05-28 20:32:29 +00:00
/* Test 1. Run very long unnormalized strings, to force overflow of*/
/* most buffers along the way.*/
2008-02-17 19:32:34 +00:00
UChar strA [ MAXSLEN + 1 ] ;
UChar strB [ MAXSLEN + 1 ] ;
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
coll = ucol_open ( " en_US " , & status ) ;
2003-06-04 19:02:41 +00:00
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
2002-08-21 19:09:33 +00:00
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
2001-09-28 16:34:05 +00:00
2008-02-17 19:32:34 +00:00
/*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
/*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2003-02-20 01:13:36 +00:00
/*for (sLen = 1000; sLen<1001; sLen++) {*/
for ( sLen = 500 ; sLen < 501 ; sLen + + ) {
2003-04-30 23:26:55 +00:00
/*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2001-05-25 19:30:01 +00:00
strA [ 0 ] = baseA ;
strB [ 0 ] = baseA ;
for ( i = 1 ; i < = sLen - 1 ; i + + ) {
strA [ i ] = ccMix [ i % 3 ] ;
strB [ sLen - i ] = ccMix [ i % 3 ] ;
}
strA [ sLen ] = 0 ;
strB [ sLen ] = 0 ;
2001-09-28 16:34:05 +00:00
2001-05-28 20:32:29 +00:00
ucol_setStrength ( coll , UCOL_TERTIARY ) ; /* Do test with default strength, which runs*/
doTest ( coll , strA , strB , UCOL_EQUAL ) ; /* optimized functions in the impl*/
ucol_setStrength ( coll , UCOL_IDENTICAL ) ; /* Do again with the slow, general impl.*/
2001-05-25 19:30:01 +00:00
doTest ( coll , strA , strB , UCOL_EQUAL ) ;
2001-05-17 23:09:35 +00:00
}
2001-05-25 19:30:01 +00:00
}
2010-04-07 16:18:38 +00:00
setTestOption ( QUICK_OPTION , myQ ) ;
2003-02-20 01:13:36 +00:00
2001-05-17 23:09:35 +00:00
2001-05-28 20:32:29 +00:00
/* Test 2: Non-normal sequence in a string that extends to the last character*/
/* of the string. Checks a couple of edge cases.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x41 , 0x300 , 0x316 , 0 } ;
static const UChar strB [ ] = { 0x41 , 0xc0 , 0x316 , 0 } ;
2001-05-25 19:30:01 +00:00
ucol_setStrength ( coll , UCOL_TERTIARY ) ;
2001-05-17 23:09:35 +00:00
doTest ( coll , strA , strB , UCOL_EQUAL ) ;
}
2001-05-28 20:32:29 +00:00
/* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2004-11-11 23:34:58 +00:00
/* New UCA 3.1.1.
* test below used a code point from Desseret , which sorts differently
2002-06-13 18:35:27 +00:00
* than d800 dc00
*/
/*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x41 , 0x300 , 0x316 , 0xD800 , 0xDC01 , 0 } ;
static const UChar strB [ ] = { 0x41 , 0xc0 , 0x316 , 0xD800 , 0xDC00 , 0 } ;
2001-05-25 19:30:01 +00:00
ucol_setStrength ( coll , UCOL_TERTIARY ) ;
doTest ( coll , strA , strB , UCOL_GREATER ) ;
}
2001-05-28 20:32:29 +00:00
/* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x00 , 0x42 , 0x00 } ;
static const UChar strB [ ] = { 0x41 , 0x00 , 0x00 , 0x00 } ;
2001-05-25 19:30:01 +00:00
char sortKeyA [ 50 ] ;
char sortKeyAz [ 50 ] ;
char sortKeyB [ 50 ] ;
char sortKeyBz [ 50 ] ;
int r ;
2002-07-02 22:36:04 +00:00
/* there used to be -3 here. Hmmmm.... */
/*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
result = ucol_strcoll ( coll , strA , 3 , strB , 3 ) ;
2001-05-25 19:30:01 +00:00
if ( result ! = UCOL_GREATER ) {
log_err ( " ERROR 1 in test 4 \n " ) ;
}
result = ucol_strcoll ( coll , strA , - 1 , strB , - 1 ) ;
if ( result ! = UCOL_EQUAL ) {
log_err ( " ERROR 2 in test 4 \n " ) ;
}
2001-05-30 16:09:09 +00:00
ucol_getSortKey ( coll , strA , 3 , ( uint8_t * ) sortKeyA , sizeof ( sortKeyA ) ) ;
ucol_getSortKey ( coll , strA , - 1 , ( uint8_t * ) sortKeyAz , sizeof ( sortKeyAz ) ) ;
ucol_getSortKey ( coll , strB , 3 , ( uint8_t * ) sortKeyB , sizeof ( sortKeyB ) ) ;
ucol_getSortKey ( coll , strB , - 1 , ( uint8_t * ) sortKeyBz , sizeof ( sortKeyBz ) ) ;
2001-05-25 19:30:01 +00:00
r = strcmp ( sortKeyA , sortKeyAz ) ;
if ( r < = 0 ) {
log_err ( " Error 3 in test 4 \n " ) ;
}
r = strcmp ( sortKeyA , sortKeyB ) ;
if ( r < = 0 ) {
log_err ( " Error 4 in test 4 \n " ) ;
}
r = strcmp ( sortKeyAz , sortKeyBz ) ;
if ( r ! = 0 ) {
log_err ( " Error 5 in test 4 \n " ) ;
}
ucol_setStrength ( coll , UCOL_IDENTICAL ) ;
2001-05-30 16:09:09 +00:00
ucol_getSortKey ( coll , strA , 3 , ( uint8_t * ) sortKeyA , sizeof ( sortKeyA ) ) ;
ucol_getSortKey ( coll , strA , - 1 , ( uint8_t * ) sortKeyAz , sizeof ( sortKeyAz ) ) ;
ucol_getSortKey ( coll , strB , 3 , ( uint8_t * ) sortKeyB , sizeof ( sortKeyB ) ) ;
ucol_getSortKey ( coll , strB , - 1 , ( uint8_t * ) sortKeyBz , sizeof ( sortKeyBz ) ) ;
2001-05-25 19:30:01 +00:00
r = strcmp ( sortKeyA , sortKeyAz ) ;
if ( r < = 0 ) {
log_err ( " Error 6 in test 4 \n " ) ;
}
r = strcmp ( sortKeyA , sortKeyB ) ;
if ( r < = 0 ) {
log_err ( " Error 7 in test 4 \n " ) ;
}
r = strcmp ( sortKeyAz , sortKeyBz ) ;
if ( r ! = 0 ) {
log_err ( " Error 8 in test 4 \n " ) ;
}
ucol_setStrength ( coll , UCOL_TERTIARY ) ;
}
2001-09-28 16:34:05 +00:00
2001-05-28 20:32:29 +00:00
/* Test 5: Null characters in non-normal source strings.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x41 , 0x300 , 0x316 , 0x00 , 0x42 , 0x00 } ;
static const UChar strB [ ] = { 0x41 , 0x41 , 0x300 , 0x316 , 0x00 , 0x00 , 0x00 } ;
2001-05-25 19:30:01 +00:00
char sortKeyA [ 50 ] ;
char sortKeyAz [ 50 ] ;
char sortKeyB [ 50 ] ;
char sortKeyBz [ 50 ] ;
int r ;
result = ucol_strcoll ( coll , strA , 6 , strB , 6 ) ;
if ( result ! = UCOL_GREATER ) {
log_err ( " ERROR 1 in test 5 \n " ) ;
}
result = ucol_strcoll ( coll , strA , - 1 , strB , - 1 ) ;
if ( result ! = UCOL_EQUAL ) {
log_err ( " ERROR 2 in test 5 \n " ) ;
}
2001-05-30 16:09:09 +00:00
ucol_getSortKey ( coll , strA , 6 , ( uint8_t * ) sortKeyA , sizeof ( sortKeyA ) ) ;
ucol_getSortKey ( coll , strA , - 1 , ( uint8_t * ) sortKeyAz , sizeof ( sortKeyAz ) ) ;
ucol_getSortKey ( coll , strB , 6 , ( uint8_t * ) sortKeyB , sizeof ( sortKeyB ) ) ;
ucol_getSortKey ( coll , strB , - 1 , ( uint8_t * ) sortKeyBz , sizeof ( sortKeyBz ) ) ;
2001-05-25 19:30:01 +00:00
r = strcmp ( sortKeyA , sortKeyAz ) ;
if ( r < = 0 ) {
log_err ( " Error 3 in test 5 \n " ) ;
}
r = strcmp ( sortKeyA , sortKeyB ) ;
if ( r < = 0 ) {
log_err ( " Error 4 in test 5 \n " ) ;
}
r = strcmp ( sortKeyAz , sortKeyBz ) ;
if ( r ! = 0 ) {
log_err ( " Error 5 in test 5 \n " ) ;
}
ucol_setStrength ( coll , UCOL_IDENTICAL ) ;
2001-05-30 16:09:09 +00:00
ucol_getSortKey ( coll , strA , 6 , ( uint8_t * ) sortKeyA , sizeof ( sortKeyA ) ) ;
ucol_getSortKey ( coll , strA , - 1 , ( uint8_t * ) sortKeyAz , sizeof ( sortKeyAz ) ) ;
ucol_getSortKey ( coll , strB , 6 , ( uint8_t * ) sortKeyB , sizeof ( sortKeyB ) ) ;
ucol_getSortKey ( coll , strB , - 1 , ( uint8_t * ) sortKeyBz , sizeof ( sortKeyBz ) ) ;
2001-05-25 19:30:01 +00:00
r = strcmp ( sortKeyA , sortKeyAz ) ;
if ( r < = 0 ) {
log_err ( " Error 6 in test 5 \n " ) ;
}
r = strcmp ( sortKeyA , sortKeyB ) ;
if ( r < = 0 ) {
log_err ( " Error 7 in test 5 \n " ) ;
}
r = strcmp ( sortKeyAz , sortKeyBz ) ;
if ( r ! = 0 ) {
log_err ( " Error 8 in test 5 \n " ) ;
}
ucol_setStrength ( coll , UCOL_TERTIARY ) ;
}
2001-09-28 16:34:05 +00:00
2001-05-28 20:32:29 +00:00
/* Test 6: Null character as base of a non-normal combining sequence.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x0 , 0x300 , 0x316 , 0x41 , 0x302 , 0x00 } ;
static const UChar strB [ ] = { 0x41 , 0x0 , 0x302 , 0x316 , 0x41 , 0x300 , 0x00 } ;
2001-05-25 19:30:01 +00:00
result = ucol_strcoll ( coll , strA , 5 , strB , 5 ) ;
if ( result ! = UCOL_LESS ) {
log_err ( " Error 1 in test 6 \n " ) ;
}
result = ucol_strcoll ( coll , strA , - 1 , strB , - 1 ) ;
if ( result ! = UCOL_EQUAL ) {
log_err ( " Error 2 in test 6 \n " ) ;
}
}
2001-05-17 23:09:35 +00:00
ucol_close ( coll ) ;
}
2001-05-25 19:30:01 +00:00
2001-05-22 22:26:58 +00:00
#if 0
/*
 * Disabled test: for each escaped string in caseBitData, unescapes it and
 * compares the value returned by ucol_uprv_getCaseBits() with the matching
 * entry of results[].
 *
 * Unlike the earlier revision, this version bails out if the collator cannot
 * be opened and closes the collator before returning.
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        " a ", " A ", " ch ", " Ch ", " CH ",
        " \\ uFF9E ", " \\ u0009 "
    };

    /* Expected case value for the caseBitData entry at the same index. */
    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open(" ", &status);
    uint8_t res = 0;

    if (U_FAILURE(status)) {
        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
        return;
    }

    for (i = 0; i < sizeof(results) / sizeof(results[0]); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if (results[i] != res) {
            log_err(" Expected case = %02X, got %02X for %04X \n ", results[i], res, b[0]);
        }
    }

    ucol_close(UCA);
}
#endif
2002-03-28 18:26:25 +00:00
static void TestHangulTailoring ( void ) {
2001-05-31 17:27:44 +00:00
static const char * koreanData [ ] = {
2001-09-28 16:34:05 +00:00
" \\ uac00 " , " \\ u4f3d " , " \\ u4f73 " , " \\ u5047 " , " \\ u50f9 " , " \\ u52a0 " , " \\ u53ef " , " \\ u5475 " ,
" \\ u54e5 " , " \\ u5609 " , " \\ u5ac1 " , " \\ u5bb6 " , " \\ u6687 " , " \\ u67b6 " , " \\ u67b7 " , " \\ u67ef " ,
" \\ u6b4c " , " \\ u73c2 " , " \\ u75c2 " , " \\ u7a3c " , " \\ u82db " , " \\ u8304 " , " \\ u8857 " , " \\ u8888 " ,
" \\ u8a36 " , " \\ u8cc8 " , " \\ u8dcf " , " \\ u8efb " , " \\ u8fe6 " , " \\ u99d5 " ,
" \\ u4EEE " , " \\ u50A2 " , " \\ u5496 " , " \\ u54FF " , " \\ u5777 " , " \\ u5B8A " , " \\ u659D " , " \\ u698E " ,
2001-05-31 17:27:44 +00:00
" \\ u6A9F " , " \\ u73C8 " , " \\ u7B33 " , " \\ u801E " , " \\ u8238 " , " \\ u846D " , " \\ u8B0C "
} ;
2001-05-22 22:26:58 +00:00
2001-09-28 16:34:05 +00:00
const char * rules =
" & \\ uac00 <<< \\ u4f3d <<< \\ u4f73 <<< \\ u5047 <<< \\ u50f9 <<< \\ u52a0 <<< \\ u53ef <<< \\ u5475 "
" <<< \\ u54e5 <<< \\ u5609 <<< \\ u5ac1 <<< \\ u5bb6 <<< \\ u6687 <<< \\ u67b6 <<< \\ u67b7 <<< \\ u67ef "
" <<< \\ u6b4c <<< \\ u73c2 <<< \\ u75c2 <<< \\ u7a3c <<< \\ u82db <<< \\ u8304 <<< \\ u8857 <<< \\ u8888 "
" <<< \\ u8a36 <<< \\ u8cc8 <<< \\ u8dcf <<< \\ u8efb <<< \\ u8fe6 <<< \\ u99d5 "
2001-05-31 17:27:44 +00:00
" <<< \\ u4EEE <<< \\ u50A2 <<< \\ u5496 <<< \\ u54FF <<< \\ u5777 <<< \\ u5B8A <<< \\ u659D <<< \\ u698E "
" <<< \\ u6A9F <<< \\ u73C8 <<< \\ u7B33 <<< \\ u801E <<< \\ u8238 <<< \\ u846D <<< \\ u8B0C " ;
2001-05-22 22:26:58 +00:00
2001-09-27 23:19:12 +00:00
UErrorCode status = U_ZERO_ERROR ;
UChar rlz [ 2048 ] = { 0 } ;
uint32_t rlen = u_unescape ( rules , rlz , 2048 ) ;
UCollator * coll = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2003-06-04 19:02:41 +00:00
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
2001-09-27 23:19:12 +00:00
log_verbose ( " Using start of korean rules \n " ) ;
if ( U_SUCCESS ( status ) ) {
genericOrderingTest ( coll , koreanData , sizeof ( koreanData ) / sizeof ( koreanData [ 0 ] ) ) ;
} else {
log_err ( " Unable to open collator with rules %s \n " , rules ) ;
}
ucol_close ( coll ) ;
log_verbose ( " Using ko__LOTUS locale \n " ) ;
genericLocaleStarter ( " ko__LOTUS " , koreanData , sizeof ( koreanData ) / sizeof ( koreanData [ 0 ] ) ) ;
2001-05-22 22:26:58 +00:00
}
2001-05-17 23:09:35 +00:00
2014-02-25 21:21:49 +00:00
/*
 * The secondary / tertiary compression middle byte
 * as used by the current implementation.
 * Subject to change as the sort key compression changes.
 * See class CollationKeys.
 *
 * Each value is the midpoint of the byte range noted next to it
 * (0x25 for 05..45, 0x65 for 05..C5). TestCompressOverlap compares
 * sort-key bytes against these values to decide whether a byte lies in the
 * lower or upper half of its range.
 */
enum {
SEC_COMMON_MIDDLE = 0x25 , /* range 05..45 */
TER_ONLY_COMMON_MIDDLE = 0x65 /* range 05..C5 */
} ;
2002-03-28 18:26:25 +00:00
static void TestCompressOverlap ( void ) {
2001-05-25 22:00:24 +00:00
UChar secstr [ 150 ] ;
UChar tertstr [ 150 ] ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll ;
2014-02-25 21:21:49 +00:00
uint8_t result [ 500 ] ;
2001-05-25 22:00:24 +00:00
uint32_t resultlen ;
int count = 0 ;
2014-02-25 21:21:49 +00:00
uint8_t * tempptr ;
2001-05-25 22:00:24 +00:00
coll = ucol_open ( " " , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Collator can't be created -> %s \n " , u_errorName ( status ) ) ;
2001-05-25 22:00:24 +00:00
return ;
}
while ( count < 149 ) {
secstr [ count ] = 0x0020 ; /* [06, 05, 05] */
tertstr [ count ] = 0x0020 ;
count + + ;
}
/* top down compression ----------------------------------- */
secstr [ count ] = 0x0332 ; /* [, 87, 05] */
tertstr [ count ] = 0x3000 ; /* [06, 05, 07] */
2001-09-28 16:34:05 +00:00
/* no compression secstr should have 150 secondary bytes, tertstr should
2001-05-25 22:00:24 +00:00
have 150 tertiary bytes .
2014-02-25 21:21:49 +00:00
with correct compression , secstr should have 6 secondary
bytes ( 149 / 33 rounded up + accent ) , tertstr should have > 2 tertiary bytes */
resultlen = ucol_getSortKey ( coll , secstr , 150 , result , LEN ( result ) ) ;
2013-03-21 01:42:01 +00:00
( void ) resultlen ; /* Suppress set but not used warning. */
2014-02-25 21:21:49 +00:00
tempptr = ( uint8_t * ) uprv_strchr ( ( char * ) result , 1 ) + 1 ;
2001-05-25 22:00:24 +00:00
while ( * ( tempptr + 1 ) ! = 1 ) {
2001-09-28 16:34:05 +00:00
/* the last secondary collation element is not checked since it is not
2001-05-25 22:00:24 +00:00
part of the compression */
2014-02-25 21:21:49 +00:00
if ( * tempptr < SEC_COMMON_MIDDLE ) {
log_err ( " Secondary top down compression overlapped \n " ) ;
2001-05-25 22:00:24 +00:00
}
tempptr + + ;
}
2001-09-28 16:34:05 +00:00
2001-05-25 22:00:24 +00:00
/* tertiary top/bottom/common for en_US is similar to the secondary
top / bottom / common */
2014-02-25 21:21:49 +00:00
resultlen = ucol_getSortKey ( coll , tertstr , 150 , result , LEN ( result ) ) ;
tempptr = ( uint8_t * ) uprv_strrchr ( ( char * ) result , 1 ) + 1 ;
2001-05-25 22:00:24 +00:00
while ( * ( tempptr + 1 ) ! = 0 ) {
2001-09-28 16:34:05 +00:00
/* the last secondary collation element is not checked since it is not
2001-05-25 22:00:24 +00:00
part of the compression */
2014-02-25 21:21:49 +00:00
if ( * tempptr < TER_ONLY_COMMON_MIDDLE ) {
log_err ( " Tertiary top down compression overlapped \n " ) ;
2001-05-25 22:00:24 +00:00
}
tempptr + + ;
}
/* bottom up compression ------------------------------------- */
secstr [ count ] = 0 ;
tertstr [ count ] = 0 ;
2014-02-25 21:21:49 +00:00
resultlen = ucol_getSortKey ( coll , secstr , 150 , result , LEN ( result ) ) ;
tempptr = ( uint8_t * ) uprv_strchr ( ( char * ) result , 1 ) + 1 ;
2001-05-25 22:00:24 +00:00
while ( * ( tempptr + 1 ) ! = 1 ) {
2001-09-28 16:34:05 +00:00
/* the last secondary collation element is not checked since it is not
2001-05-25 22:00:24 +00:00
part of the compression */
2014-02-25 21:21:49 +00:00
if ( * tempptr > SEC_COMMON_MIDDLE ) {
log_err ( " Secondary bottom up compression overlapped \n " ) ;
2001-05-25 22:00:24 +00:00
}
tempptr + + ;
}
2001-09-28 16:34:05 +00:00
2001-05-25 22:00:24 +00:00
/* tertiary top/bottom/common for en_US is similar to the secondary
top / bottom / common */
2014-02-25 21:21:49 +00:00
resultlen = ucol_getSortKey ( coll , tertstr , 150 , result , LEN ( result ) ) ;
tempptr = ( uint8_t * ) uprv_strrchr ( ( char * ) result , 1 ) + 1 ;
2001-05-25 22:00:24 +00:00
while ( * ( tempptr + 1 ) ! = 0 ) {
2001-09-28 16:34:05 +00:00
/* the last secondary collation element is not checked since it is not
2001-05-25 22:00:24 +00:00
part of the compression */
2014-02-25 21:21:49 +00:00
if ( * tempptr > TER_ONLY_COMMON_MIDDLE ) {
log_err ( " Tertiary bottom up compression overlapped \n " ) ;
2001-05-25 22:00:24 +00:00
}
tempptr + + ;
}
2001-10-17 02:19:48 +00:00
ucol_close ( coll ) ;
2001-05-25 22:00:24 +00:00
}
2001-06-06 20:48:57 +00:00
/*
 * Passes one three-string table (U+0410 'b', U+0410 U+0306 'a', U+04D0 'A')
 * to genericLocaleStarter() for the " root " locale and then to
 * genericRulesStarter() once per tailoring listed below, in order.
 *
 * The " ru " locale is no longer used here: Russian overrides contractions,
 * so that variant of the test is not valid anymore.
 */
static void TestCyrillicTailoring(void) {
    static const char *cyrillicStrings[] = {
        " \\ u0410b ",
        " \\ u0410 \\ u0306a ",
        " \\ u04d0A "
    };
    static const char *tailorings[] = {
        " & \\ u0410 = \\ u0410 ",
        " &Z < \\ u0410 ",
        " & \\ u0410 = \\ u0410 < \\ u04d0 ",
        " &Z < \\ u0410 < \\ u04d0 ",
        " & \\ u0410 = \\ u0410 < \\ u0410 \\ u0301 ",
        " &Z < \\ u0410 < \\ u0410 \\ u0301 "
    };
    const int32_t tailoringCount = (int32_t)(sizeof(tailorings) / sizeof(tailorings[0]));
    int32_t t;

    genericLocaleStarter(" root ", cyrillicStrings, 3);

    for (t = 0; t < tailoringCount; t++) {
        genericRulesStarter(tailorings[t], cyrillicStrings, 3);
    }
}
2002-10-30 05:44:54 +00:00
/*
 * Passes two three-string tables to genericRulesStarter() using a rule
 * string that consists only of a [suppressContractions ...] option for the
 * range U+0400..U+047F. The table whose strings end in Cyrillic text is run
 * first, the table whose strings start with U+0410 second.
 */
static void TestSuppressContractions(void) {
    static const char *cyrillicLeading[] = {
        " \\ u0410 \\ u0302a ",
        " \\ u0410 \\ u0306b ",
        " \\ u0410c "
    };
    static const char *cyrillicTrailing[] = {
        " a \\ u0410 ",
        " A \\ u0410 \\ u0306 ",
        " \\ uFF21 \\ u0410 \\ u0302 "
    };
    const char *suppressRule = " [suppressContractions [ \\ u0400- \\ u047f]] ";

    genericRulesStarter(suppressRule, cyrillicTrailing, 3);
    genericRulesStarter(suppressRule, cyrillicLeading, 3);
}
2002-03-28 18:26:25 +00:00
static void TestContraction ( void ) {
2001-06-05 18:09:06 +00:00
const static char * testrules [ ] = {
" &A = AB / B " ,
" &A = A \\ u0306/ \\ u0306 " ,
" &c = ch / h "
} ;
const static UChar testdata [ ] [ 2 ] = {
2001-06-11 18:38:05 +00:00
{ 0x0041 /* 'A' */ , 0x0042 /* 'B' */ } ,
{ 0x0041 /* 'A' */ , 0x0306 /* combining breve */ } ,
{ 0x0063 /* 'c' */ , 0x0068 /* 'h' */ }
2001-06-05 18:09:06 +00:00
} ;
const static UChar testdata2 [ ] [ 2 ] = {
2001-06-11 18:38:05 +00:00
{ 0x0063 /* 'c' */ , 0x0067 /* 'g' */ } ,
{ 0x0063 /* 'c' */ , 0x0068 /* 'h' */ } ,
{ 0x0063 /* 'c' */ , 0x006C /* 'l' */ }
2001-06-05 18:09:06 +00:00
} ;
2014-02-25 21:21:49 +00:00
#if 0
/*
* These pairs of rule strings are not guaranteed to yield the very same mappings .
* In fact , LDML 24 recommends an improved way of creating mappings
* which always yields different mappings for such pairs . See
* http : //www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
*/
2001-06-05 18:09:06 +00:00
const static char * testrules3 [ ] = {
2001-06-05 22:52:56 +00:00
" &z < xyz &xyzw << B " ,
" &z < xyz &xyz << B / w " ,
" &z < ch &achm << B " ,
" &z < ch &a << B / chm " ,
" & \\ ud800 \\ udc00w << B " ,
" & \\ ud800 \\ udc00 << B / w " ,
" &a \\ ud800 \\ udc00m << B " ,
" &a << B / \\ ud800 \\ udc00m " ,
2001-06-05 18:09:06 +00:00
} ;
2014-02-25 21:21:49 +00:00
# endif
2001-06-05 18:09:06 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll ;
2001-06-05 22:52:56 +00:00
UChar rule [ 256 ] = { 0 } ;
2001-06-05 18:09:06 +00:00
uint32_t rlen = 0 ;
int i ;
for ( i = 0 ; i < sizeof ( testrules ) / sizeof ( testrules [ 0 ] ) ; i + + ) {
UCollationElements * iter1 ;
int j = 0 ;
log_verbose ( " Rule %s for testing \n " , testrules [ i ] ) ;
rlen = u_unescape ( testrules [ i ] , rule , 32 ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-05 18:09:06 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Collator creation failed %s -> %s \n " , testrules [ i ] , u_errorName ( status ) ) ;
2001-06-05 18:09:06 +00:00
return ;
}
iter1 = ucol_openElements ( coll , testdata [ i ] , 2 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Collation iterator creation failed \n " ) ;
return ;
}
while ( j < 2 ) {
2001-09-28 16:34:05 +00:00
UCollationElements * iter2 = ucol_openElements ( coll ,
& ( testdata [ i ] [ j ] ) ,
2001-06-05 18:09:06 +00:00
1 , & status ) ;
uint32_t ce ;
if ( U_FAILURE ( status ) ) {
log_err ( " Collation iterator creation failed \n " ) ;
return ;
}
ce = ucol_next ( iter2 , & status ) ;
while ( ce ! = UCOL_NULLORDER ) {
if ( ( uint32_t ) ucol_next ( iter1 , & status ) ! = ce ) {
log_err ( " Collation elements in contraction split does not match \n " ) ;
return ;
}
ce = ucol_next ( iter2 , & status ) ;
}
j + + ;
ucol_closeElements ( iter2 ) ;
}
if ( ucol_next ( iter1 , & status ) ! = UCOL_NULLORDER ) {
log_err ( " Collation elements not exhausted \n " ) ;
return ;
}
ucol_closeElements ( iter1 ) ;
ucol_close ( coll ) ;
}
2001-06-05 22:52:56 +00:00
rlen = u_unescape ( " & a < b < c < ch < d & c = ch / h " , rule , 256 ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-05 18:09:06 +00:00
if ( ucol_strcoll ( coll , testdata2 [ 0 ] , 2 , testdata2 [ 1 ] , 2 ) ! = UCOL_LESS ) {
log_err ( " Expected \\ u%04x \\ u%04x < \\ u%04x \\ u%04x \n " ,
2001-09-28 16:34:05 +00:00
testdata2 [ 0 ] [ 0 ] , testdata2 [ 0 ] [ 1 ] , testdata2 [ 1 ] [ 0 ] ,
2001-06-05 18:09:06 +00:00
testdata2 [ 1 ] [ 1 ] ) ;
return ;
}
if ( ucol_strcoll ( coll , testdata2 [ 1 ] , 2 , testdata2 [ 2 ] , 2 ) ! = UCOL_LESS ) {
log_err ( " Expected \\ u%04x \\ u%04x < \\ u%04x \\ u%04x \n " ,
2001-09-28 16:34:05 +00:00
testdata2 [ 1 ] [ 0 ] , testdata2 [ 1 ] [ 1 ] , testdata2 [ 2 ] [ 0 ] ,
2001-06-05 18:09:06 +00:00
testdata2 [ 2 ] [ 1 ] ) ;
return ;
}
ucol_close ( coll ) ;
2014-02-25 21:21:49 +00:00
#if 0 /* see above */
2001-06-05 18:09:06 +00:00
for ( i = 0 ; i < sizeof ( testrules3 ) / sizeof ( testrules3 [ 0 ] ) ; i + = 2 ) {
2014-02-25 21:21:49 +00:00
log_verbose ( " testrules3 i==%d \" %s \" vs. \" %s \" \n " , i , testrules3 [ i ] , testrules3 [ i + 1 ] ) ;
2001-06-05 18:09:06 +00:00
UCollator * coll1 ,
* coll2 ;
UCollationElements * iter1 ,
* iter2 ;
2001-06-11 18:38:05 +00:00
UChar ch = 0x0042 /* 'B' */ ;
2001-06-05 18:09:06 +00:00
uint32_t ce ;
2001-06-05 22:52:56 +00:00
rlen = u_unescape ( testrules3 [ i ] , rule , 32 ) ;
2001-09-22 01:24:03 +00:00
coll1 = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-05 22:52:56 +00:00
rlen = u_unescape ( testrules3 [ i + 1 ] , rule , 32 ) ;
2001-09-22 01:24:03 +00:00
coll2 = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-05 18:09:06 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " Collator creation failed %s \n " , testrules [ i ] ) ;
return ;
}
iter1 = ucol_openElements ( coll1 , & ch , 1 , & status ) ;
iter2 = ucol_openElements ( coll2 , & ch , 1 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Collation iterator creation failed \n " ) ;
return ;
}
ce = ucol_next ( iter1 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Retrieving ces failed \n " ) ;
return ;
}
while ( ce ! = UCOL_NULLORDER ) {
2014-02-25 21:21:49 +00:00
uint32_t ce2 = ( uint32_t ) ucol_next ( iter2 , & status ) ;
if ( ce = = ce2 ) {
log_verbose ( " CEs match: %08x \n " , ce ) ;
} else {
log_err ( " CEs do not match: %08x vs. %08x \n " , ce , ce2 ) ;
2001-06-05 18:09:06 +00:00
return ;
}
ce = ucol_next ( iter1 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Retrieving ces failed \n " ) ;
return ;
}
}
if ( ucol_next ( iter2 , & status ) ! = UCOL_NULLORDER ) {
log_err ( " CEs not exhausted \n " ) ;
return ;
}
ucol_closeElements ( iter1 ) ;
ucol_closeElements ( iter2 ) ;
ucol_close ( coll1 ) ;
ucol_close ( coll2 ) ;
}
2014-02-25 21:21:49 +00:00
# endif
2001-06-05 18:09:06 +00:00
}
2002-03-28 18:26:25 +00:00
static void TestExpansion ( void ) {
2001-06-08 02:11:28 +00:00
const static char * testrules [ ] = {
2014-02-25 21:21:49 +00:00
#if 0
/*
* This seems to have tested that M was not mapped to an expansion .
* I believe the old builder just did that because it computed the extension CEs
* at the very end , which was a bug .
* Among other problems , it violated the core tailoring principle
* by making an earlier rule depend on a later one .
* And , of course , if M did not get an expansion , then it was primary different from K ,
* unlike what the rule & K < < M says .
*/
2001-06-08 02:11:28 +00:00
" &J << K / B & K << M " ,
2014-02-25 21:21:49 +00:00
# endif
2001-06-08 02:11:28 +00:00
" &J << K / B << M "
} ;
const static UChar testdata [ ] [ 3 ] = {
2001-06-11 18:38:05 +00:00
{ 0x004A /*'J'*/ , 0x0041 /*'A'*/ , 0 } ,
{ 0x004D /*'M'*/ , 0x0041 /*'A'*/ , 0 } ,
{ 0x004B /*'K'*/ , 0x0041 /*'A'*/ , 0 } ,
{ 0x004B /*'K'*/ , 0x0043 /*'C'*/ , 0 } ,
{ 0x004A /*'J'*/ , 0x0043 /*'C'*/ , 0 } ,
{ 0x004D /*'M'*/ , 0x0043 /*'C'*/ , 0 }
2001-06-08 02:11:28 +00:00
} ;
2001-09-28 16:34:05 +00:00
2001-06-08 02:11:28 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll ;
UChar rule [ 256 ] = { 0 } ;
uint32_t rlen = 0 ;
int i ;
for ( i = 0 ; i < sizeof ( testrules ) / sizeof ( testrules [ 0 ] ) ; i + + ) {
int j = 0 ;
log_verbose ( " Rule %s for testing \n " , testrules [ i ] ) ;
rlen = u_unescape ( testrules [ i ] , rule , 32 ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-08 02:11:28 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Collator creation failed %s -> %s \n " , testrules [ i ] , u_errorName ( status ) ) ;
2001-06-08 02:11:28 +00:00
return ;
}
2001-09-28 16:34:05 +00:00
2001-06-08 02:11:28 +00:00
for ( j = 0 ; j < 5 ; j + + ) {
doTest ( coll , testdata [ j ] , testdata [ j + 1 ] , UCOL_LESS ) ;
}
ucol_close ( coll ) ;
}
}
2001-11-14 06:55:15 +00:00
#if 0
/* this test tests the current limitations of the engine */
/* it always fails, so it is disabled by default */
2002-03-28 18:26:25 +00:00
/*
 * Runs a series of rule strings with expected-order string lists through
 * genericRulesStarter / genericRulesStarterWithOptions. Each braced section
 * below is independent and carries its own rule, data and attribute tables.
 * The whole function is compiled out by the surrounding #if 0.
 */
static void TestLimitations ( void ) {
2001-06-26 22:26:13 +00:00
/* recursive expansions */
{
static const char * rule = " &a=b/c&d=c/e " ;
static const char * tlimit01 [ ] = { " add " , " b " , " adf " } ;
static const char * tlimit02 [ ] = { " aa " , " b " , " af " } ;
log_verbose ( " recursive expansions \n " ) ;
genericRulesStarter ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) ) ;
genericRulesStarter ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) ) ;
}
/* contractions spanning expansions */
{
static const char * rule = " &a<<<c/e&g<<<eh " ;
static const char * tlimit01 [ ] = { " ad " , " c " , " af " , " f " , " ch " , " h " } ;
static const char * tlimit02 [ ] = { " ad " , " c " , " ch " , " af " , " f " , " h " } ;
log_verbose ( " contractions spanning expansions \n " ) ;
genericRulesStarter ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) ) ;
genericRulesStarter ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) ) ;
}
/* normalization: nulls in contractions */
{
static const char * rule = " &a<<< \\ u0000 \\ u0302 " ;
static const char * tlimit01 [ ] = { " a " , " \\ u0000 \\ u0302 \\ u0327 " } ;
static const char * tlimit02 [ ] = { " \\ u0000 \\ u0302 \\ u0327 " , " a " } ;
static const UColAttribute att [ ] = { UCOL_DECOMPOSITION_MODE } ;
2001-06-27 01:19:04 +00:00
static const UColAttributeValue valOn [ ] = { UCOL_ON } ;
static const UColAttributeValue valOff [ ] = { UCOL_OFF } ;
2001-06-26 22:26:13 +00:00
log_verbose ( " NULL in contractions \n " ) ;
/* Both orderings of the same two strings are run, with decomposition on and then off. */
genericRulesStarterWithOptions ( rule , tlimit01 , 2 , att , valOn , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , 2 , att , valOn , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , 2 , att , valOff , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , 2 , att , valOff , 1 ) ;
}
/* normalization: contractions spanning normalization */
/* NOTE(review): the rule and both data tables in this section are identical to the */
/* "nulls in contractions" section above; only the log message differs. */
{
static const char * rule = " &a<<< \\ u0000 \\ u0302 " ;
static const char * tlimit01 [ ] = { " a " , " \\ u0000 \\ u0302 \\ u0327 " } ;
static const char * tlimit02 [ ] = { " \\ u0000 \\ u0302 \\ u0327 " , " a " } ;
static const UColAttribute att [ ] = { UCOL_DECOMPOSITION_MODE } ;
2001-06-27 01:19:04 +00:00
static const UColAttributeValue valOn [ ] = { UCOL_ON } ;
static const UColAttributeValue valOff [ ] = { UCOL_OFF } ;
2001-06-26 22:26:13 +00:00
log_verbose ( " contractions spanning normalization \n " ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , 2 , att , valOn , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , 2 , att , valOn , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , 2 , att , valOff , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , 2 , att , valOff , 1 ) ;
}
/* variable top: */
{
2001-11-10 06:54:28 +00:00
/*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2001-06-26 22:26:13 +00:00
static const char * rule = " & \\ u2010<x<[variable top]=z " ;
2001-11-10 06:54:28 +00:00
/*static const char *rule3 = "&' '<x<[variable top]=z";*/
2001-06-26 22:26:13 +00:00
static const char * tlimit01 [ ] = { " " , " z " , " zb " , " a " , " b " , " xb " , " b " , " c " } ;
static const char * tlimit02 [ ] = { " - " , " -x " , " x " , " xb " , " -z " , " z " , " zb " , " -a " , " a " , " -b " , " b " , " c " } ;
static const char * tlimit03 [ ] = { " " , " xb " , " z " , " zb " , " a " , " b " , " b " , " c " } ;
/* att[k] is paired with valOn[k] / valOff[k]: alternate handling and strength. */
static const UColAttribute att [ ] = { UCOL_ALTERNATE_HANDLING , UCOL_STRENGTH } ;
2001-06-27 01:19:04 +00:00
static const UColAttributeValue valOn [ ] = { UCOL_SHIFTED , UCOL_QUATERNARY } ;
static const UColAttributeValue valOff [ ] = { UCOL_NON_IGNORABLE , UCOL_TERTIARY } ;
2001-06-26 22:26:13 +00:00
log_verbose ( " variable top \n " ) ;
/* tlimit03 is only run with the valOn settings; tlimit01 and tlimit02 are run with both. */
genericRulesStarterWithOptions ( rule , tlimit03 , sizeof ( tlimit03 ) / sizeof ( tlimit03 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) , att , valOff , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) , att , valOff , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
}
/* case level */
{
static const char * rule = " &c<ch<<<cH<<<Ch<<<CH " ;
static const char * tlimit01 [ ] = { " c " , " CH " , " Ch " , " cH " , " ch " } ;
static const char * tlimit02 [ ] = { " c " , " CH " , " cH " , " Ch " , " ch " } ;
static const UColAttribute att [ ] = { UCOL_CASE_FIRST } ;
2001-06-27 01:19:04 +00:00
static const UColAttributeValue valOn [ ] = { UCOL_UPPER_FIRST } ;
2001-11-10 06:54:28 +00:00
/*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2001-06-26 22:26:13 +00:00
log_verbose ( " case level \n " ) ;
/* Only the upper-first setting is exercised; the valOff runs are commented out below. */
genericRulesStarterWithOptions ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
/*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
/*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
}
}
2001-11-14 06:55:15 +00:00
# endif
2001-06-26 22:26:13 +00:00
2002-03-28 18:26:25 +00:00
/*
 * Unescapes a fixed test string, sets UCOL_STRENGTH to UCOL_IDENTICAL on a
 * collator opened with ucol_open, and requests a sort key for the string.
 * The returned key length is discarded, so the function only exercises the
 * calls; it logs a data error if the collator cannot be opened.
 */
static void TestBocsuCoverage ( void ) {
2001-06-22 18:35:01 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-06-27 01:19:04 +00:00
const char * testString = " \\ u0041 \\ u0441 \\ u4441 \\ U00044441 \\ u4441 \\ u0441 \\ u0041 " ;
2001-06-22 18:35:01 +00:00
UChar test [ 256 ] = { 0 } ;
/* NOTE(review): capacity passed is 32 although test has room for 256 UChars. */
uint32_t tlen = u_unescape ( testString , test , 32 ) ;
uint8_t key [ 256 ] = { 0 } ;
uint32_t klen = 0 ;
UCollator * coll = ucol_open ( " " , & status ) ;
2003-06-04 06:53:23 +00:00
if ( U_SUCCESS ( status ) ) {
2003-07-22 16:45:58 +00:00
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_IDENTICAL , & status ) ;
2001-06-22 18:35:01 +00:00
2003-07-22 16:45:58 +00:00
/* 256 is the capacity of the key buffer declared above. */
klen = ucol_getSortKey ( coll , test , tlen , key , 256 ) ;
2013-03-21 01:42:01 +00:00
( void ) klen ; /* Suppress set but not used warning. */
2001-06-22 18:35:01 +00:00
2003-07-22 16:45:58 +00:00
ucol_close ( coll ) ;
2003-06-04 06:53:23 +00:00
} else {
log_data_err ( " Couldn't open UCA \n " ) ;
}
2001-06-26 22:26:13 +00:00
}
2001-06-22 18:35:01 +00:00
2002-03-28 18:26:25 +00:00
/*
 * Tests ucol_setVariableTop / ucol_getVariableTop / ucol_restoreVariableTop.
 *
 * With alternate handling set to UCOL_SHIFTED, the variable top is set in turn
 * to space, dot, degree and dollar. After each call the test checks that the
 * getter returns the value the setter returned, which sample characters
 * compare equal to the empty string, and that the chosen character sorts
 * before the next sample. It then checks that a two-character string is
 * rejected, that the original value can be restored, and that calls made with
 * an error already in status leave that error code unchanged.
 */
static void TestVariableTopSetting ( void ) {
2001-06-26 22:26:13 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-06-29 22:54:56 +00:00
uint32_t varTopOriginal = 0 , varTop1 , varTop2 ;
2001-06-26 22:26:13 +00:00
UCollator * coll = ucol_open ( " " , & status ) ;
2003-06-04 06:53:23 +00:00
if ( U_SUCCESS ( status ) ) {
2001-06-22 18:35:01 +00:00
2014-02-25 21:21:49 +00:00
/* nul is always passed with length 0, i.e. it stands for the empty string. */
static const UChar nul = 0 ;
static const UChar space = 0x20 ;
static const UChar dot = 0x2e ; /* punctuation */
static const UChar degree = 0xb0 ; /* symbol */
static const UChar dollar = 0x24 ; /* currency symbol */
static const UChar zero = 0x30 ; /* digit */
2002-06-13 18:35:27 +00:00
2014-02-25 21:21:49 +00:00
varTopOriginal = ucol_getVariableTop ( coll , & status ) ;
log_verbose ( " ucol_getVariableTop(root) -> %08x \n " , varTopOriginal ) ;
ucol_setAttribute ( coll , UCOL_ALTERNATE_HANDLING , UCOL_SHIFTED , & status ) ;
2003-06-04 06:53:23 +00:00
2014-02-25 21:21:49 +00:00
/* Variable top = space: only space may equal the empty string. */
varTop1 = ucol_setVariableTop ( coll , & space , 1 , & status ) ;
varTop2 = ucol_getVariableTop ( coll , & status ) ;
log_verbose ( " ucol_setVariableTop(space) -> %08x \n " , varTop1 ) ;
if ( U_FAILURE ( status ) | | varTop1 ! = varTop2 | |
! ucol_equal ( coll , & nul , 0 , & space , 1 ) | |
ucol_equal ( coll , & nul , 0 , & dot , 1 ) | |
ucol_equal ( coll , & nul , 0 , & degree , 1 ) | |
ucol_equal ( coll , & nul , 0 , & dollar , 1 ) | |
ucol_equal ( coll , & nul , 0 , & zero , 1 ) | |
ucol_greaterOrEqual ( coll , & space , 1 , & dot , 1 ) ) {
log_err ( " ucol_setVariableTop(space) did not work - %s \n " , u_errorName ( status ) ) ;
2003-07-22 16:45:58 +00:00
}
2014-02-25 21:21:49 +00:00
/* Variable top = dot: space and dot must equal the empty string. */
varTop1 = ucol_setVariableTop ( coll , & dot , 1 , & status ) ;
varTop2 = ucol_getVariableTop ( coll , & status ) ;
log_verbose ( " ucol_setVariableTop(dot) -> %08x \n " , varTop1 ) ;
if ( U_FAILURE ( status ) | | varTop1 ! = varTop2 | |
! ucol_equal ( coll , & nul , 0 , & space , 1 ) | |
! ucol_equal ( coll , & nul , 0 , & dot , 1 ) | |
ucol_equal ( coll , & nul , 0 , & degree , 1 ) | |
ucol_equal ( coll , & nul , 0 , & dollar , 1 ) | |
ucol_equal ( coll , & nul , 0 , & zero , 1 ) | |
ucol_greaterOrEqual ( coll , & dot , 1 , & degree , 1 ) ) {
log_err ( " ucol_setVariableTop(dot) did not work - %s \n " , u_errorName ( status ) ) ;
}
2003-07-22 16:45:58 +00:00
2014-02-25 21:21:49 +00:00
/* Variable top = degree: space, dot and degree must equal the empty string. */
varTop1 = ucol_setVariableTop ( coll , & degree , 1 , & status ) ;
varTop2 = ucol_getVariableTop ( coll , & status ) ;
log_verbose ( " ucol_setVariableTop(degree) -> %08x \n " , varTop1 ) ;
if ( U_FAILURE ( status ) | | varTop1 ! = varTop2 | |
! ucol_equal ( coll , & nul , 0 , & space , 1 ) | |
! ucol_equal ( coll , & nul , 0 , & dot , 1 ) | |
! ucol_equal ( coll , & nul , 0 , & degree , 1 ) | |
ucol_equal ( coll , & nul , 0 , & dollar , 1 ) | |
ucol_equal ( coll , & nul , 0 , & zero , 1 ) | |
ucol_greaterOrEqual ( coll , & degree , 1 , & dollar , 1 ) ) {
log_err ( " ucol_setVariableTop(degree) did not work - %s \n " , u_errorName ( status ) ) ;
2003-07-22 16:45:58 +00:00
}
2001-06-26 22:26:13 +00:00
2014-02-25 21:21:49 +00:00
/* Variable top = dollar: everything except the digit must equal the empty string. */
varTop1 = ucol_setVariableTop ( coll , & dollar , 1 , & status ) ;
varTop2 = ucol_getVariableTop ( coll , & status ) ;
log_verbose ( " ucol_setVariableTop(dollar) -> %08x \n " , varTop1 ) ;
if ( U_FAILURE ( status ) | | varTop1 ! = varTop2 | |
! ucol_equal ( coll , & nul , 0 , & space , 1 ) | |
! ucol_equal ( coll , & nul , 0 , & dot , 1 ) | |
! ucol_equal ( coll , & nul , 0 , & degree , 1 ) | |
! ucol_equal ( coll , & nul , 0 , & dollar , 1 ) | |
ucol_equal ( coll , & nul , 0 , & zero , 1 ) | |
ucol_greaterOrEqual ( coll , & dollar , 1 , & zero , 1 ) ) {
log_err ( " ucol_setVariableTop(dollar) did not work - %s \n " , u_errorName ( status ) ) ;
}
2003-02-20 08:19:04 +00:00
2003-07-22 16:45:58 +00:00
log_verbose ( " Testing setting variable top to contractions \n " ) ;
{
2014-02-25 21:21:49 +00:00
UChar first [ 4 ] = { 0 } ;
2003-07-22 16:45:58 +00:00
first [ 0 ] = 0x0040 ;
first [ 1 ] = 0x0050 ;
first [ 2 ] = 0x0000 ;
2001-06-26 22:26:13 +00:00
2014-02-25 21:21:49 +00:00
status = U_ZERO_ERROR ;
2003-07-22 16:45:58 +00:00
/* Length -1 with the explicit 0 terminator above: the two-unit string must be rejected. */
ucol_setVariableTop ( coll , first , - 1 , & status ) ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
if ( U_SUCCESS ( status ) ) {
log_err ( " Invalid contraction succeded in setting variable top! \n " ) ;
2001-06-26 22:26:13 +00:00
}
2003-07-22 16:45:58 +00:00
}
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
log_verbose ( " Test restoring variable top \n " ) ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
/* Reset status: the previous step is expected to have left a failure code in it. */
status = U_ZERO_ERROR ;
ucol_restoreVariableTop ( coll , varTopOriginal , & status ) ;
if ( varTopOriginal ! = ucol_getVariableTop ( coll , & status ) ) {
log_err ( " Couldn't restore old variable top \n " ) ;
}
log_verbose ( " Testing calling with error set \n " ) ;
/* Every call below receives a pre-set error, with a valid and then a NULL collator; */
/* the only thing checked afterwards is that status still holds the same code. */
status = U_INTERNAL_PROGRAM_ERROR ;
2014-02-25 21:21:49 +00:00
varTop1 = ucol_setVariableTop ( coll , & space , 1 , & status ) ;
2003-07-22 16:45:58 +00:00
varTop2 = ucol_getVariableTop ( coll , & status ) ;
ucol_restoreVariableTop ( coll , varTop2 , & status ) ;
2014-02-25 21:21:49 +00:00
varTop1 = ucol_setVariableTop ( NULL , & dot , 1 , & status ) ;
2003-07-22 16:45:58 +00:00
varTop2 = ucol_getVariableTop ( NULL , & status ) ;
ucol_restoreVariableTop ( NULL , varTop2 , & status ) ;
if ( status ! = U_INTERNAL_PROGRAM_ERROR ) {
log_err ( " Bad reaction to passed error! \n " ) ;
}
ucol_close ( coll ) ;
2003-06-04 06:53:23 +00:00
} else {
log_data_err ( " Couldn't open UCA collator \n " ) ;
2001-06-26 22:26:13 +00:00
}
2014-02-25 21:21:49 +00:00
}
/**
 * Tests ucol_setMaxVariable() / ucol_getMaxVariable() on the root collator.
 *
 * With alternate handling set to UCOL_SHIFTED, maxVariable is set in turn to
 * space, punctuation, symbol and currency. After each call the test checks
 * that the getter returns the new group, that one sample character from every
 * group up to and including it compares equal to the empty string, that the
 * samples of higher groups (and the digit zero) do not, and that the sample of
 * the selected group sorts before the sample of the next one.
 *
 * Finally the original value is restored, and a call made with a failure code
 * already in status must change neither maxVariable nor that code.
 *
 * Declared with (void), like the other tests in this file, so that the
 * definition provides a prototype.
 */
static void TestMaxVariable(void) {
    UErrorCode status = U_ZERO_ERROR;
    UColReorderCode oldMax, max;
    UCollator *coll;

    /* nul is always passed with length 0, i.e. it stands for the empty string. */
    static const UChar nul = 0;
    static const UChar space = 0x20;
    static const UChar dot = 0x2e;     /* punctuation */
    static const UChar degree = 0xb0;  /* symbol */
    static const UChar dollar = 0x24;  /* currency symbol */
    static const UChar zero = 0x30;    /* digit */

    coll = ucol_open("", &status);
    if (U_FAILURE(status)) {
        log_data_err("Couldn't open root collator\n");
        return;
    }

    oldMax = ucol_getMaxVariable(coll);
    log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

    /* maxVariable = space: only space may equal the empty string. */
    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
    max = ucol_getMaxVariable(coll);
    log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
    if (U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
            !ucol_equal(coll, &nul, 0, &space, 1) ||
            ucol_equal(coll, &nul, 0, &dot, 1) ||
            ucol_equal(coll, &nul, 0, &degree, 1) ||
            ucol_equal(coll, &nul, 0, &dollar, 1) ||
            ucol_equal(coll, &nul, 0, &zero, 1) ||
            ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
        log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
    }

    /* maxVariable = punctuation: space and dot must equal the empty string. */
    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
    max = ucol_getMaxVariable(coll);
    log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
    if (U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
            !ucol_equal(coll, &nul, 0, &space, 1) ||
            !ucol_equal(coll, &nul, 0, &dot, 1) ||
            ucol_equal(coll, &nul, 0, &degree, 1) ||
            ucol_equal(coll, &nul, 0, &dollar, 1) ||
            ucol_equal(coll, &nul, 0, &zero, 1) ||
            ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
        log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
    }

    /* maxVariable = symbol: space, dot and degree must equal the empty string. */
    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
    max = ucol_getMaxVariable(coll);
    log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
    if (U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
            !ucol_equal(coll, &nul, 0, &space, 1) ||
            !ucol_equal(coll, &nul, 0, &dot, 1) ||
            !ucol_equal(coll, &nul, 0, &degree, 1) ||
            ucol_equal(coll, &nul, 0, &dollar, 1) ||
            ucol_equal(coll, &nul, 0, &zero, 1) ||
            ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
        log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
    }

    /* maxVariable = currency: everything except the digit must equal the empty string. */
    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
    max = ucol_getMaxVariable(coll);
    log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
    if (U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
            !ucol_equal(coll, &nul, 0, &space, 1) ||
            !ucol_equal(coll, &nul, 0, &dot, 1) ||
            !ucol_equal(coll, &nul, 0, &degree, 1) ||
            !ucol_equal(coll, &nul, 0, &dollar, 1) ||
            ucol_equal(coll, &nul, 0, &zero, 1) ||
            ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
        log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
    }

    log_verbose("Test restoring maxVariable\n");
    status = U_ZERO_ERROR;
    ucol_setMaxVariable(coll, oldMax, &status);
    if (oldMax != ucol_getMaxVariable(coll)) {
        log_err("Couldn't restore old maxVariable\n");
    }

    log_verbose("Testing calling with error set\n");
    /* A setter called with a failure already in status must be a no-op. */
    status = U_INTERNAL_PROGRAM_ERROR;
    ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
    max = ucol_getMaxVariable(coll);
    if (max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
        log_err("Bad reaction to passed error!\n");
    }
    ucol_close(coll);
}
2002-03-28 18:26:25 +00:00
/*
 * Opens an "en_US" collator and passes the table below to
 * genericOrderingTestWithResult with UCOL_LESS as the expected result.
 * Logs an error if the collator cannot be opened.
 */
static void TestNonChars ( void ) {
2001-09-21 21:22:44 +00:00
static const char * test [ ] = {
2010-10-19 21:48:04 +00:00
" \\ u0000 " , /* ignorable */
" \\ uFFFE " , /* special merge-sort character with minimum non-ignorable weights */
" \\ uFDD0 " , " \\ uFDEF " ,
" \\ U0001FFFE " , " \\ U0001FFFF " , /* UCA 6.0: noncharacters are treated like unassigned, */
" \\ U0002FFFE " , " \\ U0002FFFF " , /* not like ignorable. */
2001-09-21 21:22:44 +00:00
" \\ U0003FFFE " , " \\ U0003FFFF " ,
" \\ U0004FFFE " , " \\ U0004FFFF " ,
" \\ U0005FFFE " , " \\ U0005FFFF " ,
" \\ U0006FFFE " , " \\ U0006FFFF " ,
" \\ U0007FFFE " , " \\ U0007FFFF " ,
" \\ U0008FFFE " , " \\ U0008FFFF " ,
" \\ U0009FFFE " , " \\ U0009FFFF " ,
" \\ U000AFFFE " , " \\ U000AFFFF " ,
" \\ U000BFFFE " , " \\ U000BFFFF " ,
" \\ U000CFFFE " , " \\ U000CFFFF " ,
" \\ U000DFFFE " , " \\ U000DFFFF " ,
" \\ U000EFFFE " , " \\ U000EFFFF " ,
" \\ U000FFFFE " , " \\ U000FFFFF " ,
2010-10-19 21:48:04 +00:00
" \\ U0010FFFE " , " \\ U0010FFFF " ,
" \\ uFFFF " /* special character with maximum primary weight */
2001-09-21 21:22:44 +00:00
} ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " en_US " , & status ) ;
log_verbose ( " Test non characters \n " ) ;
if ( U_SUCCESS ( status ) ) {
2010-10-19 21:48:04 +00:00
/* NOTE(review): the table above has 37 entries but the count passed is 35, so the */
/* last two entries (U+10FFFF and U+FFFF) are not handed over - confirm this is intended. */
genericOrderingTestWithResult ( coll , test , 35 , UCOL_LESS ) ;
2001-09-21 21:22:44 +00:00
} else {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Unable to open collator \n " ) ;
2001-09-21 21:22:44 +00:00
}
2001-09-28 16:34:05 +00:00
2001-09-25 21:49:30 +00:00
/* Reached on both paths, i.e. also with whatever ucol_open returned on failure. */
ucol_close ( coll ) ;
2001-09-21 21:22:44 +00:00
}
2002-03-28 18:26:25 +00:00
/**
 * Exercises collation of strings that share a very long common prefix.
 *
 * For every length j in [20, 500) the test builds four strings consisting of
 * (j - 1) 'a' characters followed by one of 'a', 'b', 'c', 'd', and hands the
 * four strings, in that order, to genericLocaleStarter() for locale "en_US".
 *
 * The buffers are heap-allocated; if any allocation fails the test logs an
 * error, frees whatever was allocated, and returns without running.
 */
static void TestExtremeCompression(void) {
    enum { STRING_COUNT = 4, BUFFER_SIZE = 2048 };
    /* Automatic and NULL-initialized so that cleanup may free every slot unconditionally. */
    char *test[STRING_COUNT] = { NULL, NULL, NULL, NULL };
    int32_t j = 0, i = 0;

    for (i = 0; i < STRING_COUNT; i++) {
        test[i] = (char *)malloc(BUFFER_SIZE * sizeof(char));
        if (test[i] == NULL) {
            log_err("TestExtremeCompression: failed to allocate test buffer %d\n", (int)i);
            goto cleanup;
        }
    }

    for (j = 20; j < 500; j++) {
        for (i = 0; i < STRING_COUNT; i++) {
            /* (j - 1) copies of 'a', then the distinguishing letter, then the terminator. */
            uprv_memset(test[i], 'a', (j - 1) * sizeof(char));
            test[i][j - 1] = (char)('a' + i);
            test[i][j] = 0;
        }
        genericLocaleStarter("en_US", (const char **)test, STRING_COUNT);
    }

cleanup:
    for (i = 0; i < STRING_COUNT; i++) {
        free(test[i]);  /* free(NULL) is a no-op */
    }
}
#if 0
/*
 * Older variant of TestExtremeCompression, compiled out by the surrounding #if 0.
 * It fills four heap buffers inside a loop over j and calls
 * genericLocaleStarter once, after that loop has finished.
 */
static void TestExtremeCompression ( void ) {
static char * test [ 4 ] ;
int32_t j = 0 , i = 0 ;
UErrorCode status = U_ZERO_ERROR ;
/* NOTE(review): status is passed by value here, whereas other ucol_open calls in this */
/* file pass & status; coll is not used or closed anywhere below. */
UCollator * coll = ucol_open ( " en_US " , status ) ;
for ( i = 0 ; i < 4 ; i + + ) {
test [ i ] = ( char * ) malloc ( 2048 * sizeof ( char ) ) ;
}
for ( j = 10 ; j < 2048 ; j + + ) {
for ( i = 0 ; i < 4 ; i + + ) {
/* NOTE(review): only j - 2 bytes are filled, so index j - 2 is not written in this pass, */
/* and at j == 2047 the terminator is written to index 2047 of a 2048-byte buffer. */
uprv_memset ( test [ i ] , ' a ' , ( j - 2 ) * sizeof ( char ) ) ;
test [ i ] [ j - 1 ] = ( char ) ( ' a ' + i ) ;
test [ i ] [ j ] = 0 ;
}
}
2001-11-14 06:55:15 +00:00
genericLocaleStarter ( " en_US " , ( const char * * ) test , 4 ) ;
2001-11-13 23:41:11 +00:00
2003-01-20 07:42:58 +00:00
/* This second pass rewrites test [ 0 ] only; nothing reads the buffer again before it is freed. */
for ( j = 10 ; j < 2048 ; j + + ) {
for ( i = 0 ; i < 1 ; i + + ) {
uprv_memset ( test [ i ] , ' a ' , ( j - 1 ) * sizeof ( char ) ) ;
test [ i ] [ j ] = 0 ;
}
}
2001-11-13 23:41:11 +00:00
for ( i = 0 ; i < 4 ; i + + ) {
2002-07-29 21:04:18 +00:00
free ( test [ i ] ) ;
2001-11-13 23:41:11 +00:00
}
2001-09-21 21:22:44 +00:00
}
2003-01-20 07:42:58 +00:00
# endif
2001-09-21 21:22:44 +00:00
2002-03-28 18:26:25 +00:00
/*
 * Passes a tailoring rule that contains surrogate pairs, together with a
 * 14-entry list of test strings, to genericRulesStarter.
 */
static void TestSurrogates ( void ) {
2001-08-10 22:02:31 +00:00
static const char * test [ ] = {
" z " , " \\ ud900 \\ udc25 " , " \\ ud805 \\ udc50 " ,
" \\ ud800 \\ udc00y " , " \\ ud800 \\ udc00r " ,
" \\ ud800 \\ udc00f " , " \\ ud800 \\ udc00 " ,
" \\ ud800 \\ udc00c " , " \\ ud800 \\ udc00b " ,
" \\ ud800 \\ udc00fa " , " \\ ud800 \\ udc00fb " ,
2001-09-28 16:34:05 +00:00
" \\ ud800 \\ udc00a " ,
2001-08-10 22:02:31 +00:00
" c " , " b "
} ;
2001-09-28 16:34:05 +00:00
/* The rule is assembled from adjacent string literals into a single string. */
static const char * rule =
2001-08-10 22:02:31 +00:00
" &z < \\ ud900 \\ udc25 < \\ ud805 \\ udc50 "
" < \\ ud800 \\ udc00y < \\ ud800 \\ udc00r "
" < \\ ud800 \\ udc00f << \\ ud800 \\ udc00 "
" < \\ ud800 \\ udc00fa << \\ ud800 \\ udc00fb "
" < \\ ud800 \\ udc00a < c < b " ;
/* 14 is the number of entries in test above. */
genericRulesStarter ( rule , test , 14 ) ;
}
2001-10-06 02:08:12 +00:00
/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
2002-03-28 18:26:25 +00:00
/*
 * Table-driven test: each entry pairs a rule string that uses the prefix
 * operator '|' with a list of strings and a count, and is passed to
 * genericRulesStarter.
 */
static void TestPrefix ( void ) {
2001-10-08 02:26:25 +00:00
uint32_t i ;
2006-10-01 07:12:18 +00:00
static const struct {
2001-10-08 02:26:25 +00:00
const char * rules ;
const char * data [ 50 ] ;
const uint32_t len ;
2004-11-11 23:34:58 +00:00
} tests [ ] = {
{ " &z <<< z|a " ,
2001-10-08 02:26:25 +00:00
{ " zz " , " za " } , 2 } ,
2003-02-20 01:13:36 +00:00
2004-11-11 23:34:58 +00:00
{ " &z <<< z| a " ,
2003-05-27 16:50:03 +00:00
{ " zz " , " za " } , 2 } ,
2001-10-08 02:26:25 +00:00
{ " [strength I] "
" &a= \\ ud900 \\ udc25 "
2004-11-11 23:34:58 +00:00
" &z<<< \\ ud900 \\ udc25|a " ,
2001-10-08 02:26:25 +00:00
/* NOTE(review): five strings are listed but len is 4, so the last one is not passed - confirm. */
{ " aa " , " az " , " \\ ud900 \\ udc25z " , " \\ ud900 \\ udc25a " , " zz " } , 4 } ,
2001-10-06 02:08:12 +00:00
} ;
2001-10-08 02:26:25 +00:00
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len ) ;
}
2001-10-06 02:08:12 +00:00
}
/* This test uses data supplied by Masashiko Maedera to test the */
/* JIS X 4061 collation order implementation */
2002-03-28 18:26:25 +00:00
/*
 * Passes two tables of kana strings (test1, test2) to
 * genericLocaleStarterWithOptions for locale "ja": first with strength set to
 * UCOL_QUATERNARY only, then with UCOL_QUATERNARY plus UCOL_SHIFTED alternate
 * handling. A third table (test3) and its call are commented out.
 */
static void TestNewJapanese ( void ) {
2001-10-06 02:08:12 +00:00
2006-10-01 07:12:18 +00:00
static const char * const test1 [ ] = {
2001-10-05 02:03:17 +00:00
" \\ u30b7 \\ u30e3 \\ u30fc \\ u30ec " ,
2001-10-02 01:37:00 +00:00
" \\ u30b7 \\ u30e3 \\ u30a4 " ,
" \\ u30b7 \\ u30e4 \\ u30a3 " ,
" \\ u30b7 \\ u30e3 \\ u30ec " ,
" \\ u3061 \\ u3087 \\ u3053 " ,
" \\ u3061 \\ u3088 \\ u3053 " ,
" \\ u30c1 \\ u30e7 \\ u30b3 \\ u30ec \\ u30fc \\ u30c8 " ,
" \\ u3066 \\ u30fc \\ u305f " ,
2003-01-20 07:42:58 +00:00
" \\ u30c6 \\ u30fc \\ u30bf " ,
2001-10-02 01:37:00 +00:00
" \\ u30c6 \\ u30a7 \\ u30bf " ,
" \\ u3066 \\ u3048 \\ u305f " ,
2004-11-11 23:34:58 +00:00
" \\ u3067 \\ u30fc \\ u305f " ,
2001-10-02 01:37:00 +00:00
" \\ u30c7 \\ u30fc \\ u30bf " ,
" \\ u30c7 \\ u30a7 \\ u30bf " ,
" \\ u3067 \\ u3048 \\ u305f " ,
" \\ u3066 \\ u30fc \\ u305f \\ u30fc " ,
" \\ u30c6 \\ u30fc \\ u30bf \\ u30a1 " ,
" \\ u30c6 \\ u30a7 \\ u30bf \\ u30fc " ,
" \\ u3066 \\ u3047 \\ u305f \\ u3041 " ,
" \\ u3066 \\ u3048 \\ u305f \\ u30fc " ,
" \\ u3067 \\ u30fc \\ u305f \\ u30fc " ,
" \\ u30c7 \\ u30fc \\ u30bf \\ u30a1 " ,
" \\ u3067 \\ u30a7 \\ u305f \\ u30a1 " ,
" \\ u30c7 \\ u3047 \\ u30bf \\ u3041 " ,
" \\ u30c7 \\ u30a8 \\ u30bf \\ u30a2 " ,
" \\ u3072 \\ u3086 " ,
" \\ u3073 \\ u3085 \\ u3042 " ,
" \\ u3074 \\ u3085 \\ u3042 " ,
" \\ u3073 \\ u3085 \\ u3042 \\ u30fc " ,
" \\ u30d3 \\ u30e5 \\ u30a2 \\ u30fc " ,
" \\ u3074 \\ u3085 \\ u3042 \\ u30fc " ,
" \\ u30d4 \\ u30e5 \\ u30a2 \\ u30fc " ,
" \\ u30d2 \\ u30e5 \\ u30a6 " ,
" \\ u30d2 \\ u30e6 \\ u30a6 " ,
" \\ u30d4 \\ u30e5 \\ u30a6 \\ u30a2 " ,
2004-11-11 23:34:58 +00:00
" \\ u3073 \\ u3085 \\ u30fc \\ u3042 \\ u30fc " ,
2001-10-02 01:37:00 +00:00
" \\ u30d3 \\ u30e5 \\ u30fc \\ u30a2 \\ u30fc " ,
" \\ u30d3 \\ u30e5 \\ u30a6 \\ u30a2 \\ u30fc " ,
" \\ u3072 \\ u3085 \\ u3093 " ,
" \\ u3074 \\ u3085 \\ u3093 " ,
" \\ u3075 \\ u30fc \\ u308a " ,
" \\ u30d5 \\ u30fc \\ u30ea " ,
" \\ u3075 \\ u3045 \\ u308a " ,
" \\ u3075 \\ u30a5 \\ u308a " ,
" \\ u3075 \\ u30a5 \\ u30ea " ,
" \\ u30d5 \\ u30a6 \\ u30ea " ,
" \\ u3076 \\ u30fc \\ u308a " ,
" \\ u30d6 \\ u30fc \\ u30ea " ,
" \\ u3076 \\ u3045 \\ u308a " ,
" \\ u30d6 \\ u30a5 \\ u308a " ,
" \\ u3077 \\ u3046 \\ u308a " ,
" \\ u30d7 \\ u30a6 \\ u30ea " ,
" \\ u3075 \\ u30fc \\ u308a \\ u30fc " ,
" \\ u30d5 \\ u30a5 \\ u30ea \\ u30fc " ,
" \\ u3075 \\ u30a5 \\ u308a \\ u30a3 " ,
" \\ u30d5 \\ u3045 \\ u308a \\ u3043 " ,
" \\ u30d5 \\ u30a6 \\ u30ea \\ u30fc " ,
" \\ u3075 \\ u3046 \\ u308a \\ u3043 " ,
" \\ u30d6 \\ u30a6 \\ u30ea \\ u30a4 " ,
" \\ u3077 \\ u30fc \\ u308a \\ u30fc " ,
" \\ u3077 \\ u30a5 \\ u308a \\ u30a4 " ,
" \\ u3077 \\ u3046 \\ u308a \\ u30fc " ,
" \\ u30d7 \\ u30a6 \\ u30ea \\ u30a4 " ,
" \\ u30d5 \\ u30fd " ,
" \\ u3075 \\ u309e " ,
" \\ u3076 \\ u309d " ,
" \\ u3076 \\ u3075 " ,
" \\ u3076 \\ u30d5 " ,
" \\ u30d6 \\ u3075 " ,
" \\ u30d6 \\ u30d5 " ,
" \\ u3076 \\ u309e " ,
" \\ u3076 \\ u3077 " ,
2001-10-05 18:01:27 +00:00
" \\ u30d6 \\ u3077 " ,
2001-10-02 01:37:00 +00:00
" \\ u3077 \\ u309d " ,
" \\ u30d7 \\ u30fd " ,
2001-10-05 18:01:27 +00:00
" \\ u3077 \\ u3075 " ,
2001-10-11 21:19:10 +00:00
} ;
2001-10-08 19:32:09 +00:00
/* The trailing comments spell each entry with H / K letters plus the marks \u309b and \u309c. */
static const char * test2 [ ] = {
" \\ u306f \\ u309d " , /* H\\u309d */
2005-09-17 06:26:58 +00:00
" \\ u30cf \\ u30fd " , /* K\\u30fd */
2001-10-08 19:32:09 +00:00
" \\ u306f \\ u306f " , /* HH */
" \\ u306f \\ u30cf " , /* HK */
" \\ u30cf \\ u30cf " , /* KK */
" \\ u306f \\ u309e " , /* H\\u309e */
" \\ u30cf \\ u30fe " , /* K\\u30fe */
" \\ u306f \\ u3070 " , /* HH\\u309b */
" \\ u30cf \\ u30d0 " , /* KK\\u309b */
" \\ u306f \\ u3071 " , /* HH\\u309c */
" \\ u30cf \\ u3071 " , /* KH\\u309c */
" \\ u30cf \\ u30d1 " , /* KK\\u309c */
" \\ u3070 \\ u309d " , /* H\\u309b\\u309d */
" \\ u30d0 \\ u30fd " , /* K\\u309b\\u30fd */
" \\ u3070 \\ u306f " , /* H\\u309bH */
" \\ u30d0 \\ u30cf " , /* K\\u309bK */
" \\ u3070 \\ u309e " , /* H\\u309b\\u309e */
" \\ u30d0 \\ u30fe " , /* K\\u309b\\u30fe */
" \\ u3070 \\ u3070 " , /* H\\u309bH\\u309b */
" \\ u30d0 \\ u3070 " , /* K\\u309bH\\u309b */
" \\ u30d0 \\ u30d0 " , /* K\\u309bK\\u309b */
" \\ u3070 \\ u3071 " , /* H\\u309bH\\u309c */
" \\ u30d0 \\ u30d1 " , /* K\\u309bK\\u309c */
" \\ u3071 \\ u309d " , /* H\\u309c\\u309d */
" \\ u30d1 \\ u30fd " , /* K\\u309c\\u30fd */
" \\ u3071 \\ u306f " , /* H\\u309cH */
" \\ u30d1 \\ u30cf " , /* K\\u309cK */
" \\ u3071 \\ u3070 " , /* H\\u309cH\\u309b */
" \\ u3071 \\ u30d0 " , /* H\\u309cK\\u309b */
" \\ u30d1 \\ u30d0 " , /* K\\u309cK\\u309b */
" \\ u3071 \\ u3071 " , /* H\\u309cH\\u309c */
" \\ u30d1 \\ u30d1 " , /* K\\u309cK\\u309c */
} ;
2001-11-10 06:54:28 +00:00
/*
2001-10-09 15:24:32 +00:00
static const char * test3 [ ] = {
2001-10-11 21:19:10 +00:00
" \\ u221er \\ u221e " ,
" \\ u221eR# " ,
" \\ u221et \\ u221e " ,
" #r \\ u221e " ,
" #R# " ,
" #t% " ,
" #T% " ,
" 8t \\ u221e " ,
" 8T \\ u221e " ,
" 8t# " ,
" 8T# " ,
" 8t% " ,
" 8T% " ,
" 8t8 " ,
" 8T8 " ,
" \\ u03c9r \\ u221e " ,
" \\ u03a9R% " ,
" rr \\ u221e " ,
" rR \\ u221e " ,
" Rr \\ u221e " ,
" RR \\ u221e " ,
" RT% " ,
" rt8 " ,
" tr \\ u221e " ,
" tr8 " ,
" TR8 " ,
" tt8 " ,
" \\ u30b7 \\ u30e3 \\ u30fc \\ u30ec " ,
2001-10-09 15:24:32 +00:00
} ;
2001-11-10 06:54:28 +00:00
*/
2002-09-03 19:43:11 +00:00
/* One attribute/value pair: quaternary strength. */
static const UColAttribute att [ ] = { UCOL_STRENGTH } ;
static const UColAttributeValue val [ ] = { UCOL_QUATERNARY } ;
2001-11-13 22:55:05 +00:00
2002-09-03 19:43:11 +00:00
/* Two attribute/value pairs: quaternary strength plus shifted alternate handling. */
static const UColAttribute attShifted [ ] = { UCOL_STRENGTH , UCOL_ALTERNATE_HANDLING } ;
static const UColAttributeValue valShifted [ ] = { UCOL_QUATERNARY , UCOL_SHIFTED } ;
/* The final argument (1 or 2) matches the number of attribute/value pairs passed. */
genericLocaleStarterWithOptions ( " ja " , test1 , sizeof ( test1 ) / sizeof ( test1 [ 0 ] ) , att , val , 1 ) ;
genericLocaleStarterWithOptions ( " ja " , test2 , sizeof ( test2 ) / sizeof ( test2 [ 0 ] ) , att , val , 1 ) ;
2001-10-31 23:59:35 +00:00
/*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
2002-09-03 19:43:11 +00:00
genericLocaleStarterWithOptions ( " ja " , test1 , sizeof ( test1 ) / sizeof ( test1 [ 0 ] ) , attShifted , valShifted , 2 ) ;
genericLocaleStarterWithOptions ( " ja " , test2 , sizeof ( test2 ) / sizeof ( test2 [ 0 ] ) , attShifted , valShifted , 2 ) ;
2001-10-11 21:19:10 +00:00
}
2002-03-28 18:26:25 +00:00
/*
 * Uses a rule that makes two supplementary characters equal ('=') and passes
 * two strings that share the prefix "ab" and end in those characters to
 * genericRulesStarterWithResult, with UCOL_EQUAL as the expected result.
 */
static void TestStrCollIdenticalPrefix ( void ) {
2001-10-11 21:19:10 +00:00
const char * rule = " & \\ ud9b0 \\ udc70= \\ ud9b0 \\ udc71 " ;
const char * test [ ] = {
" ab \\ ud9b0 \\ udc70 " ,
" ab \\ ud9b0 \\ udc71 "
} ;
2006-01-28 08:25:52 +00:00
genericRulesStarterWithResult ( rule , test , sizeof ( test ) / sizeof ( test [ 0 ] ) , UCOL_EQUAL ) ;
2001-10-06 02:08:12 +00:00
}
2002-03-07 19:06:00 +00:00
/* Contractions should have all their canonically equivalent */
/* strings included */
2002-03-28 18:26:25 +00:00
/*
 * Table-driven test: each entry pairs a rule of the form "&b=..." with a list
 * of strings and a count, and is passed to genericRulesStarterWithResult with
 * UCOL_EQUAL as the expected result.
 */
static void TestContractionClosure ( void ) {
2006-09-28 08:41:37 +00:00
static const struct {
2001-10-18 22:59:20 +00:00
const char * rules ;
2006-09-28 08:41:37 +00:00
const char * data [ 10 ] ;
2001-10-18 22:59:20 +00:00
const uint32_t len ;
2004-11-11 23:34:58 +00:00
} tests [ ] = {
2002-03-13 06:04:01 +00:00
{ " &b= \\ u00e4 \\ u00e4 " ,
2001-10-18 22:59:20 +00:00
/* "b" followed by precomposed, decomposed and mixed spellings of the tailored string. */
{ " b " , " \\ u00e4 \\ u00e4 " , " a \\ u0308a \\ u0308 " , " \\ u00e4a \\ u0308 " , " a \\ u0308 \\ u00e4 " } , 5 } ,
2002-03-13 06:04:01 +00:00
{ " &b= \\ u00C5 " ,
2001-10-18 22:59:20 +00:00
{ " b " , " \\ u00C5 " , " A \\ u030A " , " \\ u212B " } , 4 } ,
2002-03-07 19:06:00 +00:00
} ;
uint32_t i ;
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
2006-01-28 08:25:52 +00:00
genericRulesStarterWithResult ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len , UCOL_EQUAL ) ;
2002-03-07 19:06:00 +00:00
}
}
/* This test also fails */
2002-03-28 18:26:25 +00:00
/*
 * Table-driven test: each entry pairs a rule string containing "[before 3]"
 * with two strings, and is passed to genericRulesStarter. The section inside
 * #if 0 holds further rules plus a loop that logs collation elements; it is
 * compiled out.
 */
static void TestBeforePrefixFailure ( void ) {
2006-09-28 08:41:37 +00:00
static const struct {
2002-03-07 19:06:00 +00:00
const char * rules ;
2006-09-28 08:41:37 +00:00
const char * data [ 10 ] ;
2002-03-07 19:06:00 +00:00
const uint32_t len ;
2004-11-11 23:34:58 +00:00
} tests [ ] = {
2002-03-07 19:06:00 +00:00
{ " &g <<< a "
" &[before 3] \\ uff41 <<< x " ,
{ " x " , " \\ uff41 " } , 2 } ,
2001-10-18 22:59:20 +00:00
/* The next two entries contain the same three rule lines, with the [before 3] line last and then first. */
{ " & \\ u30A7= \\ u30A7= \\ u3047= \\ uff6a "
" & \\ u30A8= \\ u30A8= \\ u3048= \\ uff74 "
2004-11-11 23:34:58 +00:00
" &[before 3] \\ u30a7<<< \\ u30a9 " ,
2001-10-18 22:59:20 +00:00
{ " \\ u30a9 " , " \\ u30a7 " } , 2 } ,
{ " &[before 3] \\ u30a7<<< \\ u30a9 "
" & \\ u30A7= \\ u30A7= \\ u3047= \\ uff6a "
" & \\ u30A8= \\ u30A8= \\ u3048= \\ uff74 " ,
{ " \\ u30a9 " , " \\ u30a7 " } , 2 } ,
} ;
uint32_t i ;
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len ) ;
}
#if 0
2004-11-11 23:34:58 +00:00
const char * rule1 =
2001-10-11 21:19:10 +00:00
" & \\ u30A7= \\ u30A7= \\ u3047= \\ uff6a "
" & \\ u30A8= \\ u30A8= \\ u3048= \\ uff74 "
" &[before 3] \\ u30a7<<< \\ u30c6| \\ u30fc " ;
2004-11-11 23:34:58 +00:00
const char * rule2 =
2001-10-11 21:19:10 +00:00
" &[before 3] \\ u30a7<<< \\ u30c6| \\ u30fc "
" & \\ u30A7= \\ u30A7= \\ u3047= \\ uff6a "
" & \\ u30A8= \\ u30A8= \\ u3048= \\ uff74 " ;
const char * test [ ] = {
2004-11-11 23:34:58 +00:00
" \\ u30c6 \\ u30fc \\ u30bf " ,
2001-10-11 21:19:10 +00:00
" \\ u30c6 \\ u30a7 \\ u30bf " ,
} ;
genericRulesStarter ( rule1 , test , sizeof ( test ) / sizeof ( test [ 0 ] ) ) ;
genericRulesStarter ( rule2 , test , sizeof ( test ) / sizeof ( test [ 0 ] ) ) ;
2001-10-18 22:59:20 +00:00
/* this piece of code should be in some sort of verbose mode */
2001-10-06 02:08:12 +00:00
/* it gets the collation elements for elements and prints them */
/* This is useful when trying to see whether the problem is */
2004-11-11 23:34:58 +00:00
{
2001-10-11 21:19:10 +00:00
UErrorCode status = U_ZERO_ERROR ;
/* This i shadows the outer loop index declared above. */
uint32_t i = 0 ;
UCollationElements * it = NULL ;
uint32_t CE ;
UChar string [ 256 ] ;
uint32_t uStringLen ;
UCollator * coll = NULL ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
uStringLen = u_unescape ( rule1 , string , 256 ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
coll = ucol_openRules ( string , uStringLen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
/*coll = ucol_open("ja_JP_JIS", &status);*/
/* Opened with length 0; the text is replaced per test string via ucol_setText below. */
it = ucol_openElements ( coll , string , 0 , & status ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
for ( i = 0 ; i < sizeof ( test ) / sizeof ( test [ 0 ] ) ; i + + ) {
log_verbose ( " %s \n " , test [ i ] ) ;
uStringLen = u_unescape ( test [ i ] , string , 256 ) ;
ucol_setText ( it , string , uStringLen , & status ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
/* Log every collation element until the iterator returns UCOL_NULLORDER. */
while ( ( CE = ucol_next ( it , & status ) ) ! = UCOL_NULLORDER ) {
log_verbose ( " %08X \n " , CE ) ;
}
log_verbose ( " \n " ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
}
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
ucol_closeElements ( it ) ;
ucol_close ( coll ) ;
}
2001-10-18 22:59:20 +00:00
# endif
2001-10-08 19:32:09 +00:00
}
2002-03-07 19:06:00 +00:00
2002-03-28 18:26:25 +00:00
static void TestPrefixCompose ( void ) {
2004-11-11 23:34:58 +00:00
const char * rule1 =
2001-10-13 16:22:08 +00:00
" & \\ u30a7<<< \\ u30ab| \\ u30fc= \\ u30ac| \\ u30fc " ;
2001-11-10 06:54:28 +00:00
/*
2001-10-13 16:22:08 +00:00
const char * test [ ] = {
2004-11-11 23:34:58 +00:00
" \\ u30c6 \\ u30fc \\ u30bf " ,
2001-10-13 16:22:08 +00:00
" \\ u30c6 \\ u30a7 \\ u30bf " ,
} ;
2001-11-10 06:54:28 +00:00
*/
2004-11-11 23:34:58 +00:00
{
2001-10-13 16:22:08 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-11-10 06:54:28 +00:00
/*uint32_t i = 0;*/
/*UCollationElements *it = NULL;*/
2001-10-17 02:19:48 +00:00
/* uint32_t CE;*/
2001-10-13 16:22:08 +00:00
UChar string [ 256 ] ;
uint32_t uStringLen ;
UCollator * coll = NULL ;
uStringLen = u_unescape ( rule1 , string , 256 ) ;
coll = ucol_openRules ( string , uStringLen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
ucol_close ( coll ) ;
}
}
2001-10-08 19:32:09 +00:00
2002-03-15 23:51:22 +00:00
/*
2004-11-11 23:34:58 +00:00
[ last variable ] last variable value
[ last primary ignorable ] largest CE for primary ignorable
[ last secondary ignorable ] largest CE for secondary ignorable
[ last tertiary ignorable ] largest CE for tertiary ignorable
[ top ] guaranteed to be above all implicit CEs , for now and in the future ( in 1.8 )
2002-03-15 23:51:22 +00:00
*/
2002-03-28 18:26:25 +00:00
/*
 * Table-driven test of the bracketed reset positions ([first/last ... ignorable],
 * [first/last variable], [first/last regular], [first implicit], [top]),
 * with and without "[before N]".
 * Each entry holds a rule string, the test strings, and the number of data[]
 * slots to use; every entry is handed to genericRulesStarter().
 * NOTE(review): genericRulesStarter() is defined outside this function;
 * presumably it checks that data[] sorts in the listed order - confirm.
 * The *_CHAR_STRING macros are defined inside the function body and are not
 * #undef'd afterwards.
 */
static void TestRuleOptions ( void ) {
2004-11-11 23:34:58 +00:00
/* values here are hardcoded and are correct for the current UCA
* when the UCA changes , one might be forced to change these
2010-10-25 23:06:37 +00:00
* values .
2002-07-11 22:44:26 +00:00
*/
2010-10-25 23:06:37 +00:00
/*
* These strings contain the last character before [ variable top ]
* and the first and second characters ( by primary weights ) after it .
* See FractionalUCA . txt . For example :
[ last variable [ 0 C FE , 05 , 05 ] ] # U + 10 A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
[ variable top = 0 C FE ]
[ first regular [ 0 D 0 A , 05 , 05 ] ] # U + 0060 GRAVE ACCENT
and
00 B4 ; [ 0 D 0 C , 05 , 05 ]
*
* Note : Starting with UCA 6.0 , the [ variable top ] collation element
* is not the weight of any character or string ,
* which means that LAST_VARIABLE_CHAR_STRING sorts before [ last variable ] .
*/
# define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
# define FIRST_REGULAR_CHAR_STRING "\\u0060"
# define SECOND_REGULAR_CHAR_STRING "\\u00B4"
/*
* This string has to match the character that has the [ last regular ] weight
* which changes with each UCA version .
* See the bottom of FractionalUCA . txt which says something like
[ last regular [ 7 A FE , 05 , 05 ] ] # U + 1342 E EGYPTIAN HIEROGLYPH AA032
*
* Note : Starting with UCA 6.0 , the [ last regular ] collation element
* is not the weight of any character or string ,
* which means that LAST_REGULAR_CHAR_STRING sorts before [ last regular ] .
*/
# define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2006-09-28 08:41:37 +00:00
static const struct {
2002-03-15 23:51:22 +00:00
const char * rules ;
2006-09-28 08:41:37 +00:00
const char * data [ 10 ] ;
2002-03-15 23:51:22 +00:00
const uint32_t len ;
2004-11-11 23:34:58 +00:00
} tests [ ] = {
2014-02-25 21:21:49 +00:00
#if 0
/* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2002-07-11 22:44:26 +00:00
/* - all befores here amount to zero */
2004-11-11 23:34:58 +00:00
{ " &[before 3][first tertiary ignorable]<<<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0000 " , " a " } , 2
} , /* you cannot go before first tertiary ignorable */
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[before 3][last tertiary ignorable]<<<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0000 " , " a " } , 2
} , /* you cannot go before last tertiary ignorable */
2014-02-25 21:21:49 +00:00
# endif
/*
* However , there is a real secondary ignorable ( artificial addition in FractionalUCA . txt ) ,
* and it * is * possible to " go before " that .
*/
2004-11-11 23:34:58 +00:00
{ " &[before 3][first secondary ignorable]<<<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0000 " , " a " } , 2
2014-02-25 21:21:49 +00:00
} ,
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[before 3][last secondary ignorable]<<<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0000 " , " a " } , 2
2014-02-25 21:21:49 +00:00
} ,
2002-07-11 22:44:26 +00:00
/* 'normal' befores */
2014-02-25 21:21:49 +00:00
/*
* Note : With a " SPACE first primary " boundary CE in FractionalUCA . txt ,
* it is not possible to tailor & [ first primary ignorable ] < a or & [ last primary ignorable ] < a
* because there is no tailoring space before that boundary .
* Made the tests work by tailoring to a space instead .
*/
{ " &[before 3][first primary ignorable]<<<c<<<b &' '<a " , /* was &[first primary ignorable]<a */
2003-07-24 23:23:19 +00:00
{ " c " , " b " , " \\ u0332 " , " a " } , 4
} ,
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
/* we don't have a code point that corresponds to
2002-07-11 22:44:26 +00:00
* the last primary ignorable
*/
2014-02-25 21:21:49 +00:00
{ " &[before 3][last primary ignorable]<<<c<<<b &' '<a " , /* was &[last primary ignorable]<a */
2003-07-24 23:23:19 +00:00
{ " \\ u0332 " , " \\ u20e3 " , " c " , " b " , " a " } , 5
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[before 3][first variable]<<<c<<<b &[first variable]<a " ,
2003-07-24 23:23:19 +00:00
{ " c " , " b " , " \\ u0009 " , " a " , " \\ u000a " } , 5
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[last variable]<a &[before 3][last variable]<<<c<<<b " ,
2010-10-19 21:48:04 +00:00
{ LAST_VARIABLE_CHAR_STRING , " c " , " b " , /* [last variable] */ " a " , FIRST_REGULAR_CHAR_STRING } , 5
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
{ " &[first regular]<a "
" &[before 1][first regular]<b " ,
2010-10-19 21:48:04 +00:00
{ " b " , FIRST_REGULAR_CHAR_STRING , " a " , SECOND_REGULAR_CHAR_STRING } , 4
2003-07-24 23:23:19 +00:00
} ,
2002-07-11 22:44:26 +00:00
{ " &[before 1][last regular]<b "
" &[last regular]<a " ,
2010-10-19 21:48:04 +00:00
{ LAST_REGULAR_CHAR_STRING , " b " , /* [last regular] */ " a " , " \\ u4e00 " } , 4
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
{ " &[before 1][first implicit]<b "
" &[first implicit]<a " ,
2003-07-24 23:23:19 +00:00
{ " b " , " \\ u4e00 " , " a " , " \\ u4e01 " } , 4
} ,
2014-02-25 21:21:49 +00:00
#if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2002-07-11 22:44:26 +00:00
{ " &[before 1][last implicit]<b "
" &[last implicit]<a " ,
2004-01-27 22:17:51 +00:00
{ " b " , " \\ U0010FFFD " , " a " } , 3
2004-11-11 23:34:58 +00:00
} ,
2014-02-25 21:21:49 +00:00
# endif
2002-03-15 23:51:22 +00:00
/* NOTE(review): the entry below lists eight strings but gives len 7, so the
 * trailing "u" (tailored after [top]) is not passed on - confirm this is intended. */
{ " &[last variable]<z "
2014-02-25 21:21:49 +00:00
" &' '<x " /* was &[last primary ignorable]<x, see above */
2002-03-16 05:02:35 +00:00
" &[last secondary ignorable]<<y "
" &[last tertiary ignorable]<<<w "
" &[top]<u " ,
2010-10-19 21:48:04 +00:00
{ " \\ ufffb " , " w " , " y " , " \\ u20e3 " , " x " , LAST_VARIABLE_CHAR_STRING , " z " , " u " } , 7
2003-07-24 23:23:19 +00:00
}
2002-07-11 22:44:26 +00:00
2002-03-15 23:51:22 +00:00
} ;
uint32_t i ;
/* run every enabled table entry through the shared rule-based driver */
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len ) ;
}
}
2002-10-17 23:12:43 +00:00
2002-10-30 06:09:25 +00:00
/*
 * Smoke test for the "[optimize [...]]" rule option. It is not a real check of
 * the option: it only finds out whether building a collator from such a rule
 * string (which involves copying UCA contents) fails, since the observable
 * ordering stays the same.
 */
static void TestOptimize(void) {
    static const struct {
        const char *rules;      /* tailoring text */
        const char *data[10];   /* test strings */
        const uint32_t len;     /* number of data[] slots in use */
    } cases[] = {
        { " [optimize [ \\ uAC00- \\ uD7FF]] ",
          { " a ", " b " }, 2 }
    };
    const uint32_t caseCount = (uint32_t)(sizeof(cases) / sizeof(cases[0]));
    uint32_t idx = 0;

    while (idx < caseCount) {
        genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
        ++idx;
    }
}
2003-02-20 01:13:36 +00:00
/*
2004-11-11 23:34:58 +00:00
cycheng @ ca . ibm . c . . . we got inconsistent results when using the UTF - 16 BE iterator and the UTF - 8 iterator .
weiv ucol_strcollIter ?
cycheng @ ca . ibm . c . . . e . g . s1 = 0xfffc0062 , and s2 = d8000021
weiv these are the input strings ?
cycheng @ ca . ibm . c . . . yes , using the utf - 16 iterator and UCA with normalization on , we have s1 > s2
weiv will check - could be a problem with utf - 8 iterator
cycheng @ ca . ibm . c . . . but if we use the utf - 8 iterator , i . e . s1 = efbfbc62 and s2 = eda08021 , we have s1 < s2
weiv hmmm
cycheng @ ca . ibm . c . . . note that we have a standalone high surrogate
weiv that doesn ' t sound right
cycheng @ ca . ibm . c . . . we got the same inconsistent results on AIX and Win2000
weiv so you have two strings , you convert them to utf - 8 and to utf - 16 BE
cycheng @ ca . ibm . c . . . yes
weiv and then do the comparison
cycheng @ ca . ibm . c . . . in one case , the input strings are in utf8 , and in the other case the input strings are in utf - 16 be
weiv utf - 16 strings look like a little endian ones in the example you sent me
weiv It could be a bug - let me try to test it out
cycheng @ ca . ibm . c . . . ok
cycheng @ ca . ibm . c . . . we can wait till the conf . call
cycheng @ ca . ibm . c . . . next weke
weiv that would be great
weiv hmmm
weiv I might be wrong
weiv let me play with it some more
cycheng @ ca . ibm . c . . . ok
cycheng @ ca . ibm . c . . . also please check s3 = 0x0e3a0062 and s4 = 0x0e400021 . both are in utf - 16 be
cycheng @ ca . ibm . c . . . seems with icu 2.2 we have s3 > s4 , but not in icu 2.4 that ' s built for db2
cycheng @ ca . ibm . c . . . also s1 & s2 that I sent you earlier are also in utf - 16 be
weiv ok
cycheng @ ca . ibm . c . . . i ask sherman to send you more inconsistent data
weiv thanks
cycheng @ ca . ibm . c . . . the 4 strings we sent are just samples
2003-02-20 01:13:36 +00:00
*/
2003-05-15 21:28:34 +00:00
#if 0
2003-02-20 01:13:36 +00:00
/*
 * Compiled out (enclosed in #if 0).
 * Compares the same two strings, <U+D800 U+0021> and <U+FFFC U+0062>, with
 * ucol_strcollIter() twice: once through UTF-16BE iterators and once through
 * UTF-8 iterators, with normalization switched on, and logs an error when the
 * two results differ. See the chat transcript in the comment above for the
 * report that prompted it.
 */
static void Alexis ( void ) {
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " " , & status ) ;
/* the two strings as UTF-16BE bytes */
const char utf16be [ 2 ] [ 4 ] = {
{ ( char ) 0xd8 , ( char ) 0x00 , ( char ) 0x00 , ( char ) 0x21 } ,
{ ( char ) 0xff , ( char ) 0xfc , ( char ) 0x00 , ( char ) 0x62 }
} ;
/* the same two strings as UTF-8 bytes (the first starts with an encoded lone surrogate) */
const char utf8 [ 2 ] [ 4 ] = {
{ ( char ) 0xed , ( char ) 0xa0 , ( char ) 0x80 , ( char ) 0x21 } ,
{ ( char ) 0xef , ( char ) 0xbf , ( char ) 0xbc , ( char ) 0x62 } ,
} ;
UCharIterator iterU161 , iterU162 ;
UCharIterator iterU81 , iterU82 ;
UCollationResult resU16 , resU8 ;
/* every buffer is 4 bytes long */
uiter_setUTF16BE ( & iterU161 , utf16be [ 0 ] , 4 ) ;
uiter_setUTF16BE ( & iterU162 , utf16be [ 1 ] , 4 ) ;
uiter_setUTF8 ( & iterU81 , utf8 [ 0 ] , 4 ) ;
uiter_setUTF8 ( & iterU82 , utf8 [ 1 ] , 4 ) ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
resU16 = ucol_strcollIter ( coll , & iterU161 , & iterU162 , & status ) ;
resU8 = ucol_strcollIter ( coll , & iterU81 , & iterU82 , & status ) ;
if ( resU16 ! = resU8 ) {
log_err ( " different results \n " ) ;
}
ucol_close ( coll ) ;
}
2003-05-15 21:28:34 +00:00
# endif
2003-02-20 01:13:36 +00:00
# define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
static void Alexis2 ( void ) {
UErrorCode status = U_ZERO_ERROR ;
UChar U16Source [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] , U16Target [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] ;
char U16BESource [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] , U16BETarget [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] ;
2004-11-11 23:34:58 +00:00
char U8Source [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] , U8Target [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] ;
2003-02-20 01:13:36 +00:00
int32_t U16LenS = 0 , U16LenT = 0 , U16BELenS = 0 , U16BELenT = 0 , U8LenS = 0 , U8LenT = 0 ;
2003-06-04 19:02:41 +00:00
UConverter * conv = NULL ;
2003-02-20 01:13:36 +00:00
UCharIterator U16BEItS , U16BEItT ;
UCharIterator U8ItS , U8ItT ;
UCollationResult resU16 , resU16BE , resU8 ;
2006-09-28 08:41:37 +00:00
static const char * const pairs [ ] [ 2 ] = {
2003-04-30 23:26:55 +00:00
{ " \\ ud800 \\ u0021 " , " \\ uFFFC \\ u0062 " } ,
2003-02-20 01:13:36 +00:00
{ " \\ u0435 \\ u0308 \\ u0334 " , " \\ u0415 \\ u0334 \\ u0340 " } ,
{ " \\ u0E40 \\ u0021 " , " \\ u00A1 \\ u0021 " } ,
{ " \\ u0E40 \\ u0021 " , " \\ uFE57 \\ u0062 " } ,
2003-04-30 23:26:55 +00:00
{ " \\ u5F20 " , " \\ u5F20 \\ u4E00 \\ u8E3F " } ,
{ " \\ u0000 \\ u0020 " , " \\ u0000 \\ u0020 \\ u0000 " } ,
{ " \\ u0020 " , " \\ u0020 \\ u0000 " }
/*
5F 20 ( my result here )
5F 204E008 E3F
5F 20 ( your result here )
*/
2003-02-20 01:13:36 +00:00
} ;
int32_t i = 0 ;
2003-06-04 19:02:41 +00:00
UCollator * coll = ucol_open ( " " , & status ) ;
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
2003-02-20 01:13:36 +00:00
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
2003-06-04 19:02:41 +00:00
conv = ucnv_open ( " UTF16BE " , & status ) ;
2003-02-20 01:13:36 +00:00
for ( i = 0 ; i < sizeof ( pairs ) / sizeof ( pairs [ 0 ] ) ; i + + ) {
U16LenS = u_unescape ( pairs [ i ] [ 0 ] , U16Source , CMSCOLL_ALEXIS2_BUFFER_SIZE ) ;
U16LenT = u_unescape ( pairs [ i ] [ 1 ] , U16Target , CMSCOLL_ALEXIS2_BUFFER_SIZE ) ;
resU16 = ucol_strcoll ( coll , U16Source , U16LenS , U16Target , U16LenT ) ;
2003-04-30 23:26:55 +00:00
log_verbose ( " Result of strcoll is %i \n " , resU16 ) ;
2003-02-20 01:13:36 +00:00
U16BELenS = ucnv_fromUChars ( conv , U16BESource , CMSCOLL_ALEXIS2_BUFFER_SIZE , U16Source , U16LenS , & status ) ;
U16BELenT = ucnv_fromUChars ( conv , U16BETarget , CMSCOLL_ALEXIS2_BUFFER_SIZE , U16Target , U16LenT , & status ) ;
2013-03-21 01:42:01 +00:00
( void ) U16BELenS ; /* Suppress set but not used warnings. */
( void ) U16BELenT ;
2003-02-20 01:13:36 +00:00
2003-04-30 23:26:55 +00:00
/* use the original sizes, as the result from converter is in bytes */
uiter_setUTF16BE ( & U16BEItS , U16BESource , U16LenS ) ;
uiter_setUTF16BE ( & U16BEItT , U16BETarget , U16LenT ) ;
2003-02-20 01:13:36 +00:00
resU16BE = ucol_strcollIter ( coll , & U16BEItS , & U16BEItT , & status ) ;
2003-04-30 23:26:55 +00:00
log_verbose ( " Result of U16BE is %i \n " , resU16BE ) ;
2003-02-20 01:13:36 +00:00
if ( resU16 ! = resU16BE ) {
log_verbose ( " Different results between UTF16 and UTF16BE for %s & %s \n " , pairs [ i ] [ 0 ] , pairs [ i ] [ 1 ] ) ;
}
u_strToUTF8 ( U8Source , CMSCOLL_ALEXIS2_BUFFER_SIZE , & U8LenS , U16Source , U16LenS , & status ) ;
u_strToUTF8 ( U8Target , CMSCOLL_ALEXIS2_BUFFER_SIZE , & U8LenT , U16Target , U16LenT , & status ) ;
uiter_setUTF8 ( & U8ItS , U8Source , U8LenS ) ;
uiter_setUTF8 ( & U8ItT , U8Target , U8LenT ) ;
resU8 = ucol_strcollIter ( coll , & U8ItS , & U8ItT , & status ) ;
if ( resU16 ! = resU8 ) {
log_verbose ( " Different results between UTF16 and UTF8 for %s & %s \n " , pairs [ i ] [ 0 ] , pairs [ i ] [ 1 ] ) ;
}
}
ucol_close ( coll ) ;
2003-02-25 22:22:41 +00:00
ucnv_close ( conv ) ;
2003-02-20 01:13:36 +00:00
}
2003-04-30 23:26:55 +00:00
static void TestHebrewUCA ( void ) {
UErrorCode status = U_ZERO_ERROR ;
2006-09-28 08:41:37 +00:00
static const char * first [ ] = {
2003-04-30 23:26:55 +00:00
" d790d6b8d79cd795d6bcd7a9 " ,
" d790d79cd79ed7a7d799d799d7a1 " ,
" d790d6b4d79ed795d6bcd7a9 " ,
} ;
char utf8String [ 3 ] [ 256 ] ;
UChar utf16String [ 3 ] [ 256 ] ;
int32_t i = 0 , j = 0 ;
int32_t sizeUTF8 [ 3 ] ;
int32_t sizeUTF16 [ 3 ] ;
UCollator * coll = ucol_open ( " " , & status ) ;
2008-02-19 20:10:55 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Could not open UCA collation %s \n " , u_errorName ( status ) ) ;
2008-02-19 20:10:55 +00:00
return ;
}
2003-04-30 23:26:55 +00:00
/*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
for ( i = 0 ; i < sizeof ( first ) / sizeof ( first [ 0 ] ) ; i + + ) {
sizeUTF8 [ i ] = u_parseUTF8 ( first [ i ] , - 1 , utf8String [ i ] , 256 , & status ) ;
u_strFromUTF8 ( utf16String [ i ] , 256 , & sizeUTF16 [ i ] , utf8String [ i ] , sizeUTF8 [ i ] , & status ) ;
log_verbose ( " %i: " ) ;
for ( j = 0 ; j < sizeUTF16 [ i ] ; j + + ) {
/*log_verbose("\\u%04X", utf16String[i][j]);*/
log_verbose ( " %04X " , utf16String [ i ] [ j ] ) ;
}
log_verbose ( " \n " ) ;
}
for ( i = 0 ; i < sizeof ( first ) / sizeof ( first [ 0 ] ) - 1 ; i + + ) {
for ( j = i + 1 ; j < sizeof ( first ) / sizeof ( first [ 0 ] ) ; j + + ) {
doTest ( coll , utf16String [ i ] , utf16String [ j ] , UCOL_LESS ) ;
}
}
ucol_close ( coll ) ;
}
static void TestPartialSortKeyTermination ( void ) {
2006-09-28 08:41:37 +00:00
static const char * cases [ ] = {
2003-04-30 23:26:55 +00:00
" \\ u1234 \\ u1234 \\ udc00 " ,
2004-11-11 23:34:58 +00:00
" \\ udc00 \\ ud800 \\ ud800 "
2003-04-30 23:26:55 +00:00
} ;
2014-02-25 21:21:49 +00:00
int32_t i ;
2003-04-30 23:26:55 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " " , & status ) ;
UCharIterator iter ;
UChar currCase [ 256 ] ;
int32_t length = 0 ;
int32_t pKeyLen = 0 ;
uint8_t key [ 256 ] ;
for ( i = 0 ; i < sizeof ( cases ) / sizeof ( cases [ 0 ] ) ; i + + ) {
uint32_t state [ 2 ] = { 0 , 0 } ;
length = u_unescape ( cases [ i ] , currCase , 256 ) ;
uiter_setString ( & iter , currCase , length ) ;
pKeyLen = ucol_nextSortKeyPart ( coll , & iter , state , key , 256 , & status ) ;
2013-03-21 01:42:01 +00:00
( void ) pKeyLen ; /* Suppress set but not used warning. */
2003-04-30 23:26:55 +00:00
log_verbose ( " Done \n " ) ;
}
ucol_close ( coll ) ;
}
/*
 * For each locale in locales[], opens a collator, sets primary strength with
 * the case level off, and calls doTest() with UCOL_EQUAL for each adjacent
 * pair of strings in cases[].
 */
static void TestSettings(void) {
    static const char *cases[] = {
        " apple ",
        " Apple "
    };
    static const char *locales[] = {
        " ",
        " en "
    };
    /* signed element counts, so the loops do not compare int32_t with size_t */
    const int32_t localeCount = (int32_t)(sizeof(locales) / sizeof(locales[0]));
    const int32_t caseCount = (int32_t)(sizeof(cases) / sizeof(cases[0]));
    UErrorCode status = U_ZERO_ERROR;
    int32_t i = 0, j = 0;
    UChar source[256], target[256];
    int32_t sLen = 0, tLen = 0;
    UCollator *collateObject = NULL;

    for (i = 0; i < localeCount; i++) {
        /* one locale failing must not prevent the next one from opening */
        status = U_ZERO_ERROR;
        collateObject = ucol_open(locales[i], &status);
        if (U_FAILURE(status)) {
            /* do not hand an unopened collator to the setters or to doTest() */
            log_err_status(status, "Could not open collator for locale \"%s\" -> %s\n",
                           locales[i], u_errorName(status));
            continue;
        }
        ucol_setStrength(collateObject, UCOL_PRIMARY);
        ucol_setAttribute(collateObject, UCOL_CASE_LEVEL, UCOL_OFF, &status);
        for (j = 1; j < caseCount; j++) {
            sLen = u_unescape(cases[j - 1], source, 256);
            source[sLen] = 0;
            tLen = u_unescape(cases[j], target, 256);
            /* terminate the buffer that was just filled (was: source[tLen]) */
            target[tLen] = 0;
            doTest(collateObject, source, target, UCOL_EQUAL);
        }
        ucol_close(collateObject);
    }
}
2003-05-01 00:57:27 +00:00
/*
 * Runs a series of ucol_equals() checks on a collator.
 *
 * @param locName  Locale name of the collators; not used by the checks.
 * @param source   Collator under test; stays open, the caller closes it.
 * @param target   Second collator expected to equal source; this function
 *                 closes it.
 * @return Number of problems found (0 when every check passed).
 *
 * When the requested and actual locales of source are the same string, three
 * more checks run: a clone equals source, a clone with the French-collation
 * attribute flipped does not, and a collator rebuilt from source's rule string
 * equals source (only required when that rule string is non-empty).
 */
static int32_t TestEqualsForCollator(const char *locName, UCollator *source, UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t errorNo = 0;
    const UChar *sourceRules = NULL;
    int32_t sourceRulesLen = 0;
    UParseError parseError;
    UColAttributeValue french = UCOL_OFF;

    (void)locName;  /* part of the signature used by callers, not needed here */

    if (!ucol_equals(source, target)) {
        log_err(" Same collators, different address not equal \n ");
        errorNo++;
    }
    ucol_close(target);

    if (uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status),
                    ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
        target = ucol_safeClone(source, NULL, NULL, &status);
        if (U_FAILURE(status)) {
            log_err(" Error creating clone \n ");
            errorNo++;
            return errorNo;
        }
        if (!ucol_equals(source, target)) {
            log_err(" Collator different from it's clone \n ");
            errorNo++;
        }

        /* flip one attribute on the clone: it must no longer compare equal */
        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
        if (french == UCOL_ON) {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
        } else {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
        }
        if (U_FAILURE(status)) {
            log_err(" Error setting attributes \n ");
            errorNo++;
            /* the clone is still open here; release it before bailing out */
            ucol_close(target);
            return errorNo;
        }
        if (ucol_equals(source, target)) {
            log_err(" Collators same even when options changed \n ");
            errorNo++;
        }
        ucol_close(target);

        /* rebuild a collator from the rule string of source */
        sourceRules = ucol_getRules(source, &sourceRulesLen);
        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
        if (U_FAILURE(status)) {
            log_err(" Error instantiating target from rules - %s \n ", u_errorName(status));
            errorNo++;
            return errorNo;
        }
        /* Note: The tailoring rule string is an optional data item. */
        if (!ucol_equals(source, target) && sourceRulesLen != 0) {
            log_err(" Collator different from collator that was created from the same rules \n ");
            errorNo++;
        }
        ucol_close(target);
    }
    return errorNo;
}
static void TestEquals ( void ) {
2008-02-19 08:08:10 +00:00
/* ucol_equals is not currently a public API. There is a chance that it will become
2014-04-01 19:59:27 +00:00
* something like this .
2008-02-19 08:08:10 +00:00
*/
/* test whether the two collators instantiated from the same locale are equal */
UErrorCode status = U_ZERO_ERROR ;
UParseError parseError ;
int32_t noOfLoc = uloc_countAvailable ( ) ;
const char * locName = NULL ;
UCollator * source = NULL , * target = NULL ;
int32_t i = 0 ;
2003-05-01 00:57:27 +00:00
2008-02-19 08:08:10 +00:00
const char * rules [ ] = {
" &l < lj <<< Lj <<< LJ " ,
" &n < nj <<< Nj <<< NJ " ,
" &ae <<< \\ u00e4 " ,
" &AE <<< \\ u00c4 "
} ;
/*
const char * badRules [ ] = {
2003-05-01 17:44:17 +00:00
" &l <<< Lj " ,
2008-02-19 08:08:10 +00:00
" &n < nj <<< nJ <<< NJ " ,
" &a <<< \\ u00e4 " ,
" &AE <<< \\ u00c4 <<< x "
} ;
*/
2003-05-01 17:44:17 +00:00
2008-02-19 08:08:10 +00:00
UChar sourceRules [ 1024 ] , targetRules [ 1024 ] ;
int32_t sourceRulesSize = 0 , targetRulesSize = 0 ;
int32_t rulesSize = sizeof ( rules ) / sizeof ( rules [ 0 ] ) ;
2003-05-01 17:44:17 +00:00
2008-02-19 08:08:10 +00:00
for ( i = 0 ; i < rulesSize ; i + + ) {
sourceRulesSize + = u_unescape ( rules [ i ] , sourceRules + sourceRulesSize , 1024 - sourceRulesSize ) ;
targetRulesSize + = u_unescape ( rules [ rulesSize - i - 1 ] , targetRules + targetRulesSize , 1024 - targetRulesSize ) ;
}
2003-05-01 17:44:17 +00:00
2008-02-19 08:08:10 +00:00
source = ucol_openRules ( sourceRules , sourceRulesSize , UCOL_DEFAULT , UCOL_DEFAULT , & parseError , & status ) ;
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
target = ucol_openRules ( targetRules , targetRulesSize , UCOL_DEFAULT , UCOL_DEFAULT , & parseError , & status ) ;
if ( ! ucol_equals ( source , target ) ) {
log_err ( " Equivalent collators not equal! \n " ) ;
}
ucol_close ( source ) ;
ucol_close ( target ) ;
2003-05-01 00:57:27 +00:00
2008-02-19 08:08:10 +00:00
source = ucol_open ( " root " , & status ) ;
target = ucol_open ( " root " , & status ) ;
log_verbose ( " Testing root \n " ) ;
if ( ! ucol_equals ( source , source ) ) {
log_err ( " Same collator not equal \n " ) ;
}
if ( TestEqualsForCollator ( locName , source , target ) ) {
log_err ( " Errors for root \n " , locName ) ;
}
ucol_close ( source ) ;
for ( i = 0 ; i < noOfLoc ; i + + ) {
status = U_ZERO_ERROR ;
locName = uloc_getAvailable ( i ) ;
/*if(hasCollationElements(locName)) {*/
log_verbose ( " Testing equality for locale %s \n " , locName ) ;
source = ucol_open ( locName , & status ) ;
target = ucol_open ( locName , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator for locale %s %s \n " , locName , u_errorName ( status ) ) ;
continue ;
}
if ( TestEqualsForCollator ( locName , source , target ) ) {
log_err ( " Errors for locale %s \n " , locName ) ;
}
ucol_close ( source ) ;
/*}*/
}
2003-05-29 21:15:26 +00:00
}
2003-05-01 00:57:27 +00:00
2003-05-29 21:15:26 +00:00
/*
 * With alternate handling set to shifted and strength set to primary, calls
 * doTest() with UCOL_EQUAL for "a", "a " and " a" against each other, in both
 * argument orders.
 */
static void TestJ2726(void) {
    UChar a[2] = { 0x61, 0x00 };             /* "a"  */
    UChar aSpace[3] = { 0x61, 0x20, 0x00 };  /* "a " */
    UChar spaceA[3] = { 0x20, 0x61, 0x00 };  /* " a" */
    UChar *lhs[3];
    UChar *rhs[3];
    int32_t k = 0;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open(" en ", &status);

    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

    /* each pair is compared forwards and then backwards */
    lhs[0] = a;      rhs[0] = aSpace;
    lhs[1] = a;      rhs[1] = spaceA;
    lhs[2] = spaceA; rhs[2] = aSpace;
    for (k = 0; k < 3; k++) {
        doTest(coll, lhs[k], rhs[k], UCOL_EQUAL);
        doTest(coll, rhs[k], lhs[k], UCOL_EQUAL);
    }

    ucol_close(coll);
}
2003-07-22 16:49:56 +00:00
static void NullRule ( void ) {
2008-02-19 08:08:10 +00:00
UChar r [ 3 ] = { 0 } ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_openRules ( r , 1 , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
if ( U_SUCCESS ( status ) ) {
log_err ( " This should have been an error! \n " ) ;
ucol_close ( coll ) ;
} else {
status = U_ZERO_ERROR ;
}
coll = ucol_openRules ( r , 0 , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Empty rules should have produced a valid collator -> %s \n " , u_errorName ( status ) ) ;
2008-02-19 08:08:10 +00:00
} else {
ucol_close ( coll ) ;
}
2003-07-22 16:49:56 +00:00
}
2003-08-18 22:12:04 +00:00
/**
 * Tests the collation of strings that contain digits with the
 * UCOL_NUMERIC_COLLATION attribute switched on.
 */
static void TestNumericCollation ( void )
{
UErrorCode status = U_ZERO_ERROR ;
2004-11-11 23:34:58 +00:00
const static char * basicTestStrings [ ] = {
" hello1 " ,
" hello2 " ,
" hello2002 " ,
" hello2003 " ,
" hello123456 " ,
" hello1234567 " ,
" hello10000000 " ,
" hello100000000 " ,
" hello1000000000 " ,
" hello10000000000 " ,
} ;
const static char * preZeroTestStrings [ ] = {
" avery10000 " ,
" avery010000 " ,
" avery0010000 " ,
" avery00010000 " ,
" avery000010000 " ,
" avery0000010000 " ,
" avery00000010000 " ,
" avery000000010000 " ,
} ;
const static char * thirtyTwoBitNumericStrings [ ] = {
" avery42949672960 " ,
" avery42949672961 " ,
" avery42949672962 " ,
" avery429496729610 "
2009-01-29 07:44:27 +00:00
} ;
const static char * longNumericStrings [ ] = {
/* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
In fact , a single collation element can represent a maximum of 254 digits as a number . Digit strings longer than that
are treated as multiple collation elements . */
" num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z " , /*253digits, num + 9.23E252 + z */
" num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 " , /*254digits, num + 1.00E253 */
" num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 " , /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
" num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234 " , /*254digits, num + 1.23E253 */
" num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 " , /*255digits, num + 1.23E253 + 5 */
" num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456 " , /*256digits, num + 1.23E253 + 56 */
" num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 " , /*257digits, num + 1.23E253 + 567 */
" num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a " , /*254digits, num + 1.23E253 + a, out of numeric order but expected */
" num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234 " , /*254digits, num + 9.23E253, out of numeric order but expected */
" num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a " , /*254digits, num + 9.23E253 + a, out of numeric order but expected */
2004-11-11 23:34:58 +00:00
} ;
2003-08-18 22:12:04 +00:00
const static char * supplementaryDigits [ ] = {
" \\ uD835 \\ uDFCE " , /* 0 */
" \\ uD835 \\ uDFCF " , /* 1 */
" \\ uD835 \\ uDFD0 " , /* 2 */
" \\ uD835 \\ uDFD1 " , /* 3 */
" \\ uD835 \\ uDFCF \\ uD835 \\ uDFCE " , /* 10 */
" \\ uD835 \\ uDFCF \\ uD835 \\ uDFCF " , /* 11 */
" \\ uD835 \\ uDFCF \\ uD835 \\ uDFD0 " , /* 12 */
" \\ uD835 \\ uDFD0 \\ uD835 \\ uDFCE " , /* 20 */
" \\ uD835 \\ uDFD0 \\ uD835 \\ uDFCF " , /* 21 */
" \\ uD835 \\ uDFD0 \\ uD835 \\ uDFD0 " /* 22 */
} ;
const static char * foreignDigits [ ] = {
" \\ u0661 " ,
" \\ u0662 " ,
" \\ u0663 " ,
" \\ u0661 \\ u0660 " ,
" \\ u0661 \\ u0662 " ,
" \\ u0661 \\ u0663 " ,
" \\ u0662 \\ u0660 " ,
" \\ u0662 \\ u0662 " ,
" \\ u0662 \\ u0663 " ,
" \\ u0663 \\ u0660 " ,
" \\ u0663 \\ u0662 " ,
" \\ u0663 \\ u0663 "
} ;
2003-09-17 04:02:08 +00:00
const static char * evenZeroes [ ] = {
2004-02-12 08:32:34 +00:00
" 2000 " ,
2003-09-17 04:02:08 +00:00
" 2001 " ,
" 2002 " ,
" 2003 "
} ;
2003-08-18 22:12:04 +00:00
UColAttribute att = UCOL_NUMERIC_COLLATION ;
UColAttributeValue val = UCOL_ON ;
2004-11-11 23:34:58 +00:00
/* Open our collator. */
2003-08-18 22:12:04 +00:00
UCollator * coll = ucol_open ( " root " , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " ERROR: in using ucol_open() -> %s \n " ,
2003-08-18 22:12:04 +00:00
myErrorName ( status ) ) ;
return ;
}
genericLocaleStarterWithOptions ( " root " , basicTestStrings , sizeof ( basicTestStrings ) / sizeof ( basicTestStrings [ 0 ] ) , & att , & val , 1 ) ;
genericLocaleStarterWithOptions ( " root " , thirtyTwoBitNumericStrings , sizeof ( thirtyTwoBitNumericStrings ) / sizeof ( thirtyTwoBitNumericStrings [ 0 ] ) , & att , & val , 1 ) ;
2009-01-29 07:44:27 +00:00
genericLocaleStarterWithOptions ( " root " , longNumericStrings , sizeof ( longNumericStrings ) / sizeof ( longNumericStrings [ 0 ] ) , & att , & val , 1 ) ;
2004-01-09 07:55:22 +00:00
genericLocaleStarterWithOptions ( " en_US " , foreignDigits , sizeof ( foreignDigits ) / sizeof ( foreignDigits [ 0 ] ) , & att , & val , 1 ) ;
2004-11-11 23:34:58 +00:00
genericLocaleStarterWithOptions ( " root " , supplementaryDigits , sizeof ( supplementaryDigits ) / sizeof ( supplementaryDigits [ 0 ] ) , & att , & val , 1 ) ;
genericLocaleStarterWithOptions ( " root " , evenZeroes , sizeof ( evenZeroes ) / sizeof ( evenZeroes [ 0 ] ) , & att , & val , 1 ) ;
2003-08-18 22:12:04 +00:00
2004-11-11 23:34:58 +00:00
/* Setting up our collator to do digits. */
ucol_setAttribute ( coll , UCOL_NUMERIC_COLLATION , UCOL_ON , & status ) ;
2003-08-18 22:12:04 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute \n %s \n " ,
myErrorName ( status ) ) ;
return ;
}
2004-11-11 23:34:58 +00:00
/*
Testing that prepended zeroes still yield the correct collation behavior .
2003-08-18 22:12:04 +00:00
We expect that every element in our strings array will be equal .
*/
2004-02-12 08:32:34 +00:00
genericOrderingTestWithResult ( coll , preZeroTestStrings , sizeof ( preZeroTestStrings ) / sizeof ( preZeroTestStrings [ 0 ] ) , UCOL_EQUAL ) ;
2004-11-11 23:34:58 +00:00
2003-08-18 22:12:04 +00:00
ucol_close ( coll ) ;
}
2004-11-11 23:34:58 +00:00
static void TestTibetanConformance ( void )
{
const char * test [ ] = {
" \\ u0FB2 \\ u0591 \\ u0F71 \\ u0061 " ,
2003-11-12 20:45:53 +00:00
" \\ u0FB2 \\ u0F71 \\ u0061 "
} ;
2004-11-11 23:34:58 +00:00
2003-11-12 20:45:53 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " " , & status ) ;
UChar source [ 100 ] ;
UChar target [ 100 ] ;
int result ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
if ( U_SUCCESS ( status ) ) {
u_unescape ( test [ 0 ] , source , 100 ) ;
u_unescape ( test [ 1 ] , target , 100 ) ;
2003-11-24 19:40:46 +00:00
doTest ( coll , source , target , UCOL_EQUAL ) ;
2003-11-12 20:45:53 +00:00
result = ucol_strcoll ( coll , source , - 1 , target , - 1 ) ;
2003-12-08 19:01:55 +00:00
log_verbose ( " result %d \n " , result ) ;
2003-11-12 20:45:53 +00:00
if ( UCOL_EQUAL ! = result ) {
2004-11-11 23:34:58 +00:00
log_err ( " Tibetan comparison error \n " ) ;
2003-11-12 20:45:53 +00:00
}
}
ucol_close ( coll ) ;
genericLocaleStarterWithResult ( " " , test , 2 , UCOL_EQUAL ) ;
}
2003-08-18 22:12:04 +00:00
2004-01-16 07:14:08 +00:00
/**
 * Runs the generic locale ordering test on a two-string table using the
 * pinyin variant locale named below.
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        " \\ u4E56 \\ u4E56 \\ u7761 ",
        " \\ u4E56 \\ u5B69 \\ u5B50 "
    };

    genericLocaleStarter(" zh__PINYIN ", test, sizeof(test) / sizeof(*test));
}
2004-04-28 05:31:19 +00:00
/**
 * Walk the given enumeration to its end, logging every string it yields, and
 * verify that each entry of the expected array was produced at least once.
 * Also asserts that the number of strings yielded matches uenum_count().
 *
 * @param msg label printed in front of the logged list
 * @param iter the enumeration to walk
 * @param expected array of strings we expect to see, or NULL
 * @param expectedCount number of elements of expected, or 0; must be below 31
 *                      because matches are tracked in an int32_t bit mask
 * @return the value reported by uenum_count(), or -1 if counting failed
 */
static int32_t checkUEnumeration(const char *msg,
                                 UEnumeration *iter,
                                 const char **expected,
                                 int32_t expectedCount) {
    UErrorCode ec = U_ZERO_ERROR;
    int32_t found = 0;      /* bit k is set once expected[k] has been matched */
    int32_t visited = 0;    /* number of strings the enumeration has yielded */
    int32_t total;
    int32_t k;

    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */

    total = uenum_count(iter, &ec);
    if (!assertSuccess(" count ", &ec)) {
        return -1;
    }

    log_verbose(" %s = [ ", msg);
    for (;;) {
        const char *item = uenum_next(iter, NULL, &ec);
        if (!assertSuccess(" snext ", &ec)) {
            break;
        }
        if (item == NULL) {
            break;
        }
        if (visited > 0) {
            log_verbose(" , ");
        }
        log_verbose(" %s ", item);

        /* Credit the first not-yet-matched expected entry equal to item. */
        for (k = 0; k < expectedCount; ++k) {
            const int32_t flag = (int32_t)1 << k;
            if ((found & flag) != 0) {
                continue;
            }
            if (uprv_strcmp(item, expected[k]) == 0) {
                found |= flag;
                break;
            }
        }
        ++visited;
    }
    log_verbose(" ] (%d) \n ", visited);
    assertTrue(" count verified ", visited == total);

    /* Report, entry by entry, whether every expected string turned up. */
    for (k = 0; k < expectedCount; ++k) {
        if ((found & ((int32_t)1 << k)) == 0) {
            log_err(" FAIL: \" %s \" not seen \n ", expected[k]);
        } else {
            log_verbose(" Ok: \" %s \" seen \n ", expected[k]);
        }
    }
    return total;
}
/**
* Test new API added for separate collation tree .
*/
static void TestSeparateTrees ( void ) {
UErrorCode ec = U_ZERO_ERROR ;
UEnumeration * e = NULL ;
int32_t n = - 1 ;
UBool isAvailable ;
char loc [ 256 ] ;
static const char * AVAIL [ ] = { " en " , " de " } ;
static const char * KW [ ] = { " collation " } ;
static const char * KWVAL [ ] = { " phonebook " , " stroke " } ;
2004-07-18 02:02:06 +00:00
# if !UCONFIG_NO_SERVICE
2004-04-28 05:31:19 +00:00
e = ucol_openAvailableLocales ( & ec ) ;
2009-06-09 21:28:13 +00:00
if ( e ! = NULL ) {
assertSuccess ( " ucol_openAvailableLocales " , & ec ) ;
assertTrue ( " ucol_openAvailableLocales!=0 " , e ! = 0 ) ;
n = checkUEnumeration ( " ucol_openAvailableLocales " , e , AVAIL , LEN ( AVAIL ) ) ;
2013-03-21 01:42:01 +00:00
( void ) n ; /* Suppress set but not used warnings. */
2009-06-09 21:28:13 +00:00
/* Don't need to check n because we check list */
uenum_close ( e ) ;
} else {
log_data_err ( " Error calling ucol_openAvailableLocales() -> %s (Are you missing data?) \n " , u_errorName ( ec ) ) ;
}
2004-07-18 02:02:06 +00:00
# endif
2004-04-28 05:31:19 +00:00
e = ucol_getKeywords ( & ec ) ;
2009-06-09 21:28:13 +00:00
if ( e ! = NULL ) {
assertSuccess ( " ucol_getKeywords " , & ec ) ;
assertTrue ( " ucol_getKeywords!=0 " , e ! = 0 ) ;
n = checkUEnumeration ( " ucol_getKeywords " , e , KW , LEN ( KW ) ) ;
/* Don't need to check n because we check list */
uenum_close ( e ) ;
} else {
log_data_err ( " Error calling ucol_getKeywords() -> %s (Are you missing data?) \n " , u_errorName ( ec ) ) ;
}
2004-04-28 05:31:19 +00:00
e = ucol_getKeywordValues ( KW [ 0 ] , & ec ) ;
2009-06-09 21:28:13 +00:00
if ( e ! = NULL ) {
assertSuccess ( " ucol_getKeywordValues " , & ec ) ;
assertTrue ( " ucol_getKeywordValues!=0 " , e ! = 0 ) ;
n = checkUEnumeration ( " ucol_getKeywordValues " , e , KWVAL , LEN ( KWVAL ) ) ;
/* Don't need to check n because we check list */
uenum_close ( e ) ;
} else {
log_data_err ( " Error calling ucol_getKeywordValues() -> %s (Are you missing data?) \n " , u_errorName ( ec ) ) ;
}
2004-04-28 05:31:19 +00:00
2004-05-25 05:43:23 +00:00
/* Try setting a warning before calling ucol_getKeywordValues */
ec = U_USING_FALLBACK_WARNING ;
e = ucol_getKeywordValues ( KW [ 0 ] , & ec ) ;
2009-06-09 21:28:13 +00:00
if ( assertSuccess ( " ucol_getKeywordValues [with warning code set] " , & ec ) ) {
assertTrue ( " ucol_getKeywordValues!=0 [with warning code set] " , e ! = 0 ) ;
n = checkUEnumeration ( " ucol_getKeywordValues [with warning code set] " , e , KWVAL , LEN ( KWVAL ) ) ;
/* Don't need to check n because we check list */
uenum_close ( e ) ;
}
2004-05-25 05:43:23 +00:00
2004-04-28 05:31:19 +00:00
/*
U_DRAFT int32_t U_EXPORT2
ucol_getFunctionalEquivalent ( char * result , int32_t resultCapacity ,
const char * locale , UBool * isAvailable ,
UErrorCode * status ) ;
}
*/
2010-10-19 03:31:50 +00:00
n = ucol_getFunctionalEquivalent ( loc , sizeof ( loc ) , " collation " , " de " ,
2004-04-28 05:31:19 +00:00
& isAvailable , & ec ) ;
2009-06-09 21:28:13 +00:00
if ( assertSuccess ( " getFunctionalEquivalent " , & ec ) ) {
2013-09-07 20:46:42 +00:00
assertEquals ( " getFunctionalEquivalent(de) " , " root " , loc ) ;
2010-10-19 03:31:50 +00:00
assertTrue ( " getFunctionalEquivalent(de).isAvailable==TRUE " ,
2009-06-09 21:28:13 +00:00
isAvailable = = TRUE ) ;
}
2004-11-11 23:34:58 +00:00
2010-10-19 03:31:50 +00:00
n = ucol_getFunctionalEquivalent ( loc , sizeof ( loc ) , " collation " , " de_DE " ,
2004-04-28 05:31:19 +00:00
& isAvailable , & ec ) ;
2009-06-09 21:28:13 +00:00
if ( assertSuccess ( " getFunctionalEquivalent " , & ec ) ) {
2013-09-07 20:46:42 +00:00
assertEquals ( " getFunctionalEquivalent(de_DE) " , " root " , loc ) ;
2014-03-12 04:34:00 +00:00
assertTrue ( " getFunctionalEquivalent(de_DE).isAvailable==FALSE " ,
isAvailable = = FALSE ) ;
2009-06-09 21:28:13 +00:00
}
2004-04-28 05:31:19 +00:00
}
2004-05-14 07:10:56 +00:00
/* supercedes TestJ784 */
2004-05-08 07:59:36 +00:00
static void TestBeforePinyin ( void ) {
2004-11-11 23:34:58 +00:00
const static char rules [ ] = {
2004-06-03 22:08:39 +00:00
" &[before 2]A<< \\ u0101<<< \\ u0100<< \\ u00E1<<< \\ u00C1<< \\ u01CE<<< \\ u01CD<< \\ u00E0<<< \\ u00C0 "
" &[before 2]e<< \\ u0113<<< \\ u0112<< \\ u00E9<<< \\ u00C9<< \\ u011B<<< \\ u011A<< \\ u00E8<<< \\ u00C8 "
" &[before 2]i<< \\ u012B<<< \\ u012A<< \\ u00ED<<< \\ u00CD<< \\ u01D0<<< \\ u01CF<< \\ u00EC<<< \\ u00CC "
" &[before 2]o<< \\ u014D<<< \\ u014C<< \\ u00F3<<< \\ u00D3<< \\ u01D2<<< \\ u01D1<< \\ u00F2<<< \\ u00D2 "
" &[before 2]u<< \\ u016B<<< \\ u016A<< \\ u00FA<<< \\ u00DA<< \\ u01D4<<< \\ u01D3<< \\ u00F9<<< \\ u00D9 "
" &U<< \\ u01D6<<< \\ u01D5<< \\ u01D8<<< \\ u01D7<< \\ u01DA<<< \\ u01D9<< \\ u01DC<<< \\ u01DB<< \\ u00FC "
2004-05-08 07:59:36 +00:00
} ;
2004-05-24 22:17:31 +00:00
const static char * test [ ] = {
" l \\ u0101 " ,
" la " ,
" l \\ u0101n " ,
2004-05-08 07:59:36 +00:00
" lan " ,
2004-05-24 22:17:31 +00:00
" l \\ u0113 " ,
" le " ,
" l \\ u0113n " ,
" len "
2004-05-08 07:59:36 +00:00
} ;
2004-05-24 22:17:31 +00:00
const static char * test2 [ ] = {
2004-05-24 22:07:40 +00:00
" x \\ u0101 " ,
" x \\ u0100 " ,
" X \\ u0101 " ,
" X \\ u0100 " ,
" x \\ u00E1 " ,
" x \\ u00C1 " ,
" X \\ u00E1 " ,
" X \\ u00C1 " ,
" x \\ u01CE " ,
" x \\ u01CD " ,
" X \\ u01CE " ,
" X \\ u01CD " ,
" x \\ u00E0 " ,
" x \\ u00C0 " ,
" X \\ u00E0 " ,
" X \\ u00C0 " ,
" xa " ,
" xA " ,
" Xa " ,
" XA " ,
" x \\ u0101x " ,
" x \\ u0100x " ,
" x \\ u00E1x " ,
" x \\ u00C1x " ,
" x \\ u01CEx " ,
" x \\ u01CDx " ,
" x \\ u00E0x " ,
" x \\ u00C0x " ,
" xax " ,
" xAx "
} ;
2004-05-24 22:17:31 +00:00
2004-05-08 07:59:36 +00:00
genericRulesStarter ( rules , test , sizeof ( test ) / sizeof ( test [ 0 ] ) ) ;
2004-05-14 07:10:56 +00:00
genericLocaleStarter ( " zh " , test , sizeof ( test ) / sizeof ( test [ 0 ] ) ) ;
2004-05-24 22:07:40 +00:00
genericRulesStarter ( rules , test2 , sizeof ( test2 ) / sizeof ( test2 [ 0 ] ) ) ;
genericLocaleStarter ( " zh " , test2 , sizeof ( test2 ) / sizeof ( test2 [ 0 ] ) ) ;
2004-05-08 07:59:36 +00:00
}
static void TestBeforeTightening ( void ) {
2006-10-01 07:12:18 +00:00
static const struct {
2004-05-08 07:59:36 +00:00
const char * rules ;
UErrorCode expectedStatus ;
} tests [ ] = {
{ " &[before 1]a<x " , U_ZERO_ERROR } ,
{ " &[before 1]a<<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 1]a<<<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 1]a=x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 2]a<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 2]a<<x " , U_ZERO_ERROR } ,
{ " &[before 2]a<<<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 2]a=x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 3]a<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 3]a<<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 3]a<<<x " , U_ZERO_ERROR } ,
{ " &[before 3]a=x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before I]a = x " , U_INVALID_FORMAT_ERROR }
} ;
int32_t i = 0 ;
UErrorCode status = U_ZERO_ERROR ;
UChar rlz [ RULE_BUFFER_LEN ] = { 0 } ;
uint32_t rlen = 0 ;
UCollator * coll = NULL ;
for ( i = 0 ; i < sizeof ( tests ) / sizeof ( tests [ 0 ] ) ; i + + ) {
rlen = u_unescape ( tests [ i ] . rules , rlz , RULE_BUFFER_LEN ) ;
coll = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
if ( status ! = tests [ i ] . expectedStatus ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Opening a collator with rules %s returned error code %s, expected %s \n " ,
2004-05-08 07:59:36 +00:00
tests [ i ] . rules , u_errorName ( status ) , u_errorName ( tests [ i ] . expectedStatus ) ) ;
}
ucol_close ( coll ) ;
status = U_ZERO_ERROR ;
}
}
2012-08-15 17:46:17 +00:00
/*
2004-05-08 07:59:36 +00:00
& m < a
& [ before 1 ] a < x < < < X < < q < < < Q < z
assert : m < < < M < x < < < X < < q < < < Q < z < a < n
& m < a
& [ before 2 ] a < < x < < < X < < q < < < Q < z
assert : m < < < M < x < < < X < < q < < < Q < < a < z < n
& m < a
& [ before 3 ] a < < < x < < < X < < q < < < Q < z
assert : m < < < M < x < < < X < < < a < < q < < < Q < z < n
& m < < a
& [ before 1 ] a < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < z < m < < < M < < a < n
& m < < a
& [ before 2 ] a < < x < < < X < < q < < < Q < z
assert : m < < < M < < x < < < X < < q < < < Q < < a < z < n
& m < < a
& [ before 3 ] a < < < x < < < X < < q < < < Q < z
assert : m < < < M < < x < < < X < < < a < < q < < < Q < z < n
& m < < < a
& [ before 1 ] a < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < z < n < m < < < a < < < M
& m < < < a
& [ before 2 ] a < < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < < m < < < a < < < M < z < n
& m < < < a
& [ before 3 ] a < < < x < < < X < < q < < < Q < z
assert : m < < < x < < < X < < < a < < < M < < q < < < Q < z < n
& [ before 1 ] s < x < < < X < < q < < < Q < z
assert : r < < < R < x < < < X < < q < < < Q < z < s < n
& [ before 2 ] s < < x < < < X < < q < < < Q < z
assert : r < < < R < x < < < X < < q < < < Q < < s < z < n
& [ before 3 ] s < < < x < < < X < < q < < < Q < z
assert : r < < < R < x < < < X < < < s < < q < < < Q < z < n
& [ before 1 ] \ u24DC < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < z < n < m < < < \ u24DC < < < M
& [ before 2 ] \ u24DC < < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < < m < < < \ u24DC < < < M < z < n
& [ before 3 ] \ u24DC < < < x < < < X < < q < < < Q < z
assert : m < < < x < < < X < < < \ u24DC < < < M < < q < < < Q < z < n
2012-08-15 17:46:17 +00:00
*/
2004-05-08 07:59:36 +00:00
2004-06-03 22:08:39 +00:00
#if 0
/*
 * Disabled: requires features not yet supported, so the whole function is
 * compiled out.  Each table entry pairs a rule string with the string order
 * and element count that are handed to genericRulesStarter().
 */
2004-05-08 07:59:36 +00:00
static void TestMoreBefore ( void ) {
2006-09-28 08:41:37 +00:00
static const struct {
2004-05-08 07:59:36 +00:00
const char * rules ;
2006-09-28 08:41:37 +00:00
const char * order [ 16 ] ;
2004-05-08 07:59:36 +00:00
int32_t size ;
} tests [ ] = {
{ " &m < a &[before 1] a < x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " q " , " Q " , " z " , " a " , " n " } , 9 } ,
{ " &m < a &[before 2] a << x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " q " , " Q " , " a " , " z " , " n " } , 9 } ,
{ " &m < a &[before 3] a <<< x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " a " , " q " , " Q " , " z " , " n " } , 9 } ,
{ " &m << a &[before 1] a < x <<< X << q <<< Q < z " ,
{ " x " , " X " , " q " , " Q " , " z " , " m " , " M " , " a " , " n " } , 9 } ,
{ " &m << a &[before 2] a << x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " q " , " Q " , " a " , " z " , " n " } , 9 } ,
{ " &m << a &[before 3] a <<< x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " a " , " q " , " Q " , " z " , " n " } , 9 } ,
2004-11-11 23:34:58 +00:00
{ " &m <<< a &[before 1] a < x <<< X << q <<< Q < z " ,
2004-05-08 07:59:36 +00:00
{ " x " , " X " , " q " , " Q " , " z " , " n " , " m " , " a " , " M " } , 9 } ,
2004-11-11 23:34:58 +00:00
{ " &m <<< a &[before 2] a << x <<< X << q <<< Q < z " ,
2004-05-08 07:59:36 +00:00
{ " x " , " X " , " q " , " Q " , " m " , " a " , " M " , " z " , " n " } , 9 } ,
{ " &m <<< a &[before 3] a <<< x <<< X << q <<< Q < z " ,
{ " m " , " x " , " X " , " a " , " M " , " q " , " Q " , " z " , " n " } , 9 } ,
{ " &[before 1] s < x <<< X << q <<< Q < z " ,
{ " r " , " R " , " x " , " X " , " q " , " Q " , " z " , " s " , " n " } , 9 } ,
{ " &[before 2] s << x <<< X << q <<< Q < z " ,
{ " r " , " R " , " x " , " X " , " q " , " Q " , " s " , " z " , " n " } , 9 } ,
2004-11-11 23:34:58 +00:00
{ " &[before 3] s <<< x <<< X << q <<< Q < z " ,
2004-05-08 07:59:36 +00:00
{ " r " , " R " , " x " , " X " , " s " , " q " , " Q " , " z " , " n " } , 9 } ,
2004-05-19 00:31:51 +00:00
{ " &[before 1] \\ u24DC < x <<< X << q <<< Q < z " ,
{ " x " , " X " , " q " , " Q " , " z " , " n " , " m " , " \\ u24DC " , " M " } , 9 } ,
{ " &[before 2] \\ u24DC << x <<< X << q <<< Q < z " ,
{ " x " , " X " , " q " , " Q " , " m " , " \\ u24DC " , " M " , " z " , " n " } , 9 } ,
{ " &[before 3] \\ u24DC <<< x <<< X << q <<< Q < z " ,
{ " m " , " x " , " X " , " \\ u24DC " , " M " , " q " , " Q " , " z " , " n " } , 9 }
2004-05-08 07:59:36 +00:00
} ;
int32_t i = 0 ;
/* Run the generic rules test once per table entry. */
for ( i = 0 ; i < sizeof ( tests ) / sizeof ( tests [ 0 ] ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . order , tests [ i ] . size ) ;
}
}
2004-06-03 22:08:39 +00:00
# endif
2004-05-08 07:59:36 +00:00
2004-06-03 22:08:39 +00:00
static void TestTailorNULL ( void ) {
2004-05-17 22:06:14 +00:00
const static char * rule = " &a <<< ' \\ u0000' " ;
2004-05-14 07:10:56 +00:00
UErrorCode status = U_ZERO_ERROR ;
UChar rlz [ RULE_BUFFER_LEN ] = { 0 } ;
uint32_t rlen = 0 ;
UChar a = 1 , null = 0 ;
UCollationResult res = UCOL_EQUAL ;
UCollator * coll = NULL ;
rlen = u_unescape ( rule , rlz , RULE_BUFFER_LEN ) ;
coll = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2004-12-16 02:54:23 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Could not open default collator! -> %s \n " , u_errorName ( status ) ) ;
2004-12-16 02:54:23 +00:00
} else {
res = ucol_strcoll ( coll , & a , 1 , & null , 1 ) ;
if ( res ! = UCOL_LESS ) {
log_err ( " NULL was not tailored properly! \n " ) ;
}
2004-05-17 22:06:14 +00:00
}
2004-12-16 02:54:23 +00:00
2004-05-14 07:10:56 +00:00
ucol_close ( coll ) ;
}
2004-05-08 07:59:36 +00:00
2005-09-17 06:26:58 +00:00
static void
TestUpperFirstQuaternary ( void )
{
2005-09-26 06:22:39 +00:00
const char * tests [ ] = { " B " , " b " , " Bb " , " bB " } ;
2005-09-17 06:26:58 +00:00
UColAttribute att [ ] = { UCOL_STRENGTH , UCOL_CASE_FIRST } ;
UColAttributeValue attVals [ ] = { UCOL_QUATERNARY , UCOL_UPPER_FIRST } ;
genericLocaleStarterWithOptions ( " root " , tests , sizeof ( tests ) / sizeof ( tests [ 0 ] ) , att , attVals , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
}
2006-01-28 08:25:52 +00:00
static void
TestJ4960 ( void )
{
const char * tests [ ] = { " \\ u00e2T " , " aT " } ;
UColAttribute att [ ] = { UCOL_STRENGTH , UCOL_CASE_LEVEL } ;
UColAttributeValue attVals [ ] = { UCOL_PRIMARY , UCOL_ON } ;
const char * tests2 [ ] = { " a " , " A " } ;
const char * rule = " &[first tertiary ignorable]=A=a " ;
UColAttribute att2 [ ] = { UCOL_CASE_LEVEL } ;
UColAttributeValue attVals2 [ ] = { UCOL_ON } ;
/* Test whether we correctly ignore primary ignorables on case level when */
/* we have only primary & case level */
genericLocaleStarterWithOptionsAndResult ( " root " , tests , sizeof ( tests ) / sizeof ( tests [ 0 ] ) , att , attVals , sizeof ( att ) / sizeof ( att [ 0 ] ) , UCOL_EQUAL ) ;
/* Test whether ICU4J will make case level for sortkeys that have primary strength */
/* and case level */
genericLocaleStarterWithOptions ( " root " , tests2 , sizeof ( tests2 ) / sizeof ( tests2 [ 0 ] ) , att , attVals , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
/* Test whether completely ignorable letters have case level info (they shouldn't) */
genericRulesStarterWithOptionsAndResult ( rule , tests2 , sizeof ( tests2 ) / sizeof ( tests2 [ 0 ] ) , att2 , attVals2 , sizeof ( att2 ) / sizeof ( att2 [ 0 ] ) , UCOL_EQUAL ) ;
}
2006-07-06 06:30:06 +00:00
static void
TestJ5223 ( void )
{
2006-07-25 02:52:22 +00:00
static const char * test = " this is a test string " ;
2006-07-06 06:30:06 +00:00
UChar ustr [ 256 ] ;
int32_t ustr_length = u_unescape ( test , ustr , 256 ) ;
unsigned char sortkey [ 256 ] ;
int32_t sortkey_length ;
UErrorCode status = U_ZERO_ERROR ;
static UCollator * coll = NULL ;
coll = ucol_open ( " root " , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Couldn't open UCA -> %s \n " , u_errorName ( status ) ) ;
2006-07-06 06:30:06 +00:00
return ;
}
ucol_setStrength ( coll , UCOL_PRIMARY ) ;
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_PRIMARY , & status ) ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Failed setting atributes \n " ) ;
return ;
2009-04-23 00:23:57 +00:00
}
2006-07-06 06:30:06 +00:00
sortkey_length = ucol_getSortKey ( coll , ustr , ustr_length , NULL , 0 ) ;
if ( sortkey_length > 256 ) return ;
/* we mark the position where the null byte should be written in advance */
sortkey [ sortkey_length - 1 ] = 0xAA ;
/* we set the buffer size one byte higher than needed */
sortkey_length = ucol_getSortKey ( coll , ustr , ustr_length , sortkey ,
sortkey_length + 1 ) ;
/* no error occurs (for me) */
if ( sortkey [ sortkey_length - 1 ] = = 0xAA ) {
log_err ( " Hit bug at first try \n " ) ;
}
/* we mark the position where the null byte should be written again */
sortkey [ sortkey_length - 1 ] = 0xAA ;
/* this time we set the buffer size to the exact amount needed */
sortkey_length = ucol_getSortKey ( coll , ustr , ustr_length , sortkey ,
sortkey_length ) ;
/* now the trailing null byte is not written */
if ( sortkey [ sortkey_length - 1 ] = = 0xAA ) {
log_err ( " Hit bug at second try \n " ) ;
}
ucol_close ( coll ) ;
}
2006-08-22 17:51:36 +00:00
/* Regression test for Thai partial sort key problem */
static void
TestJ5232 ( void )
{
const static char * test [ ] = {
" \\ u0e40 \\ u0e01 \\ u0e47 \\ u0e1a \\ u0e40 \\ u0e25 \\ u0e47 \\ u0e21 " ,
" \\ u0e40 \\ u0e01 \\ u0e47 \\ u0e1a \\ u0e40 \\ u0e25 \\ u0e48 \\ u0e21 "
} ;
2009-04-23 00:23:57 +00:00
2006-08-22 17:51:36 +00:00
genericLocaleStarter ( " th " , test , sizeof ( test ) / sizeof ( test [ 0 ] ) ) ;
}
2006-09-07 20:12:11 +00:00
static void
TestJ5367 ( void )
{
const static char * test [ ] = { " a " , " y " } ;
const char * rules = " &Ny << Y &[first secondary ignorable] <<< a " ;
genericRulesStarter ( rules , test , sizeof ( test ) / sizeof ( test [ 0 ] ) ) ;
}
2006-08-22 17:51:36 +00:00
2007-11-30 04:29:20 +00:00
static void
TestVI5913 ( void )
{
UErrorCode status = U_ZERO_ERROR ;
int32_t i , j ;
UCollator * coll = NULL ;
uint8_t resColl [ 100 ] , expColl [ 100 ] ;
int32_t rLen , tLen , ruleLen , sLen , kLen ;
2014-02-25 21:21:49 +00:00
UChar rule [ 256 ] = { 0x26 , 0x62 , 0x3c , 0x1FF3 , 0 } ; /* &b<0x1FF3-omega with Ypogegrammeni*/
2007-11-30 04:29:20 +00:00
UChar rule2 [ 256 ] = { 0x26 , 0x7a , 0x3c , 0x0161 , 0 } ; /* &z<s with caron*/
2014-02-25 21:21:49 +00:00
/*
* Note : Just tailoring & z < ae ^ does not work as expected :
* The UCA spec requires for discontiguous contractions that they
* extend an * existing match * by one combining mark at a time .
* Therefore , ae must be a contraction so that the builder finds
* discontiguous contractions for ae ^ , for example with an intervening underdot .
* Only then do we get the expected tail closure with a \ u1EC7 , a \ u1EB9 \ u0302 , etc .
*/
UChar rule3 [ 256 ] = {
0x26 , 0x78 , 0x3c , 0x61 , 0x65 , /* &x<ae */
0x26 , 0x7a , 0x3c , 0x0061 , 0x00ea , /* &z<a+e with circumflex.*/
0 } ;
2007-12-07 10:49:34 +00:00
static const UChar tData [ ] [ 20 ] = {
2007-11-30 04:29:20 +00:00
{ 0x1EAC , 0 } ,
{ 0x0041 , 0x0323 , 0x0302 , 0 } ,
{ 0x1EA0 , 0x0302 , 0 } ,
{ 0x00C2 , 0x0323 , 0 } ,
{ 0x1ED8 , 0 } , /* O with dot and circumflex */
{ 0x1ECC , 0x0302 , 0 } ,
{ 0x1EB7 , 0 } ,
{ 0x1EA1 , 0x0306 , 0 } ,
} ;
2007-12-07 10:49:34 +00:00
static const UChar tailorData [ ] [ 20 ] = {
2007-11-30 04:29:20 +00:00
{ 0x1FA2 , 0 } , /* Omega with 3 combining marks */
{ 0x03C9 , 0x0313 , 0x0300 , 0x0345 , 0 } ,
{ 0x1FF3 , 0x0313 , 0x0300 , 0 } ,
{ 0x1F60 , 0x0300 , 0x0345 , 0 } ,
{ 0x1F62 , 0x0345 , 0 } ,
{ 0x1FA0 , 0x0300 , 0 } ,
} ;
2007-12-07 10:49:34 +00:00
static const UChar tailorData2 [ ] [ 20 ] = {
2007-11-30 04:29:20 +00:00
{ 0x1E63 , 0x030C , 0 } , /* s with dot below + caron */
{ 0x0073 , 0x0323 , 0x030C , 0 } ,
{ 0x0073 , 0x030C , 0x0323 , 0 } ,
} ;
2007-12-07 10:49:34 +00:00
static const UChar tailorData3 [ ] [ 20 ] = {
2007-11-30 04:29:20 +00:00
{ 0x007a , 0 } , /* z */
{ 0x0061 , 0x0065 , 0 } , /* a + e */
{ 0x0061 , 0x00ea , 0 } , /* a + e with circumflex */
{ 0x0061 , 0x1EC7 , 0 } , /* a+ e with dot below and circumflex */
{ 0x0061 , 0x1EB9 , 0x0302 , 0 } , /* a + e with dot below + combining circumflex */
{ 0x0061 , 0x00EA , 0x0323 , 0 } , /* a + e with circumflex + combining dot below */
{ 0x00EA , 0x0323 , 0 } , /* e with circumflex + combining dot below */
{ 0x00EA , 0 } , /* e with circumflex */
} ;
/* Test Vietnamese sort. */
coll = ucol_open ( " vi " , & status ) ;
2008-02-16 12:30:55 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Couldn't open collator -> %s \n " , u_errorName ( status ) ) ;
2008-02-16 12:30:55 +00:00
return ;
}
2007-11-30 04:29:20 +00:00
log_verbose ( " \n \n VI collation: " ) ;
if ( ! ucol_equal ( coll , tData [ 0 ] , u_strlen ( tData [ 0 ] ) , tData [ 2 ] , u_strlen ( tData [ 2 ] ) ) ) {
log_err ( " \\ u1EAC not equals to \\ u1EA0+ \\ u0302 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 0 ] , u_strlen ( tData [ 0 ] ) , tData [ 3 ] , u_strlen ( tData [ 3 ] ) ) ) {
log_err ( " \\ u1EAC not equals to \\ u00c2+ \\ u0323 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 5 ] , u_strlen ( tData [ 5 ] ) , tData [ 4 ] , u_strlen ( tData [ 4 ] ) ) ) {
log_err ( " \\ u1ED8 not equals to \\ u1ECC+ \\ u0302 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 7 ] , u_strlen ( tData [ 7 ] ) , tData [ 6 ] , u_strlen ( tData [ 6 ] ) ) ) {
log_err ( " \\ u1EB7 not equals to \\ u1EA1+ \\ u0306 \n " ) ;
}
for ( j = 0 ; j < 8 ; j + + ) {
tLen = u_strlen ( tData [ j ] ) ;
log_verbose ( " \n Data :%s \t len: %d key: " , tData [ j ] , tLen ) ;
rLen = ucol_getSortKey ( coll , tData [ j ] , tLen , resColl , 100 ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_verbose ( " %02X " , resColl [ i ] ) ;
}
}
ucol_close ( coll ) ;
2007-12-07 10:49:34 +00:00
/* Test Romanian sort. */
2007-11-30 04:29:20 +00:00
coll = ucol_open ( " ro " , & status ) ;
log_verbose ( " \n \n RO collation: " ) ;
if ( ! ucol_equal ( coll , tData [ 0 ] , u_strlen ( tData [ 0 ] ) , tData [ 1 ] , u_strlen ( tData [ 1 ] ) ) ) {
log_err ( " \\ u1EAC not equals to \\ u1EA0+ \\ u0302 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 4 ] , u_strlen ( tData [ 4 ] ) , tData [ 5 ] , u_strlen ( tData [ 5 ] ) ) ) {
log_err ( " \\ u1EAC not equals to \\ u00c2+ \\ u0323 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 6 ] , u_strlen ( tData [ 6 ] ) , tData [ 7 ] , u_strlen ( tData [ 7 ] ) ) ) {
log_err ( " \\ u1EB7 not equals to \\ u1EA1+ \\ u0306 \n " ) ;
}
for ( j = 4 ; j < 8 ; j + + ) {
tLen = u_strlen ( tData [ j ] ) ;
log_verbose ( " \n Data :%s \t len: %d key: " , tData [ j ] , tLen ) ;
rLen = ucol_getSortKey ( coll , tData [ j ] , tLen , resColl , 100 ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_verbose ( " %02X " , resColl [ i ] ) ;
}
}
ucol_close ( coll ) ;
/* Test the precomposed Greek character with 3 combining marks. */
log_verbose ( " \n \n Tailoring test: Greek character with 3 combining marks " ) ;
ruleLen = u_strlen ( rule ) ;
coll = ucol_openRules ( rule , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
2007-12-07 10:49:34 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " ucol_openRules failed with %s \n " , u_errorName ( status ) ) ;
return ;
}
2007-11-30 04:29:20 +00:00
sLen = u_strlen ( tailorData [ 0 ] ) ;
for ( j = 1 ; j < 6 ; j + + ) {
tLen = u_strlen ( tailorData [ j ] ) ;
if ( ! ucol_equal ( coll , tailorData [ 0 ] , sLen , tailorData [ j ] , tLen ) ) {
log_err ( " \n \\ u1FA2 not equals to data[%d]:%s \n " , j , tailorData [ j ] ) ;
}
}
/* Test getSortKey. */
tLen = u_strlen ( tailorData [ 0 ] ) ;
kLen = ucol_getSortKey ( coll , tailorData [ 0 ] , tLen , expColl , 100 ) ;
for ( j = 0 ; j < 6 ; j + + ) {
tLen = u_strlen ( tailorData [ j ] ) ;
rLen = ucol_getSortKey ( coll , tailorData [ j ] , tLen , resColl , 100 ) ;
if ( kLen ! = rLen | | uprv_memcmp ( expColl , resColl , rLen * sizeof ( uint8_t ) ) ! = 0 ) {
log_err ( " \n Data[%d] :%s \t len: %d key: " , j , tailorData [ j ] , tLen ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
}
}
ucol_close ( coll ) ;
log_verbose ( " \n \n Tailoring test for s with caron: " ) ;
ruleLen = u_strlen ( rule2 ) ;
coll = ucol_openRules ( rule2 , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
tLen = u_strlen ( tailorData2 [ 0 ] ) ;
kLen = ucol_getSortKey ( coll , tailorData2 [ 0 ] , tLen , expColl , 100 ) ;
for ( j = 1 ; j < 3 ; j + + ) {
tLen = u_strlen ( tailorData2 [ j ] ) ;
rLen = ucol_getSortKey ( coll , tailorData2 [ j ] , tLen , resColl , 100 ) ;
if ( kLen ! = rLen | | uprv_memcmp ( expColl , resColl , rLen * sizeof ( uint8_t ) ) ! = 0 ) {
log_err ( " \n After tailoring Data[%d] :%s \t len: %d key: " , j , tailorData [ j ] , tLen ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
}
}
ucol_close ( coll ) ;
log_verbose ( " \n \n Tailoring test for &z< ae with circumflex: " ) ;
2007-12-07 10:49:34 +00:00
ruleLen = u_strlen ( rule3 ) ;
coll = ucol_openRules ( rule3 , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
tLen = u_strlen ( tailorData3 [ 3 ] ) ;
kLen = ucol_getSortKey ( coll , tailorData3 [ 3 ] , tLen , expColl , 100 ) ;
2014-02-25 21:21:49 +00:00
log_verbose ( " \n Test Data[3] :%s \t len: %d key: " , aescstrdup ( tailorData3 [ 3 ] , tLen ) , tLen ) ;
for ( i = 0 ; i < kLen ; i + + ) {
log_verbose ( " %02X " , expColl [ i ] ) ;
}
2007-12-07 10:49:34 +00:00
for ( j = 4 ; j < 6 ; j + + ) {
tLen = u_strlen ( tailorData3 [ j ] ) ;
rLen = ucol_getSortKey ( coll , tailorData3 [ j ] , tLen , resColl , 100 ) ;
2007-11-30 04:29:20 +00:00
2007-12-07 10:49:34 +00:00
if ( kLen ! = rLen | | uprv_memcmp ( expColl , resColl , rLen * sizeof ( uint8_t ) ) ! = 0 ) {
2014-02-25 21:21:49 +00:00
log_err ( " \n After tailoring Data[%d] :%s \t len: %d key: " , j , aescstrdup ( tailorData3 [ j ] , tLen ) , tLen ) ;
2007-12-07 10:49:34 +00:00
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
}
2014-02-25 21:21:49 +00:00
log_verbose ( " \n Test Data[%d] :%s \t len: %d key: " , j , aescstrdup ( tailorData3 [ j ] , tLen ) , tLen ) ;
2007-12-07 10:49:34 +00:00
for ( i = 0 ; i < rLen ; i + + ) {
log_verbose ( " %02X " , resColl [ i ] ) ;
}
}
ucol_close ( coll ) ;
2007-11-30 04:29:20 +00:00
}
2006-08-22 17:51:36 +00:00
2008-04-04 22:47:43 +00:00
static void
TestTailor6179 ( void )
{
UErrorCode status = U_ZERO_ERROR ;
int32_t i ;
UCollator * coll = NULL ;
uint8_t resColl [ 100 ] ;
int32_t rLen , tLen , ruleLen ;
/* &[last primary ignorable]<< a &[first primary ignorable]<<b */
2011-12-08 22:37:24 +00:00
static const UChar rule1 [ ] = {
0x26 , 0x5B , 0x6C , 0x61 , 0x73 , 0x74 , 0x20 , 0x70 , 0x72 , 0x69 , 0x6D , 0x61 , 0x72 , 0x79 ,
2008-04-04 22:47:43 +00:00
0x20 , 0x69 , 0x67 , 0x6E , 0x6F , 0x72 , 0x61 , 0x62 , 0x6C , 0x65 , 0x5D , 0x3C , 0x3C , 0x20 , 0x61 , 0x20 ,
0x26 , 0x5B , 0x66 , 0x69 , 0x72 , 0x73 , 0x74 , 0x20 , 0x70 , 0x72 , 0x69 , 0x6D , 0x61 , 0x72 , 0x79 , 0x20 ,
0x69 , 0x67 , 0x6E , 0x6F , 0x72 , 0x61 , 0x62 , 0x6C , 0x65 , 0x5D , 0x3C , 0x3C , 0x62 , 0x20 , 0 } ;
/* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
2011-12-08 22:37:24 +00:00
static const UChar rule2 [ ] = {
0x26 , 0x5B , 0x6C , 0x61 , 0x73 , 0x74 , 0x20 , 0x73 , 0x65 , 0x63 , 0x6F , 0x6E , 0x64 , 0x61 ,
2008-04-04 22:47:43 +00:00
0x72 , 0x79 , 0x20 , 0x69 , 0x67 , 0x6E , 0x6F , 0x72 , 0x61 , 0x62 , 0x6C , 0x65 , 0x5D , 0x3C , 0x3C , 0x3C ,
0x61 , 0x20 , 0x26 , 0x5B , 0x66 , 0x69 , 0x72 , 0x73 , 0x74 , 0x20 , 0x73 , 0x65 , 0x63 , 0x6F , 0x6E ,
0x64 , 0x61 , 0x72 , 0x79 , 0x20 , 0x69 , 0x67 , 0x6E , 0x6F , 0x72 , 0x61 , 0x62 , 0x6C , 0x65 , 0x5D , 0x3C ,
0x3C , 0x3C , 0x20 , 0x62 , 0 } ;
2011-12-08 22:37:24 +00:00
static const UChar tData1 [ ] [ 4 ] = {
2008-04-04 22:47:43 +00:00
{ 0x61 , 0 } ,
{ 0x62 , 0 } ,
{ 0xFDD0 , 0x009E , 0 }
} ;
2011-12-08 22:37:24 +00:00
static const UChar tData2 [ ] [ 4 ] = {
{ 0x61 , 0 } ,
{ 0x62 , 0 } ,
{ 0xFDD0 , 0x009E , 0 }
2008-04-04 22:47:43 +00:00
} ;
2009-11-13 19:25:21 +00:00
/*
* These values from FractionalUCA . txt will change ,
* and need to be updated here .
2014-02-25 21:21:49 +00:00
* TODO : Make this not check for particular sort keys .
* Instead , test that we get CEs before & after other ignorables ; see ticket # 6179.
2009-11-13 19:25:21 +00:00
*/
2014-02-25 21:21:49 +00:00
static const uint8_t firstPrimaryIgnCE [ ] = { 1 , 0x83 , 1 , 5 , 0 } ;
static const uint8_t lastPrimaryIgnCE [ ] = { 1 , 0xFC , 1 , 5 , 0 } ;
static const uint8_t firstSecondaryIgnCE [ ] = { 1 , 1 , 0xfe , 0 } ;
static const uint8_t lastSecondaryIgnCE [ ] = { 1 , 1 , 0xff , 0 } ;
UParseError parseError ;
2008-04-04 22:47:43 +00:00
/* Test [Last Primary ignorable] */
2009-04-23 00:23:57 +00:00
2011-12-08 22:37:24 +00:00
log_verbose ( " Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b \n " ) ;
2008-04-04 22:47:43 +00:00
ruleLen = u_strlen ( rule1 ) ;
coll = ucol_openRules ( rule1 , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Tailoring test: &[last primary ignorable] failed! -> %s \n " , u_errorName ( status ) ) ;
2008-04-04 22:47:43 +00:00
return ;
}
tLen = u_strlen ( tData1 [ 0 ] ) ;
rLen = ucol_getSortKey ( coll , tData1 [ 0 ] , tLen , resColl , 100 ) ;
2011-12-08 22:37:24 +00:00
if ( rLen ! = LEN ( lastPrimaryIgnCE ) | | uprv_memcmp ( resColl , lastPrimaryIgnCE , rLen ) ! = 0 ) {
log_err ( " Bad result for &[lpi]<<a...: Data[%d] :%s \t len: %d key: " , 0 , tData1 [ 0 ] , rLen ) ;
2008-04-04 22:47:43 +00:00
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
2011-12-08 22:37:24 +00:00
log_err ( " \n " ) ;
2008-04-04 22:47:43 +00:00
}
tLen = u_strlen ( tData1 [ 1 ] ) ;
rLen = ucol_getSortKey ( coll , tData1 [ 1 ] , tLen , resColl , 100 ) ;
2011-12-08 22:37:24 +00:00
if ( rLen ! = LEN ( firstPrimaryIgnCE ) | | uprv_memcmp ( resColl , firstPrimaryIgnCE , rLen ) ! = 0 ) {
log_err ( " Bad result for &[lpi]<<a...: Data[%d] :%s \t len: %d key: " , 1 , tData1 [ 1 ] , rLen ) ;
2008-04-04 22:47:43 +00:00
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
2011-12-08 22:37:24 +00:00
log_err ( " \n " ) ;
2008-04-04 22:47:43 +00:00
}
ucol_close ( coll ) ;
2009-04-23 00:23:57 +00:00
2008-04-04 22:47:43 +00:00
/* Test [Last Secondary ignorable] */
2011-12-08 22:37:24 +00:00
log_verbose ( " Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b \n " ) ;
2014-02-25 21:21:49 +00:00
ruleLen = u_strlen ( rule2 ) ;
coll = ucol_openRules ( rule2 , ruleLen , UCOL_OFF , UCOL_TERTIARY , & parseError , & status ) ;
2008-04-04 22:47:43 +00:00
if ( U_FAILURE ( status ) ) {
2011-12-08 22:37:24 +00:00
log_err ( " Tailoring test: &[last secondary ignorable] failed! -> %s \n " , u_errorName ( status ) ) ;
2014-02-25 21:21:49 +00:00
log_info ( " offset=%d \" %s \" | \" %s \" \n " ,
parseError . offset , aescstrdup ( parseError . preContext , - 1 ) , aescstrdup ( parseError . postContext , - 1 ) ) ;
2008-04-04 22:47:43 +00:00
return ;
}
tLen = u_strlen ( tData2 [ 0 ] ) ;
rLen = ucol_getSortKey ( coll , tData2 [ 0 ] , tLen , resColl , 100 ) ;
2011-12-08 22:37:24 +00:00
if ( rLen ! = LEN ( lastSecondaryIgnCE ) | | uprv_memcmp ( resColl , lastSecondaryIgnCE , rLen ) ! = 0 ) {
log_err ( " Bad result for &[lsi]<<<a...: Data[%d] :%s \t len: %d key: " , 0 , tData2 [ 0 ] , rLen ) ;
2008-04-04 22:47:43 +00:00
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
2011-12-08 22:37:24 +00:00
log_err ( " \n " ) ;
2008-04-04 22:47:43 +00:00
}
2014-02-25 21:21:49 +00:00
tLen = u_strlen ( tData2 [ 1 ] ) ;
rLen = ucol_getSortKey ( coll , tData2 [ 1 ] , tLen , resColl , 100 ) ;
if ( rLen ! = LEN ( firstSecondaryIgnCE ) | | uprv_memcmp ( resColl , firstSecondaryIgnCE , rLen ) ! = 0 ) {
log_err ( " Bad result for &[lsi]<<<a...: Data[%d] :%s \t len: %d key: " , 1 , tData2 [ 1 ] , rLen ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
2013-09-17 00:55:39 +00:00
}
2014-02-25 21:21:49 +00:00
log_err ( " \n " ) ;
2008-04-04 22:47:43 +00:00
}
ucol_close ( coll ) ;
}
/*
 * Helper for TestUCAPrecontext: builds the sort key of each of the first
 * `count` strings in `data` and compares it (as a NUL-terminated byte string)
 * with the key of the preceding entry.
 *
 * Every entry after the first must not compare lower than its predecessor,
 * except the entry at `smallerIndex`, which must not compare greater.
 * Pass a negative `smallerIndex` when no entry is exempt.
 */
static void
checkUCAPrecontextOrder(const UCollator *coll, const UChar data[][20],
                        int32_t count, int32_t smallerIndex)
{
    uint8_t resColl[100], prevColl[100];
    int32_t i, j, rLen, tLen;

    for (j = 0; j < count; j++) {
        tLen = u_strlen(data[j]);
        rLen = ucol_getSortKey(coll, data[j], tLen, resColl, 100);
        /* prevColl is only read once a previous iteration has filled it. */
        if (j > 0) {
            int cmp = strcmp((char *)resColl, (char *)prevColl);
            if (j == smallerIndex) {
                if (cmp > 0) {
                    log_err(" \n Expecting smaller key than previous test case: Data[%d] :%s. ",
                            j, aescstrdup(data[j], tLen));
                }
            } else if (cmp < 0) {
                log_err(" \n Expecting greater key than previous test case: Data[%d] :%s. ",
                        j, aescstrdup(data[j], tLen));
            }
        }
        log_verbose(" \n Data[%d] :%s \t len: %d key: ", j, aescstrdup(data[j], tLen), rLen);
        for (i = 0; i < rLen; i++) {
            log_verbose(" %02X ", resColl[i]);
        }
        uprv_memcpy(prevColl, resColl, sizeof(uint8_t) * (rLen + 1));
    }
}

/*
 * Checks the relative order of strings involving the two middle dots
 * (U+00B7, U+0387) under the "en" and "ja" collators and under two
 * rule-based tailorings.
 *
 * The UChar test strings are converted with aescstrdup() before being
 * logged through "%s"; passing the UChar array itself is a type mismatch.
 */
static void
TestUCAPrecontext(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = NULL;
    int32_t ruleLen;
    static const UChar rule1[] = {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
    static const UChar rule2[] = {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
    /* & L middle-dot << a  a is an expansion. */

    static const UChar tData1[][20] = {
        {0xb7, 0},              /* standalone middle dot(0xb7) */
        {0x387, 0},             /* standalone middle dot(0x387) */
        {0x61, 0},              /* a */
        {0x6C, 0},              /* l */
        {0x4C, 0x0332, 0},      /* L with [first primary ignorable] */
        {0x6C, 0xb7, 0},        /* l with middle dot(0xb7) */
        {0x6C, 0x387, 0},       /* l with middle dot(0x387) */
        {0x4C, 0xb7, 0},        /* L with middle dot(0xb7) */
        {0x4C, 0x387, 0},       /* L with middle dot(0x387) */
        {0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
        {0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
    };
    const int32_t dataCount = 11;

    log_verbose(" \n \n EN collation: ");
    coll = ucol_open(" en ", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, " Tailoring test: &z <<a|- failed! -> %s \n ", u_errorName(status));
        return;
    }
    checkUCAPrecontextOrder(coll, tData1, dataCount, -1);
    ucol_close(coll);

    log_verbose(" \n \n JA collation: ");
    coll = ucol_open(" ja ", &status);
    if (U_FAILURE(status)) {
        log_err(" Tailoring test: &z <<a|- failed! ");
        return;
    }
    checkUCAPrecontextOrder(coll, tData1, dataCount, -1);
    ucol_close(coll);

    log_verbose(" \n \n Tailoring test: & middle dot < a ");
    ruleLen = u_strlen(rule1);
    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL, &status);
    if (U_FAILURE(status)) {
        log_err(" Tailoring test: & middle dot < a failed! ");
        return;
    }
    checkUCAPrecontextOrder(coll, tData1, dataCount, -1);
    ucol_close(coll);

    log_verbose(" \n \n Tailoring test: & l middle-dot << a ");
    ruleLen = u_strlen(rule2);
    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL, &status);
    if (U_FAILURE(status)) {
        log_err(" Tailoring test: & l middle-dot << a failed! ");
        return;
    }
    /* With this tailoring entry 3 ("l") is expected not to sort after entry 2 ("a"). */
    checkUCAPrecontextOrder(coll, tData1, dataCount, 3);
    ucol_close(coll);
}
2008-06-11 21:37:03 +00:00
/*
 * Ticket #5468: asks for a sort key with a one-byte destination buffer and
 * checks that the length reported by ucol_getSortKey() is still 4.
 *
 * The collator is closed on every exit path after a successful open.
 */
static void
TestOutOfBuffer5468(void)
{
    static const char *test = " \\ u4e00 ";
    UChar ustr[256];
    int32_t ustr_length = u_unescape(test, ustr, 256);
    unsigned char shortKeyBuf[1]; /* deliberately far too small for the key */
    int32_t sortkey_length;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = NULL;

    coll = ucol_open(" root ", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, " Couldn't open UCA -> %s \n ", u_errorName(status));
        return;
    }
    ucol_setStrength(coll, UCOL_PRIMARY);
    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    if (U_FAILURE(status)) {
        log_err(" Failed setting atributes \n ");
        ucol_close(coll); /* do not leak the collator on this early exit */
        return;
    }

    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
    if (sortkey_length != 4) {
        log_err(" expecting length of sortKey is 4 got:%d ", sortkey_length);
    }
    log_verbose(" length of sortKey is %d ", sortkey_length);
    ucol_close(coll);
}
2008-04-04 22:47:43 +00:00
2007-11-15 23:09:40 +00:00
# define TSKC_DATA_SIZE 5
# define TSKC_BUF_SIZE 50
static void
TestSortKeyConsistency ( void )
{
UErrorCode icuRC = U_ZERO_ERROR ;
UCollator * ucol ;
UChar data [ ] = { 0xFFFD , 0x0006 , 0x0006 , 0x0006 , 0xFFFD } ;
uint8_t bufFull [ TSKC_DATA_SIZE ] [ TSKC_BUF_SIZE ] ;
uint8_t bufPart [ TSKC_DATA_SIZE ] [ TSKC_BUF_SIZE ] ;
2007-12-26 18:50:17 +00:00
int32_t i , j , i2 ;
2007-11-15 23:09:40 +00:00
ucol = ucol_openFromShortString ( " LEN_S4 " , FALSE , NULL , & icuRC ) ;
if ( U_FAILURE ( icuRC ) )
2007-12-26 18:50:17 +00:00
{
2009-06-09 21:28:13 +00:00
log_err_status ( icuRC , " ucol_openFromShortString failed -> %s \n " , u_errorName ( icuRC ) ) ;
2007-11-15 23:09:40 +00:00
return ;
2007-12-26 18:50:17 +00:00
}
2007-11-15 23:09:40 +00:00
for ( i = 0 ; i < TSKC_DATA_SIZE ; i + + )
{
UCharIterator uiter ;
uint32_t state [ 2 ] = { 0 , 0 } ;
int32_t dataLen = i + 1 ;
2007-12-26 18:50:17 +00:00
for ( j = 0 ; j < TSKC_BUF_SIZE ; j + + )
bufFull [ i ] [ j ] = bufPart [ i ] [ j ] = 0 ;
2007-11-15 23:09:40 +00:00
2007-11-16 01:25:00 +00:00
/* Full sort key */
2007-11-15 23:09:40 +00:00
ucol_getSortKey ( ucol , data , dataLen , bufFull [ i ] , TSKC_BUF_SIZE ) ;
2007-11-16 01:25:00 +00:00
/* Partial sort key */
2007-11-15 23:09:40 +00:00
uiter_setString ( & uiter , data , dataLen ) ;
ucol_nextSortKeyPart ( ucol , & uiter , state , bufPart [ i ] , TSKC_BUF_SIZE , & icuRC ) ;
if ( U_FAILURE ( icuRC ) )
2007-12-26 18:50:17 +00:00
{
log_err ( " ucol_nextSortKeyPart failed \n " ) ;
ucol_close ( ucol ) ;
return ;
}
2007-11-15 23:09:40 +00:00
2007-12-26 18:50:17 +00:00
for ( i2 = 0 ; i2 < i ; i2 + + )
{
UBool fullMatch = TRUE ;
UBool partMatch = TRUE ;
for ( j = 0 ; j < TSKC_BUF_SIZE ; j + + )
{
fullMatch = fullMatch & & ( bufFull [ i ] [ j ] ! = bufFull [ i2 ] [ j ] ) ;
partMatch = partMatch & & ( bufPart [ i ] [ j ] ! = bufPart [ i2 ] [ j ] ) ;
}
if ( fullMatch ! = partMatch ) {
log_err ( fullMatch ? " full key was consistent, but partial key changed \n "
: " partial key was consistent, but full key changed \n " ) ;
ucol_close ( ucol ) ;
return ;
}
}
2007-11-15 23:09:40 +00:00
}
2007-11-16 01:25:00 +00:00
/*=============================================*/
2007-11-15 23:09:40 +00:00
ucol_close ( ucol ) ;
}
2007-12-21 00:08:12 +00:00
/* ticket: 6101 */
static void TestCroatianSortKey ( void ) {
2007-12-26 18:50:17 +00:00
const char * collString = " LHR_AN_CX_EX_FX_HX_NX_S3 " ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * ucol ;
UCharIterator iter ;
static const UChar text [ ] = { 0x0044 , 0xD81A } ;
size_t length = sizeof ( text ) / sizeof ( * text ) ;
uint8_t textSortKey [ 32 ] ;
size_t lenSortKey = 32 ;
size_t actualSortKeyLen ;
uint32_t uStateInfo [ 2 ] = { 0 , 0 } ;
ucol = ucol_openFromShortString ( collString , FALSE , NULL , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " ucol_openFromShortString error in Craotian test. -> %s \n " , u_errorName ( status ) ) ;
2007-12-26 18:50:17 +00:00
return ;
}
uiter_setString ( & iter , text , length ) ;
actualSortKeyLen = ucol_nextSortKeyPart (
ucol , & iter , ( uint32_t * ) uStateInfo ,
textSortKey , lenSortKey , & status
) ;
if ( actualSortKeyLen = = lenSortKey ) {
log_err ( " ucol_nextSortKeyPart did not give correct result in Croatian test. \n " ) ;
}
ucol_close ( ucol ) ;
2007-12-21 00:08:12 +00:00
}
2008-04-17 05:19:19 +00:00
/* ticket: 6140 */
2008-04-17 16:55:43 +00:00
/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
2009-04-23 00:23:57 +00:00
* they are both Hiragana and Katakana
2008-04-17 16:55:43 +00:00
*/
# define SORTKEYLEN 50
2008-04-17 05:19:19 +00:00
static void TestHiragana ( void ) {
UErrorCode status = U_ZERO_ERROR ;
UCollator * ucol ;
2008-04-17 16:55:43 +00:00
UCollationResult strcollresult ;
2008-04-17 05:19:19 +00:00
UChar data1 [ ] = { 0x3058 , 0x30B8 } ; /* Hiragana and Katakana letter Zi */
UChar data2 [ ] = { 0x3057 , 0x3099 , 0x30B7 , 0x3099 } ;
int32_t data1Len = sizeof ( data1 ) / sizeof ( * data1 ) ;
int32_t data2Len = sizeof ( data2 ) / sizeof ( * data2 ) ;
2008-04-17 16:55:43 +00:00
int32_t i , j ;
uint8_t sortKey1 [ SORTKEYLEN ] ;
uint8_t sortKey2 [ SORTKEYLEN ] ;
2008-04-17 05:19:19 +00:00
UCharIterator uiter1 ;
UCharIterator uiter2 ;
uint32_t state1 [ 2 ] = { 0 , 0 } ;
uint32_t state2 [ 2 ] = { 0 , 0 } ;
int32_t keySize1 ;
int32_t keySize2 ;
ucol = ucol_openFromShortString ( " LJA_AN_CX_EX_FX_HO_NX_S4 " , FALSE , NULL ,
& status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Error status: %s; Unable to open collator from short string. \n " , u_errorName ( status ) ) ;
2008-04-17 05:19:19 +00:00
return ;
}
2007-11-15 23:09:40 +00:00
2008-04-17 05:19:19 +00:00
/* Start of full sort keys */
2008-04-17 17:58:36 +00:00
/* Full sort key1 */
2008-04-17 16:55:43 +00:00
keySize1 = ucol_getSortKey ( ucol , data1 , data1Len , sortKey1 , SORTKEYLEN ) ;
2008-04-17 17:58:36 +00:00
/* Full sort key2 */
2008-04-17 16:55:43 +00:00
keySize2 = ucol_getSortKey ( ucol , data2 , data2Len , sortKey2 , SORTKEYLEN ) ;
2008-04-17 05:19:19 +00:00
if ( keySize1 = = keySize2 ) {
for ( i = 0 ; i < keySize1 ; i + + ) {
if ( sortKey1 [ i ] ! = sortKey2 [ i ] ) {
log_err ( " Full sort keys are different. Should be equal. " ) ;
}
}
} else {
2008-04-17 16:55:43 +00:00
log_err ( " Full sort keys sizes doesn't match: %d %d " , keySize1 , keySize2 ) ;
2008-04-17 05:19:19 +00:00
}
/* End of full sort keys */
/* Start of partial sort keys */
2008-04-17 17:58:36 +00:00
/* Partial sort key1 */
2008-04-17 05:19:19 +00:00
uiter_setString ( & uiter1 , data1 , data1Len ) ;
2008-04-17 16:55:43 +00:00
keySize1 = ucol_nextSortKeyPart ( ucol , & uiter1 , state1 , sortKey1 , SORTKEYLEN , & status ) ;
2008-04-17 17:58:36 +00:00
/* Partial sort key2 */
2008-04-17 05:19:19 +00:00
uiter_setString ( & uiter2 , data2 , data2Len ) ;
2008-04-17 16:55:43 +00:00
keySize2 = ucol_nextSortKeyPart ( ucol , & uiter2 , state2 , sortKey2 , SORTKEYLEN , & status ) ;
if ( U_SUCCESS ( status ) & & keySize1 = = keySize2 ) {
2008-04-17 05:19:19 +00:00
for ( j = 0 ; j < keySize1 ; j + + ) {
if ( sortKey1 [ j ] ! = sortKey2 [ j ] ) {
log_err ( " Partial sort keys are different. Should be equal " ) ;
}
}
} else {
2008-04-17 16:55:43 +00:00
log_err ( " Error Status: %s or Partial sort keys sizes doesn't match: %d %d " , u_errorName ( status ) , keySize1 , keySize2 ) ;
2008-04-17 05:19:19 +00:00
}
/* End of partial sort keys */
/* Start of strcoll */
2008-04-17 17:58:36 +00:00
/* Use ucol_strcoll() to determine ordering */
2008-04-17 05:19:19 +00:00
strcollresult = ucol_strcoll ( ucol , data1 , data1Len , data2 , data2Len ) ;
if ( strcollresult ! = UCOL_EQUAL ) {
log_err ( " Result from ucol_strcoll() should be UCOL_EQUAL. " ) ;
}
2009-04-23 00:23:57 +00:00
2008-04-17 05:19:19 +00:00
ucol_close ( ucol ) ;
}
2008-04-17 16:55:43 +00:00
2010-06-14 21:27:44 +00:00
/*
 * Convenient struct for running collation tests.
 * One entry describes a single comparison: two UChar strings and the result
 * the collator is expected to report for (source, target).
 * The tables in this file initialize source/target with fewer than
 * MAX_TOKEN_LEN elements, so the remaining elements are zero and the strings
 * are NUL-terminated.
 */
typedef struct {
const UChar source [ MAX_TOKEN_LEN ] ; /* String on left */
const UChar target [ MAX_TOKEN_LEN ] ; /* String on right */
UCollationResult result ; /* Expected comparison result, e.g. UCOL_LESS or UCOL_EQUAL */
} OneTestCase ;
/*
* Utility function to test one collation test case .
* @ param testcases Array of test cases .
* @ param n_testcases Size of the array testcases .
* @ param str_rules Array of rules . These rules should be specifying the same rule in different formats .
* @ param n_rules Size of the array str_rules .
*/
static void doTestOneTestCase ( const OneTestCase testcases [ ] ,
int n_testcases ,
const char * str_rules [ ] ,
int n_rules )
{
int rule_no , testcase_no ;
UChar rule [ 500 ] ;
int32_t length = 0 ;
UErrorCode status = U_ZERO_ERROR ;
UParseError parse_error ;
UCollator * myCollation ;
for ( rule_no = 0 ; rule_no < n_rules ; + + rule_no ) {
length = u_unescape ( str_rules [ rule_no ] , rule , 500 ) ;
if ( length = = 0 ) {
log_err ( " ERROR: The rule cannot be unescaped: %s \n " ) ;
return ;
}
myCollation = ucol_openRules ( rule , length , UCOL_ON , UCOL_TERTIARY , & parse_error , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
2014-02-25 21:21:49 +00:00
log_info ( " offset=%d \" %s \" | \" %s \" \n " ,
parse_error . offset ,
aescstrdup ( parse_error . preContext , - 1 ) ,
aescstrdup ( parse_error . postContext , - 1 ) ) ;
2010-06-14 21:27:44 +00:00
return ;
}
log_verbose ( " Testing the <<* syntax \n " ) ;
ucol_setAttribute ( myCollation , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
for ( testcase_no = 0 ; testcase_no < n_testcases ; + + testcase_no ) {
doTest ( myCollation ,
testcases [ testcase_no ] . source ,
testcases [ testcase_no ] . target ,
testcases [ testcase_no ] . result
) ;
}
ucol_close ( myCollation ) ;
}
}
/*
 * Expected orderings passed to doTestOneTestCase() by TestSameStrengthList,
 * TestSameStrengthListQuoted and TestSameStrengthListRanges.
 * Each trailing comment spells out the source and target strings of its row.
 */
const static OneTestCase rangeTestcases [ ] = {
{ { 0x0061 } , { 0x0062 } , UCOL_LESS } , /* "a" < "b" */
{ { 0x0062 } , { 0x0063 } , UCOL_LESS } , /* "b" < "c" */
{ { 0x0061 } , { 0x0063 } , UCOL_LESS } , /* "a" < "c" */
{ { 0x0062 } , { 0x006b } , UCOL_LESS } , /* "b" << "k" */
{ { 0x006b } , { 0x006c } , UCOL_LESS } , /* "k" << "l" */
{ { 0x0062 } , { 0x006c } , UCOL_LESS } , /* "b" << "l" */
{ { 0x0061 } , { 0x006c } , UCOL_LESS } , /* "a" < "l" */
{ { 0x0061 } , { 0x006d } , UCOL_LESS } , /* "a" < "m" */
{ { 0x0079 } , { 0x006d } , UCOL_LESS } , /* "y" < "m" */
{ { 0x0079 } , { 0x0067 } , UCOL_LESS } , /* "y" < "g" */
{ { 0x0061 } , { 0x0068 } , UCOL_LESS } , /* "a" < "h" */
{ { 0x0061 } , { 0x0065 } , UCOL_LESS } , /* "a" < "e" */
{ { 0x0061 } , { 0x0031 } , UCOL_EQUAL } , /* "a" = "1" */
{ { 0x0061 } , { 0x0032 } , UCOL_EQUAL } , /* "a" = "2" */
{ { 0x0061 } , { 0x0033 } , UCOL_EQUAL } , /* "a" = "3" */
{ { 0x0061 } , { 0x0066 } , UCOL_LESS } , /* "a" < "f" */
{ { 0x006c , 0x0061 } , { 0x006b , 0x0062 } , UCOL_LESS } , /* "la" < "kb" */
{ { 0x0061 , 0x0061 , 0x0061 } , { 0x0031 , 0x0032 , 0x0033 } , UCOL_EQUAL } , /* "aaa" = "123" */
{ { 0x0062 } , { 0x007a } , UCOL_LESS } , /* "b" < "z" */
{ { 0x0061 , 0x007a , 0x0062 } , { 0x0032 , 0x0079 , 0x006d } , UCOL_LESS } , /* "azb" < "2ym" */
2010-02-09 19:59:06 +00:00
} ;
2010-06-14 21:27:44 +00:00
/* Number of entries in rangeTestcases. */
static int nRangeTestcases = LEN ( rangeTestcases ) ;
/*
 * Expected orderings passed to doTestOneTestCase() by
 * TestSameStrengthListSupplemental and TestSameStrengthListSupplementalRanges.
 * Supplementary code points are written as UTF-16 surrogate pairs:
 * D800 DC00 = U+10000, D800 DC01 = U+10001, D800 DC02 = U+10002.
 */
const static OneTestCase rangeTestcasesSupplemental [ ] = {
2014-02-25 21:21:49 +00:00
{ { 0x4e00 } , { 0xfffb } , UCOL_LESS } , /* U+4E00 < U+FFFB */
{ { 0xfffb } , { 0xd800 , 0xdc00 } , UCOL_LESS } , /* U+FFFB < U+10000 */
2010-06-14 21:27:44 +00:00
{ { 0xd800 , 0xdc00 } , { 0xd800 , 0xdc01 } , UCOL_LESS } , /* U+10000 < U+10001 */
2014-02-25 21:21:49 +00:00
{ { 0x4e00 } , { 0xd800 , 0xdc01 } , UCOL_LESS } , /* U+4E00 < U+10001 */
2010-06-14 21:27:44 +00:00
{ { 0xd800 , 0xdc01 } , { 0xd800 , 0xdc02 } , UCOL_LESS } , /* U+10001 < U+10002 */
{ { 0xd800 , 0xdc01 } , { 0xd800 , 0xdc02 } , UCOL_LESS } , /* U+10001 < U+10002 (same case as the previous row) */
2014-02-25 21:21:49 +00:00
{ { 0x4e00 } , { 0xd800 , 0xdc02 } , UCOL_LESS } , /* U+4E00 < U+10002 */
2010-02-09 19:59:06 +00:00
} ;
2010-06-14 21:27:44 +00:00
/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = LEN ( rangeTestcasesSupplemental ) ;
/*
 * Expected orderings passed to doTestOneTestCase() by
 * TestSameStrengthListQwerty and TestSameStrengthListQuotedQwerty.
 * Every row expects UCOL_LESS; the last two rows compare whole words.
 */
const static OneTestCase rangeTestcasesQwerty [ ] = {
{ { 0x0071 } , { 0x0077 } , UCOL_LESS } , /* "q" < "w" */
{ { 0x0077 } , { 0x0065 } , UCOL_LESS } , /* "w" < "e" */
{ { 0x0079 } , { 0x0075 } , UCOL_LESS } , /* "y" < "u" */
{ { 0x0071 } , { 0x0075 } , UCOL_LESS } , /* "q" << "u" */
{ { 0x0074 } , { 0x0069 } , UCOL_LESS } , /* "t" << "i" */
{ { 0x006f } , { 0x0070 } , UCOL_LESS } , /* "o" << "p" */
{ { 0x0079 } , { 0x0065 } , UCOL_LESS } , /* "y" < "e" */
{ { 0x0069 } , { 0x0075 } , UCOL_LESS } , /* "i" < "u" */
{ { 0x0071 , 0x0075 , 0x0065 , 0x0073 , 0x0074 } ,
{ 0x0077 , 0x0065 , 0x0072 , 0x0065 } , UCOL_LESS } , /* "quest" < "were" */
{ { 0x0071 , 0x0075 , 0x0061 , 0x0063 , 0x006b } ,
{ 0x0071 , 0x0075 , 0x0065 , 0x0073 , 0x0074 } , UCOL_LESS } , /* "quack" < "quest" */
2010-02-09 19:59:06 +00:00
} ;
2010-06-14 21:27:44 +00:00
/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = LEN ( rangeTestcasesQwerty ) ;
2010-02-09 19:59:06 +00:00
/*
 * Runs rangeTestcases against one tailoring written two ways:
 * as chained single-character relations and as starred lists.
 */
static void TestSameStrengthList(void)
{
    const char *ruleForms[] = {
        /* One relation per character */
        " &a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3 ",

        /* Starred-list form */
        " &a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123 ",
    };
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(rangeTestcases, nRangeTestcases, ruleForms, ruleFormCount);
}
2010-02-09 19:59:06 +00:00
2010-06-14 21:27:44 +00:00
/*
 * Runs rangeTestcases against starred-list rules in which some characters
 * are written as escapes, with and without surrounding apostrophes.
 */
static void TestSameStrengthListQuoted(void)
{
    const char *ruleForms[] = {
        /* Lists with escaped and quoted characters */
        " & \\ u0061<*bcd &b<<*klm &k<<<*xyz &y<*f \\ u0067 \\ u0068e &a=*123 ",
        " &' \\ u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f' \\ u0067 \\ u0068'e &a=*123 ",
        " & \\ u0061<*b \\ u0063d &b<<*klm &k<<<*xyz & \\ u0079<*fgh \\ u0065 &a=* \\ u0031 \\ u0032 \\ u0033 ",
        " &' \\ u0061'<*b' \\ u0063'd &b<<*klm &k<<<*xyz &' \\ u0079'<*fgh' \\ u0065' &a=*' \\ u0031 \\ u0032 \\ u0033' ",
        " & \\ u0061<* \\ u0062c \\ u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=* \\ u0031 \\ u0032 \\ u0033 ",
        " &' \\ u0061'<*' \\ u0062'c' \\ u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*' \\ u0031 \\ u0032 \\ u0033' ",
    };
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(rangeTestcases, nRangeTestcases, ruleForms, ruleFormCount);
}
/*
 * Runs rangeTestcasesSupplemental against four spellings of one tailoring:
 * chained relations and starred lists, each with the supplementary code
 * points written either as \U escapes or as surrogate-pair \u escapes.
 */
static void TestSameStrengthListSupplemental(void)
{
    const char *ruleForms[] = {
        " & \\ u4e00< \\ ufffb< \\ U00010000< \\ U00010001< \\ U00010002 ",
        " & \\ u4e00< \\ ufffb< \\ ud800 \\ udc00< \\ ud800 \\ udc01< \\ ud800 \\ udc02 ",
        " & \\ u4e00<* \\ ufffb \\ U00010000 \\ U00010001 \\ U00010002 ",
        " & \\ u4e00<* \\ ufffb \\ ud800 \\ udc00 \\ ud800 \\ udc01 \\ ud800 \\ udc02 ",
    };
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, ruleForms, ruleFormCount);
}
/*
 * Runs rangeTestcasesQwerty against a keyboard-order tailoring written as
 * plain relations, starred lists, and escaped/quoted variants of both.
 */
static void TestSameStrengthListQwerty(void)
{
    const char *ruleForms[] = {
        /* Normal */
        " &q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d ",
        /* Lists */
        " &q<*wer &w<<*tyu &t<<<*iop &o=*asd ",
        " & \\ u0071< \\ u0077< \\ u0065< \\ u0072 & \\ u0077<< \\ u0074<< \\ u0079<< \\ u0075 & \\ u0074<<< \\ u0069<<< \\ u006f<<< \\ u0070 & \\ u006f= \\ u0061= \\ u0073= \\ u0064 ",
        " &' \\ u0071'< \\ u0077< \\ u0065< \\ u0072 & \\ u0077<<' \\ u0074'<< \\ u0079<< \\ u0075 & \\ u0074<<< \\ u0069<<<' \\ u006f'<<< \\ u0070 & \\ u006f= \\ u0061=' \\ u0073'= \\ u0064 ",
        " & \\ u0071<* \\ u0077 \\ u0065 \\ u0072 & \\ u0077<<* \\ u0074 \\ u0079 \\ u0075 & \\ u0074<<<* \\ u0069 \\ u006f \\ u0070 & \\ u006f=* \\ u0061 \\ u0073 \\ u0064 ",
        /* Quoted characters also work as long as two quoted characters are not consecutive. */
        " & \\ u0071<*' \\ u0077' \\ u0065 \\ u0072 & \\ u0077<<* \\ u0074' \\ u0079' \\ u0075 & \\ u0074<<<* \\ u0069 \\ u006f' \\ u0070' &' \\ u006f'=* \\ u0061 \\ u0073 \\ u0064 ",
        /* Consecutive quoted characters do not work, because a '' will be treated as a quote character. */
        /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
    };
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, ruleForms, ruleFormCount);
}
static void TestSameStrengthListQuotedQwerty ( void )
{
const char * strRules [ ] = {
" &q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d " , /* Normal */
" &q<*wer &w<<*tyu &t<<<*iop &o=*asd " , /* Lists */
2010-10-14 20:25:09 +00:00
" &q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd' " , /* Lists with quotes */
/* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
/* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
2010-10-14 18:44:44 +00:00
} ;
2010-06-14 21:27:44 +00:00
doTestOneTestCase ( rangeTestcasesQwerty , nRangeTestcasesQwerty , strRules , LEN ( strRules ) ) ;
}
/* Runs rangeTestcases against the tailoring written with "-" ranges inside starred lists. */
static void TestSameStrengthListRanges(void)
{
    const char *ruleForms[] = {
        " &a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3 ",
    };
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(rangeTestcases, nRangeTestcases, ruleForms, ruleFormCount);
}
/*
 * Runs rangeTestcasesSupplemental against a starred list that contains a
 * range of supplementary code points.
 */
static void TestSameStrengthListSupplementalRanges(void)
{
    const char *ruleForms[] = {
        /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
        " & \\ u4e00<* \\ ufffb \\ U00010000- \\ U00010002 ",
    };
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, ruleForms, ruleFormCount);
}
/*
 * Tailors quoted rule-syntax characters (; + , - & *) with plain relations,
 * a starred list and a starred range, and checks the first four orderings.
 */
static void TestSpecialCharacters(void)
{
    static const OneTestCase expectedOrder[] = {
        {{0x003b}, {0x002b}, UCOL_LESS}, /* ; < + */
        {{0x002b}, {0x002c}, UCOL_LESS}, /* + < , */
        {{0x002c}, {0x002d}, UCOL_LESS}, /* , < - */
        {{0x002d}, {0x0026}, UCOL_LESS}, /* - < & */
    };
    const char *ruleForms[] = {
        /* Normal */
        " &';'<'+'<','<'-'<'&'<'*' ",
        /* List */
        " &';'<*'+,-&*' ",
        /* Range */
        " &';'<*'+'-'-&*' ",
    };
    const int caseCount = LEN(expectedOrder);
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(expectedOrder, caseCount, ruleForms, ruleFormCount);
}
2010-10-14 18:44:44 +00:00
/*
 * Tailors private-use characters U+E2D8..U+E2DC between U+5EA7 and U+4E8D
 * using chained relations, with and without quoting, and checks each
 * adjacent pair.
 */
static void TestPrivateUseCharacters(void)
{
    static const OneTestCase expectedOrder[] = {
        {{0x5ea7}, {0xe2d8}, UCOL_LESS},
        {{0xe2d8}, {0xe2d9}, UCOL_LESS},
        {{0xe2d9}, {0xe2da}, UCOL_LESS},
        {{0xe2da}, {0xe2db}, UCOL_LESS},
        {{0xe2db}, {0xe2dc}, UCOL_LESS},
        {{0xe2dc}, {0x4e8d}, UCOL_LESS},
    };
    const char *ruleForms[] = {
        /* Normal */
        " &' \\ u5ea7'<' \\ uE2D8'<' \\ uE2D9'<' \\ uE2DA'<' \\ uE2DB'<' \\ uE2DC'<' \\ u4e8d' ",
        " & \\ u5ea7< \\ uE2D8< \\ uE2D9< \\ uE2DA< \\ uE2DB< \\ uE2DC< \\ u4e8d ",
    };
    const int caseCount = LEN(expectedOrder);
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(expectedOrder, caseCount, ruleForms, ruleFormCount);
}
/*
 * Same orderings as TestPrivateUseCharacters, with the tailoring written as
 * a starred list (quoted and unquoted).
 */
static void TestPrivateUseCharactersInList(void)
{
    static const OneTestCase expectedOrder[] = {
        {{0x5ea7}, {0xe2d8}, UCOL_LESS},
        {{0xe2d8}, {0xe2d9}, UCOL_LESS},
        {{0xe2d9}, {0xe2da}, UCOL_LESS},
        {{0xe2da}, {0xe2db}, UCOL_LESS},
        {{0xe2db}, {0xe2dc}, UCOL_LESS},
        {{0xe2dc}, {0x4e8d}, UCOL_LESS},
    };
    const char *ruleForms[] = {
        /* List */
        " &' \\ u5ea7'<*' \\ uE2D8 \\ uE2D9 \\ uE2DA \\ uE2DB \\ uE2DC \\ u4e8d' ",
        /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
        " & \\ u5ea7<* \\ uE2D8 \\ uE2D9 \\ uE2DA \\ uE2DB \\ uE2DC \\ u4e8d ",
    };
    const int caseCount = LEN(expectedOrder);
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(expectedOrder, caseCount, ruleForms, ruleFormCount);
}
/*
 * Same orderings as TestPrivateUseCharacters, with U+E2D8..U+E2DC written as
 * a range inside a starred list (quoted and unquoted).
 */
static void TestPrivateUseCharactersInRange(void)
{
    static const OneTestCase expectedOrder[] = {
        {{0x5ea7}, {0xe2d8}, UCOL_LESS},
        {{0xe2d8}, {0xe2d9}, UCOL_LESS},
        {{0xe2d9}, {0xe2da}, UCOL_LESS},
        {{0xe2da}, {0xe2db}, UCOL_LESS},
        {{0xe2db}, {0xe2dc}, UCOL_LESS},
        {{0xe2dc}, {0x4e8d}, UCOL_LESS},
    };
    const char *ruleForms[] = {
        /* Range */
        " &' \\ u5ea7'<*' \\ uE2D8'-' \\ uE2DC \\ u4e8d' ",
        " & \\ u5ea7<* \\ uE2D8- \\ uE2DC \\ u4e8d ",
        /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
    };
    const int caseCount = LEN(expectedOrder);
    const int ruleFormCount = LEN(ruleForms);

    doTestOneTestCase(expectedOrder, caseCount, ruleForms, ruleFormCount);
}
2010-06-14 21:27:44 +00:00
static void TestInvalidListsAndRanges ( void )
{
const char * invalidRules [ ] = {
/* Range not in starred expression */
" & \\ ufffe< \\ uffff- \\ U00010002 " ,
/* Range without start */
" &a<*-c " ,
/* Range without end */
" &a<*b- " ,
/* More than one hyphen */
" &a<*b-g-l " ,
/* Range in the wrong order */
" &a<*k-b " ,
} ;
UChar rule [ 500 ] ;
UErrorCode status = U_ZERO_ERROR ;
UParseError parse_error ;
int n_rules = LEN ( invalidRules ) ;
int rule_no ;
int length ;
UCollator * myCollation ;
for ( rule_no = 0 ; rule_no < n_rules ; + + rule_no ) {
length = u_unescape ( invalidRules [ rule_no ] , rule , 500 ) ;
if ( length = = 0 ) {
log_err ( " ERROR: The rule cannot be unescaped: %s \n " ) ;
2010-02-09 19:59:06 +00:00
return ;
}
2010-06-14 21:27:44 +00:00
myCollation = ucol_openRules ( rule , length , UCOL_ON , UCOL_TERTIARY , & parse_error , & status ) ;
2013-03-21 01:42:01 +00:00
( void ) myCollation ; /* Suppress set but not used warning. */
2010-06-14 21:27:44 +00:00
if ( ! U_FAILURE ( status ) ) {
log_err ( " ERROR: Could not cause a failure as expected: \n " ) ;
2010-02-09 19:59:06 +00:00
}
2010-06-14 21:27:44 +00:00
status = U_ZERO_ERROR ;
}
2010-02-09 19:59:06 +00:00
}
2010-10-27 18:02:52 +00:00
/*
* This test ensures that characters placed before a character in a different script have the same lead byte
* in their collation key before and after script reordering .
*/
static void TestBeforeRuleWithScriptReordering ( void )
{
UParseError error ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
char srules [ 500 ] = " &[before 1] \\ u03b1 < \\ u0e01 " ;
UChar rules [ 500 ] ;
uint32_t rulesLength = 0 ;
2010-11-03 02:41:22 +00:00
int32_t reorderCodes [ 1 ] = { USCRIPT_GREEK } ;
2010-10-30 00:42:12 +00:00
UCollationResult collResult ;
uint8_t baseKey [ 256 ] ;
uint32_t baseKeyLength ;
uint8_t beforeKey [ 256 ] ;
uint32_t beforeKeyLength ;
2010-10-27 18:02:52 +00:00
UChar base [ ] = { 0x03b1 } ; /* base */
int32_t baseLen = sizeof ( base ) / sizeof ( * base ) ;
UChar before [ ] = { 0x0e01 } ; /* ko kai */
int32_t beforeLen = sizeof ( before ) / sizeof ( * before ) ;
2010-10-30 00:42:12 +00:00
/*UChar *data[] = { before, base };
genericRulesStarter ( srules , data , 2 ) ; */
2010-11-02 02:21:57 +00:00
log_verbose ( " Testing the &[before 1] rule with [reorder grek] \n " ) ;
2010-10-30 00:42:12 +00:00
2013-03-21 01:42:01 +00:00
( void ) beforeKeyLength ; /* Suppress set but not used warnings. */
( void ) baseKeyLength ;
2010-10-28 20:28:40 +00:00
2010-10-30 00:42:12 +00:00
/* build collator */
2010-11-01 22:23:49 +00:00
log_verbose ( " Testing the &[before 1] rule with [scriptReorder grek] \n " ) ;
2010-10-27 18:02:52 +00:00
rulesLength = u_unescape ( srules , rules , LEN ( rules ) ) ;
myCollation = ucol_openRules ( rules , rulesLength , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
2010-10-30 00:42:12 +00:00
/* check collation results - before rule applied but not script reordering */
2010-10-28 20:28:40 +00:00
collResult = ucol_strcoll ( myCollation , base , baseLen , before , beforeLen ) ;
2010-10-30 00:42:12 +00:00
if ( collResult ! = UCOL_GREATER ) {
log_err ( " Collation result not correct before script reordering = %d \n " , collResult ) ;
}
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
/* check the lead byte of the collation keys before script reordering */
2010-10-28 20:28:40 +00:00
baseKeyLength = ucol_getSortKey ( myCollation , base , baseLen , baseKey , 256 ) ;
beforeKeyLength = ucol_getSortKey ( myCollation , before , beforeLen , beforeKey , 256 ) ;
2010-10-27 18:02:52 +00:00
if ( baseKey [ 0 ] ! = beforeKey [ 0 ] ) {
log_err ( " Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x \n " , baseKey [ 0 ] , beforeKey [ 0 ] ) ;
}
2010-10-30 00:42:12 +00:00
/* reorder the scripts */
2010-11-03 02:41:22 +00:00
ucol_setReorderCodes ( myCollation , reorderCodes , 1 , & status ) ;
2010-11-02 02:21:57 +00:00
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: while setting script order: %s \n " , myErrorName ( status ) ) ;
return ;
}
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
/* check collation results - before rule applied and after script reordering */
2010-10-27 18:02:52 +00:00
collResult = ucol_strcoll ( myCollation , base , baseLen , before , beforeLen ) ;
2010-10-30 00:42:12 +00:00
if ( collResult ! = UCOL_GREATER ) {
log_err ( " Collation result not correct after script reordering = %d \n " , collResult ) ;
}
/* check the lead byte of the collation keys after script reordering */
2010-10-27 18:02:52 +00:00
ucol_getSortKey ( myCollation , base , baseLen , baseKey , 256 ) ;
ucol_getSortKey ( myCollation , before , beforeLen , beforeKey , 256 ) ;
if ( baseKey [ 0 ] ! = beforeKey [ 0 ] ) {
2010-10-30 00:42:12 +00:00
log_err ( " Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x \n " , baseKey [ 0 ] , beforeKey [ 0 ] ) ;
2010-10-27 18:02:52 +00:00
}
ucol_close ( myCollation ) ;
}
2010-11-04 20:12:39 +00:00
/*
2010-11-05 18:43:45 +00:00
* Test that in a primary - compressed sort key all bytes except the first one are unchanged under script reordering .
2010-11-04 20:12:39 +00:00
*/
static void TestNonLeadBytesDuringCollationReordering ( void )
{
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
int32_t reorderCodes [ 1 ] = { USCRIPT_GREEK } ;
uint8_t baseKey [ 256 ] ;
uint32_t baseKeyLength ;
uint8_t reorderKey [ 256 ] ;
uint32_t reorderKeyLength ;
UChar testString [ ] = { 0x03b1 , 0x03b2 , 0x03b3 } ;
2011-06-10 18:56:08 +00:00
uint32_t i ;
2010-11-04 20:12:39 +00:00
log_verbose ( " Testing non-lead bytes in a sort key with and without reordering \n " ) ;
/* build collator tertiary */
myCollation = ucol_open ( " " , & status ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
baseKeyLength = ucol_getSortKey ( myCollation , testString , LEN ( testString ) , baseKey , 256 ) ;
ucol_setReorderCodes ( myCollation , reorderCodes , LEN ( reorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
reorderKeyLength = ucol_getSortKey ( myCollation , testString , LEN ( testString ) , reorderKey , 256 ) ;
if ( baseKeyLength ! = reorderKeyLength ) {
2011-04-07 18:33:27 +00:00
log_err ( " Key lengths not the same during reordering. \n " ) ;
2010-11-04 20:12:39 +00:00
return ;
}
for ( i = 1 ; i < baseKeyLength ; i + + ) {
if ( baseKey [ i ] ! = reorderKey [ i ] ) {
log_err ( " Collation key bytes not the same at position %d. \n " , i ) ;
return ;
}
}
ucol_close ( myCollation ) ;
/* build collator quaternary */
myCollation = ucol_open ( " " , & status ) ;
ucol_setStrength ( myCollation , UCOL_QUATERNARY ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
baseKeyLength = ucol_getSortKey ( myCollation , testString , LEN ( testString ) , baseKey , 256 ) ;
ucol_setReorderCodes ( myCollation , reorderCodes , LEN ( reorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
reorderKeyLength = ucol_getSortKey ( myCollation , testString , LEN ( testString ) , reorderKey , 256 ) ;
if ( baseKeyLength ! = reorderKeyLength ) {
2011-04-07 18:33:27 +00:00
log_err ( " Key lengths not the same during reordering. \n " ) ;
2010-11-04 20:12:39 +00:00
return ;
}
for ( i = 1 ; i < baseKeyLength ; i + + ) {
if ( baseKey [ i ] ! = reorderKey [ i ] ) {
log_err ( " Collation key bytes not the same at position %d. \n " , i ) ;
return ;
}
}
ucol_close ( myCollation ) ;
}
2010-11-05 20:01:14 +00:00
/*
* Test reordering API .
*/
static void TestReorderingAPI ( void )
{
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
int32_t reorderCodes [ 3 ] = { USCRIPT_GREEK , USCRIPT_HAN , UCOL_REORDER_CODE_PUNCTUATION } ;
2011-04-12 18:23:27 +00:00
int32_t duplicateReorderCodes [ ] = { USCRIPT_CUNEIFORM , USCRIPT_GREEK , UCOL_REORDER_CODE_CURRENCY , USCRIPT_EGYPTIAN_HIEROGLYPHS } ;
2011-05-09 23:54:46 +00:00
int32_t reorderCodesStartingWithDefault [ ] = { UCOL_REORDER_CODE_DEFAULT , USCRIPT_GREEK , USCRIPT_HAN , UCOL_REORDER_CODE_PUNCTUATION } ;
2010-11-05 20:01:14 +00:00
UCollationResult collResult ;
int32_t retrievedReorderCodesLength ;
2011-03-18 22:52:30 +00:00
int32_t retrievedReorderCodes [ 10 ] ;
2010-11-05 20:01:14 +00:00
UChar greekString [ ] = { 0x03b1 } ;
UChar punctuationString [ ] = { 0x203e } ;
2011-03-18 22:52:30 +00:00
int loopIndex ;
2010-11-05 20:01:14 +00:00
log_verbose ( " Testing non-lead bytes in a sort key with and without reordering \n " ) ;
/* build collator tertiary */
myCollation = ucol_open ( " " , & status ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
/* set the reorderding */
ucol_setReorderCodes ( myCollation , reorderCodes , LEN ( reorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
2011-03-18 22:52:30 +00:00
/* get the reordering */
2010-11-05 20:01:14 +00:00
retrievedReorderCodesLength = ucol_getReorderCodes ( myCollation , NULL , 0 , & status ) ;
2010-11-05 21:05:42 +00:00
if ( status ! = U_BUFFER_OVERFLOW_ERROR ) {
log_err_status ( status , " ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s \n " , myErrorName ( status ) ) ;
2010-11-05 20:01:14 +00:00
return ;
}
2010-11-05 21:05:42 +00:00
status = U_ZERO_ERROR ;
2010-11-05 20:01:14 +00:00
if ( retrievedReorderCodesLength ! = LEN ( reorderCodes ) ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , LEN ( reorderCodes ) ) ;
return ;
}
2011-03-18 22:52:30 +00:00
/* now let's really get it */
retrievedReorderCodesLength = ucol_getReorderCodes ( myCollation , retrievedReorderCodes , LEN ( retrievedReorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: getting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
if ( retrievedReorderCodesLength ! = LEN ( reorderCodes ) ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , LEN ( reorderCodes ) ) ;
return ;
}
for ( loopIndex = 0 ; loopIndex < retrievedReorderCodesLength ; loopIndex + + ) {
if ( retrievedReorderCodes [ loopIndex ] ! = reorderCodes [ loopIndex ] ) {
log_err_status ( status , " ERROR: retrieved reorder code doesn't match set reorder code at index %d \n " , loopIndex ) ;
return ;
}
}
2010-11-05 20:01:14 +00:00
collResult = ucol_strcoll ( myCollation , greekString , LEN ( greekString ) , punctuationString , LEN ( punctuationString ) ) ;
if ( collResult ! = UCOL_LESS ) {
log_err_status ( status , " ERROR: collation result should have been UCOL_LESS \n " ) ;
return ;
}
/* clear the reordering */
ucol_setReorderCodes ( myCollation , NULL , 0 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes to NULL: %s \n " , myErrorName ( status ) ) ;
return ;
}
2011-03-18 22:52:30 +00:00
/* get the reordering again */
2010-11-05 20:01:14 +00:00
retrievedReorderCodesLength = ucol_getReorderCodes ( myCollation , NULL , 0 , & status ) ;
if ( retrievedReorderCodesLength ! = 0 ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , 0 ) ;
return ;
}
collResult = ucol_strcoll ( myCollation , greekString , LEN ( greekString ) , punctuationString , LEN ( punctuationString ) ) ;
if ( collResult ! = UCOL_GREATER ) {
log_err_status ( status , " ERROR: collation result should have been UCOL_GREATER \n " ) ;
return ;
}
2011-04-12 18:23:27 +00:00
/* test for error condition on duplicate reorder codes */
ucol_setReorderCodes ( myCollation , duplicateReorderCodes , LEN ( duplicateReorderCodes ) , & status ) ;
if ( ! U_FAILURE ( status ) ) {
2011-05-09 23:54:46 +00:00
log_err_status ( status , " ERROR: setting duplicate reorder codes did not generate a failure \n " ) ;
return ;
}
status = U_ZERO_ERROR ;
/* test for reorder codes after a reset code */
ucol_setReorderCodes ( myCollation , reorderCodesStartingWithDefault , LEN ( reorderCodesStartingWithDefault ) , & status ) ;
if ( ! U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: reorderd code sequence starting with default and having following codes didn't cause an error \n " ) ;
2011-04-12 18:23:27 +00:00
return ;
}
2010-11-05 20:01:14 +00:00
ucol_close ( myCollation ) ;
}
2011-03-18 22:52:30 +00:00
/*
* Test reordering API .
*/
static void TestReorderingAPIWithRuleCreatedCollator ( void )
{
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
UChar rules [ 90 ] ;
2013-09-13 21:21:11 +00:00
static const int32_t rulesReorderCodes [ 2 ] = { USCRIPT_HAN , USCRIPT_GREEK } ;
static const int32_t reorderCodes [ 3 ] = { USCRIPT_GREEK , USCRIPT_HAN , UCOL_REORDER_CODE_PUNCTUATION } ;
static const int32_t onlyDefault [ 1 ] = { UCOL_REORDER_CODE_DEFAULT } ;
2011-03-18 22:52:30 +00:00
UCollationResult collResult ;
int32_t retrievedReorderCodesLength ;
int32_t retrievedReorderCodes [ 10 ] ;
2013-09-13 21:21:11 +00:00
static const UChar greekString [ ] = { 0x03b1 } ;
static const UChar punctuationString [ ] = { 0x203e } ;
static const UChar hanString [ ] = { 0x65E5 , 0x672C } ;
2011-03-18 22:52:30 +00:00
int loopIndex ;
log_verbose ( " Testing non-lead bytes in a sort key with and without reordering \n " ) ;
/* build collator from rules */
u_uastrcpy ( rules , " [reorder Hani Grek] " ) ;
myCollation = ucol_openRules ( rules , u_strlen ( rules ) , UCOL_DEFAULT , UCOL_TERTIARY , NULL , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
/* get the reordering */
retrievedReorderCodesLength = ucol_getReorderCodes ( myCollation , retrievedReorderCodes , LEN ( retrievedReorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: getting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
if ( retrievedReorderCodesLength ! = LEN ( rulesReorderCodes ) ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , LEN ( rulesReorderCodes ) ) ;
return ;
}
for ( loopIndex = 0 ; loopIndex < retrievedReorderCodesLength ; loopIndex + + ) {
if ( retrievedReorderCodes [ loopIndex ] ! = rulesReorderCodes [ loopIndex ] ) {
log_err_status ( status , " ERROR: retrieved reorder code doesn't match set reorder code at index %d \n " , loopIndex ) ;
return ;
}
}
collResult = ucol_strcoll ( myCollation , greekString , LEN ( greekString ) , hanString , LEN ( hanString ) ) ;
if ( collResult ! = UCOL_GREATER ) {
2013-09-13 21:21:11 +00:00
log_err_status ( status , " ERROR: collation result should have been UCOL_GREATER \n " ) ;
2011-03-18 22:52:30 +00:00
return ;
}
2013-09-13 21:21:11 +00:00
/* set the reordering */
2011-03-18 22:52:30 +00:00
ucol_setReorderCodes ( myCollation , reorderCodes , LEN ( reorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
2013-09-13 21:21:11 +00:00
2011-03-18 22:52:30 +00:00
/* get the reordering */
retrievedReorderCodesLength = ucol_getReorderCodes ( myCollation , NULL , 0 , & status ) ;
if ( status ! = U_BUFFER_OVERFLOW_ERROR ) {
log_err_status ( status , " ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s \n " , myErrorName ( status ) ) ;
return ;
}
status = U_ZERO_ERROR ;
if ( retrievedReorderCodesLength ! = LEN ( reorderCodes ) ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , LEN ( reorderCodes ) ) ;
return ;
}
/* now let's really get it */
retrievedReorderCodesLength = ucol_getReorderCodes ( myCollation , retrievedReorderCodes , LEN ( retrievedReorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: getting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
if ( retrievedReorderCodesLength ! = LEN ( reorderCodes ) ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , LEN ( reorderCodes ) ) ;
return ;
}
for ( loopIndex = 0 ; loopIndex < retrievedReorderCodesLength ; loopIndex + + ) {
if ( retrievedReorderCodes [ loopIndex ] ! = reorderCodes [ loopIndex ] ) {
log_err_status ( status , " ERROR: retrieved reorder code doesn't match set reorder code at index %d \n " , loopIndex ) ;
return ;
}
}
collResult = ucol_strcoll ( myCollation , greekString , LEN ( greekString ) , punctuationString , LEN ( punctuationString ) ) ;
if ( collResult ! = UCOL_LESS ) {
log_err_status ( status , " ERROR: collation result should have been UCOL_LESS \n " ) ;
return ;
}
2013-09-13 21:21:11 +00:00
2011-03-18 22:52:30 +00:00
/* clear the reordering */
ucol_setReorderCodes ( myCollation , NULL , 0 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes to NULL: %s \n " , myErrorName ( status ) ) ;
return ;
}
/* get the reordering again */
retrievedReorderCodesLength = ucol_getReorderCodes ( myCollation , NULL , 0 , & status ) ;
if ( retrievedReorderCodesLength ! = 0 ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , 0 ) ;
return ;
}
collResult = ucol_strcoll ( myCollation , greekString , LEN ( greekString ) , punctuationString , LEN ( punctuationString ) ) ;
if ( collResult ! = UCOL_GREATER ) {
log_err_status ( status , " ERROR: collation result should have been UCOL_GREATER \n " ) ;
return ;
}
2013-09-13 21:21:11 +00:00
/* reset the reordering */
ucol_setReorderCodes ( myCollation , onlyDefault , 1 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes to {default}: %s \n " , myErrorName ( status ) ) ;
return ;
}
retrievedReorderCodesLength = ucol_getReorderCodes ( myCollation , retrievedReorderCodes , LEN ( retrievedReorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: getting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
if ( retrievedReorderCodesLength ! = LEN ( rulesReorderCodes ) ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , LEN ( rulesReorderCodes ) ) ;
return ;
}
for ( loopIndex = 0 ; loopIndex < retrievedReorderCodesLength ; loopIndex + + ) {
if ( retrievedReorderCodes [ loopIndex ] ! = rulesReorderCodes [ loopIndex ] ) {
log_err_status ( status , " ERROR: retrieved reorder code doesn't match set reorder code at index %d \n " , loopIndex ) ;
return ;
}
}
2011-03-18 22:52:30 +00:00
ucol_close ( myCollation ) ;
}
/*
 * qsort() comparator that orders int32_t codes ascending.
 *
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return A negative value, zero, or a positive value when *a is less than,
 *         equal to, or greater than *b.
 */
static int compareUScriptCodes(const void *a, const void *b)
{
    const int32_t lhs = *(const int32_t *)a;
    const int32_t rhs = *(const int32_t *)b;

    /* Compare instead of subtracting: lhs - rhs can overflow for values far apart. */
    return (lhs > rhs) - (lhs < rhs);
}
2011-06-10 18:56:08 +00:00
static void TestEquivalentReorderingScripts ( void ) {
2011-03-18 22:52:30 +00:00
UErrorCode status = U_ZERO_ERROR ;
int32_t equivalentScripts [ 50 ] ;
int32_t equivalentScriptsLength ;
int loopIndex ;
int32_t equivalentScriptsResult [ ] = {
USCRIPT_BOPOMOFO ,
USCRIPT_LISU ,
USCRIPT_LYCIAN ,
USCRIPT_CARIAN ,
USCRIPT_LYDIAN ,
USCRIPT_YI ,
USCRIPT_OLD_ITALIC ,
USCRIPT_GOTHIC ,
USCRIPT_DESERET ,
USCRIPT_SHAVIAN ,
USCRIPT_OSMANYA ,
USCRIPT_LINEAR_B ,
USCRIPT_CYPRIOT ,
USCRIPT_OLD_SOUTH_ARABIAN ,
USCRIPT_AVESTAN ,
USCRIPT_IMPERIAL_ARAMAIC ,
USCRIPT_INSCRIPTIONAL_PARTHIAN ,
USCRIPT_INSCRIPTIONAL_PAHLAVI ,
USCRIPT_UGARITIC ,
USCRIPT_OLD_PERSIAN ,
USCRIPT_CUNEIFORM ,
2011-12-08 22:37:24 +00:00
USCRIPT_EGYPTIAN_HIEROGLYPHS ,
USCRIPT_PHONETIC_POLLARD ,
USCRIPT_SORA_SOMPENG ,
USCRIPT_MEROITIC_CURSIVE ,
USCRIPT_MEROITIC_HIEROGLYPHS
2011-03-18 22:52:30 +00:00
} ;
2011-03-22 01:16:48 +00:00
2011-03-18 22:52:30 +00:00
qsort ( equivalentScriptsResult , LEN ( equivalentScriptsResult ) , sizeof ( int32_t ) , compareUScriptCodes ) ;
2011-03-24 00:09:06 +00:00
2011-03-18 22:52:30 +00:00
/* UScript.GOTHIC */
equivalentScriptsLength = ucol_getEquivalentReorderCodes ( USCRIPT_GOTHIC , equivalentScripts , LEN ( equivalentScripts ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: retrieving equivalent reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
2011-03-24 00:09:06 +00:00
/*
fprintf ( stdout , " @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ \n " ) ;
fprintf ( stdout , " equivalentScriptsLength = %d \n " , equivalentScriptsLength ) ;
for ( loopIndex = 0 ; loopIndex < equivalentScriptsLength ; loopIndex + + ) {
fprintf ( stdout , " %d = %x \n " , loopIndex , equivalentScripts [ loopIndex ] ) ;
}
*/
2011-03-18 22:52:30 +00:00
if ( equivalentScriptsLength ! = LEN ( equivalentScriptsResult ) ) {
log_err_status ( status , " ERROR: retrieved equivalent script length wrong: expected = %d, was = %d \n " , LEN ( equivalentScriptsResult ) , equivalentScriptsLength ) ;
return ;
}
for ( loopIndex = 0 ; loopIndex < equivalentScriptsLength ; loopIndex + + ) {
if ( equivalentScriptsResult [ loopIndex ] ! = equivalentScripts [ loopIndex ] ) {
log_err_status ( status , " ERROR: equivalent scripts results don't match: expected = %d, was = %d \n " , equivalentScriptsResult [ loopIndex ] , equivalentScripts [ loopIndex ] ) ;
return ;
}
}
/* UScript.SHAVIAN */
equivalentScriptsLength = ucol_getEquivalentReorderCodes ( USCRIPT_SHAVIAN , equivalentScripts , LEN ( equivalentScripts ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: retrieving equivalent reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
if ( equivalentScriptsLength ! = LEN ( equivalentScriptsResult ) ) {
log_err_status ( status , " ERROR: retrieved equivalent script length wrong: expected = %d, was = %d \n " , LEN ( equivalentScriptsResult ) , equivalentScriptsLength ) ;
return ;
}
for ( loopIndex = 0 ; loopIndex < equivalentScriptsLength ; loopIndex + + ) {
if ( equivalentScriptsResult [ loopIndex ] ! = equivalentScripts [ loopIndex ] ) {
log_err_status ( status , " ERROR: equivalent scripts results don't match: expected = %d, was = %d \n " , equivalentScriptsResult [ loopIndex ] , equivalentScripts [ loopIndex ] ) ;
return ;
}
}
}
2011-06-10 18:56:08 +00:00
static void TestReorderingAcrossCloning ( void )
2011-04-12 18:23:27 +00:00
{
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
int32_t reorderCodes [ 3 ] = { USCRIPT_GREEK , USCRIPT_HAN , UCOL_REORDER_CODE_PUNCTUATION } ;
UCollator * clonedCollation ;
int32_t retrievedReorderCodesLength ;
int32_t retrievedReorderCodes [ 10 ] ;
int loopIndex ;
log_verbose ( " Testing non-lead bytes in a sort key with and without reordering \n " ) ;
/* build collator tertiary */
myCollation = ucol_open ( " " , & status ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
/* set the reorderding */
ucol_setReorderCodes ( myCollation , reorderCodes , LEN ( reorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
/* clone the collator */
2013-09-17 19:48:50 +00:00
clonedCollation = ucol_safeClone ( myCollation , NULL , NULL , & status ) ;
2011-04-12 18:23:27 +00:00
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: cloning collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
/* get the reordering */
retrievedReorderCodesLength = ucol_getReorderCodes ( clonedCollation , retrievedReorderCodes , LEN ( retrievedReorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: getting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
if ( retrievedReorderCodesLength ! = LEN ( reorderCodes ) ) {
log_err_status ( status , " ERROR: retrieved reorder codes length was %d but should have been %d \n " , retrievedReorderCodesLength , LEN ( reorderCodes ) ) ;
return ;
}
for ( loopIndex = 0 ; loopIndex < retrievedReorderCodesLength ; loopIndex + + ) {
if ( retrievedReorderCodes [ loopIndex ] ! = reorderCodes [ loopIndex ] ) {
log_err_status ( status , " ERROR: retrieved reorder code doesn't match set reorder code at index %d \n " , loopIndex ) ;
return ;
}
}
/*uprv_free(buffer);*/
ucol_close ( myCollation ) ;
ucol_close ( clonedCollation ) ;
}
2011-03-18 22:52:30 +00:00
2010-10-30 00:42:12 +00:00
/*
2011-03-16 17:58:55 +00:00
* Utility function to test one collation reordering test case set .
2010-10-30 00:42:12 +00:00
* @ param testcases Array of test cases .
* @ param n_testcases Size of the array testcases .
2011-04-12 18:23:27 +00:00
* @ param reorderTokens Array of reordering codes .
* @ param reorderTokensLen Size of the array reorderTokens .
2010-10-30 00:42:12 +00:00
*/
2010-11-04 20:12:39 +00:00
static void doTestOneReorderingAPITestCase ( const OneTestCase testCases [ ] , uint32_t testCasesLen , const int32_t reorderTokens [ ] , int32_t reorderTokensLen )
2010-10-30 00:42:12 +00:00
{
2011-06-10 18:56:08 +00:00
uint32_t testCaseNum ;
2010-10-30 00:42:12 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
2011-03-16 17:58:55 +00:00
myCollation = ucol_open ( " " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
ucol_setReorderCodes ( myCollation , reorderTokens , reorderTokensLen , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: while setting script order: %s \n " , myErrorName ( status ) ) ;
return ;
}
2010-11-02 02:21:57 +00:00
2011-03-16 17:58:55 +00:00
for ( testCaseNum = 0 ; testCaseNum < testCasesLen ; + + testCaseNum ) {
doTest ( myCollation ,
testCases [ testCaseNum ] . source ,
testCases [ testCaseNum ] . target ,
testCases [ testCaseNum ] . result
) ;
2010-10-30 00:42:12 +00:00
}
2011-03-16 17:58:55 +00:00
ucol_close ( myCollation ) ;
2010-10-30 00:42:12 +00:00
}
2010-10-27 18:02:52 +00:00
static void TestGreekFirstReorder ( void )
{
2010-10-30 00:42:12 +00:00
const char * strRules [ ] = {
2010-11-02 02:21:57 +00:00
" [reorder Grek] "
2010-10-30 00:42:12 +00:00
} ;
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
const int32_t apiRules [ ] = {
USCRIPT_GREEK
} ;
const static OneTestCase privateUseCharacterStrings [ ] = {
{ { 0x0391 } , { 0x0391 } , UCOL_EQUAL } ,
{ { 0x0041 } , { 0x0391 } , UCOL_GREATER } ,
{ { 0x03B1 , 0x0041 } , { 0x03B1 , 0x0391 } , UCOL_GREATER } ,
{ { 0x0060 } , { 0x0391 } , UCOL_LESS } ,
{ { 0x0391 } , { 0xe2dc } , UCOL_LESS } ,
{ { 0x0391 } , { 0x0060 } , UCOL_GREATER } ,
} ;
/* Test rules creation */
doTestOneTestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , strRules , LEN ( strRules ) ) ;
/* Test collation reordering API */
doTestOneReorderingAPITestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , apiRules , LEN ( apiRules ) ) ;
2010-10-27 18:02:52 +00:00
}
static void TestGreekLastReorder ( void )
{
2010-10-30 00:42:12 +00:00
const char * strRules [ ] = {
2010-11-02 02:21:57 +00:00
" [reorder Zzzz Grek] "
2010-10-30 00:42:12 +00:00
} ;
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
const int32_t apiRules [ ] = {
USCRIPT_UNKNOWN , USCRIPT_GREEK
} ;
const static OneTestCase privateUseCharacterStrings [ ] = {
{ { 0x0391 } , { 0x0391 } , UCOL_EQUAL } ,
{ { 0x0041 } , { 0x0391 } , UCOL_LESS } ,
{ { 0x03B1 , 0x0041 } , { 0x03B1 , 0x0391 } , UCOL_LESS } ,
{ { 0x0060 } , { 0x0391 } , UCOL_LESS } ,
{ { 0x0391 } , { 0xe2dc } , UCOL_GREATER } ,
} ;
/* Test rules creation */
doTestOneTestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , strRules , LEN ( strRules ) ) ;
/* Test collation reordering API */
doTestOneReorderingAPITestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , apiRules , LEN ( apiRules ) ) ;
2010-10-27 18:02:52 +00:00
}
static void TestNonScriptReorder ( void )
{
2010-10-30 00:42:12 +00:00
const char * strRules [ ] = {
2010-11-02 02:21:57 +00:00
" [reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy] "
2010-10-30 00:42:12 +00:00
} ;
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
const int32_t apiRules [ ] = {
2010-11-04 20:12:39 +00:00
USCRIPT_GREEK , UCOL_REORDER_CODE_SYMBOL , UCOL_REORDER_CODE_DIGIT , USCRIPT_LATIN ,
UCOL_REORDER_CODE_PUNCTUATION , UCOL_REORDER_CODE_SPACE , USCRIPT_UNKNOWN ,
UCOL_REORDER_CODE_CURRENCY
2010-10-30 00:42:12 +00:00
} ;
const static OneTestCase privateUseCharacterStrings [ ] = {
{ { 0x0391 } , { 0x0041 } , UCOL_LESS } ,
{ { 0x0041 } , { 0x0391 } , UCOL_GREATER } ,
{ { 0x0060 } , { 0x0041 } , UCOL_LESS } ,
{ { 0x0060 } , { 0x0391 } , UCOL_GREATER } ,
{ { 0x0024 } , { 0x0041 } , UCOL_GREATER } ,
} ;
/* Test rules creation */
doTestOneTestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , strRules , LEN ( strRules ) ) ;
/* Test collation reordering API */
doTestOneReorderingAPITestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , apiRules , LEN ( apiRules ) ) ;
}
/*
 * Han-first reordering.
 * One table of expected comparison results is checked twice: once through
 * doTestOneTestCase with the "[reorder Hani]" rule string, and once through
 * doTestOneReorderingAPITestCase with the reorder code USCRIPT_HAN.
 * The table includes BMP ideographs, a compatibility ideograph (U+FA27) and
 * supplementary ideographs written as surrogate pairs.
 */
static void TestHaniReorder(void)
{
    /* Expected results shared by the rule-string run and the API run. */
    static const OneTestCase hanFirstCases[] = {
        { { 0x4e00 }, { 0x0041 }, UCOL_LESS },
        { { 0x4e00 }, { 0x0060 }, UCOL_GREATER },
        { { 0xD86D, 0xDF40 }, { 0x0041 }, UCOL_LESS },
        { { 0xD86D, 0xDF40 }, { 0x0060 }, UCOL_GREATER },
        { { 0x4e00 }, { 0xD86D, 0xDF40 }, UCOL_LESS },
        { { 0xfa27 }, { 0x0041 }, UCOL_LESS },
        { { 0xD869, 0xDF00 }, { 0x0041 }, UCOL_LESS },
    };
    /* The reordering as API reorder codes. */
    const int32_t reorderCodes[] = {
        USCRIPT_HAN
    };
    /* The reordering as a rule string. */
    const char *reorderRules[] = {
        " [reorder Hani] "
    };

    /* Test rules creation */
    doTestOneTestCase(hanFirstCases, LEN(hanFirstCases), reorderRules, LEN(reorderRules));

    /* Test collation reordering API */
    doTestOneReorderingAPITestCase(hanFirstCases, LEN(hanFirstCases), reorderCodes, LEN(reorderCodes));
}
2012-02-03 00:05:00 +00:00
static void TestHaniReorderWithOtherRules ( void )
{
const char * strRules [ ] = {
" [reorder Hani] &b<a "
} ;
2012-08-03 19:09:42 +00:00
/*const int32_t apiRules[] = {
2012-02-03 00:05:00 +00:00
USCRIPT_HAN
2012-08-03 19:09:42 +00:00
} ; */
2012-02-03 00:05:00 +00:00
const static OneTestCase privateUseCharacterStrings [ ] = {
{ { 0x4e00 } , { 0x0041 } , UCOL_LESS } ,
{ { 0x4e00 } , { 0x0060 } , UCOL_GREATER } ,
{ { 0xD86D , 0xDF40 } , { 0x0041 } , UCOL_LESS } ,
{ { 0xD86D , 0xDF40 } , { 0x0060 } , UCOL_GREATER } ,
{ { 0x4e00 } , { 0xD86D , 0xDF40 } , UCOL_LESS } ,
{ { 0xfa27 } , { 0x0041 } , UCOL_LESS } ,
{ { 0xD869 , 0xDF00 } , { 0x0041 } , UCOL_LESS } ,
{ { 0x0062 } , { 0x0061 } , UCOL_LESS } ,
} ;
/* Test rules creation */
doTestOneTestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , strRules , LEN ( strRules ) ) ;
}
2011-06-10 18:56:08 +00:00
static void TestMultipleReorder ( void )
2011-01-27 20:50:53 +00:00
{
const char * strRules [ ] = {
" [reorder Grek Zzzz DIGIT Latn Hani] "
} ;
const int32_t apiRules [ ] = {
USCRIPT_GREEK , USCRIPT_UNKNOWN , UCOL_REORDER_CODE_DIGIT , USCRIPT_LATIN , USCRIPT_HAN
} ;
const static OneTestCase collationTestCases [ ] = {
{ { 0x0391 } , { 0x0041 } , UCOL_LESS } ,
{ { 0x0031 } , { 0x0041 } , UCOL_LESS } ,
{ { 0x0041 } , { 0x4e00 } , UCOL_LESS } ,
} ;
/* Test rules creation */
doTestOneTestCase ( collationTestCases , LEN ( collationTestCases ) , strRules , LEN ( strRules ) ) ;
/* Test collation reordering API */
doTestOneReorderingAPITestCase ( collationTestCases , LEN ( collationTestCases ) , apiRules , LEN ( apiRules ) ) ;
}
2011-11-23 19:50:11 +00:00
/*
* Test that covers issue reported in ticket 8814
*/
2012-08-05 16:33:16 +00:00
static void TestReorderWithNumericCollation ( void )
2011-11-23 19:50:11 +00:00
{
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
UCollator * myReorderCollation ;
int32_t reorderCodes [ ] = { UCOL_REORDER_CODE_SPACE , UCOL_REORDER_CODE_PUNCTUATION , UCOL_REORDER_CODE_SYMBOL , UCOL_REORDER_CODE_DIGIT , USCRIPT_GREEK , USCRIPT_LATIN , USCRIPT_HEBREW , UCOL_REORDER_CODE_OTHERS } ;
/* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
UChar fortyThreeP [ ] = { 0x0034 , 0x0033 , 0x0050 } ; */
UChar fortyS [ ] = { 0x0053 } ;
UChar fortyThreeP [ ] = { 0x0050 } ;
uint8_t fortyS_sortKey [ 128 ] ;
int32_t fortyS_sortKey_Length ;
uint8_t fortyThreeP_sortKey [ 128 ] ;
int32_t fortyThreeP_sortKey_Length ;
uint8_t fortyS_sortKey_reorder [ 128 ] ;
int32_t fortyS_sortKey_reorder_Length ;
uint8_t fortyThreeP_sortKey_reorder [ 128 ] ;
int32_t fortyThreeP_sortKey_reorder_Length ;
UCollationResult collResult ;
UCollationResult collResultReorder ;
log_verbose ( " Testing reordering with and without numeric collation \n " ) ;
/* build collator tertiary with numeric */
myCollation = ucol_open ( " " , & status ) ;
/*
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
*/
ucol_setAttribute ( myCollation , UCOL_NUMERIC_COLLATION , UCOL_ON , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
/* build collator tertiary with numeric and reordering */
myReorderCollation = ucol_open ( " " , & status ) ;
/*
ucol_setStrength ( myReorderCollation , UCOL_TERTIARY ) ;
*/
ucol_setAttribute ( myReorderCollation , UCOL_NUMERIC_COLLATION , UCOL_ON , & status ) ;
ucol_setReorderCodes ( myReorderCollation , reorderCodes , LEN ( reorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
fortyS_sortKey_Length = ucol_getSortKey ( myCollation , fortyS , LEN ( fortyS ) , fortyS_sortKey , 128 ) ;
fortyThreeP_sortKey_Length = ucol_getSortKey ( myCollation , fortyThreeP , LEN ( fortyThreeP ) , fortyThreeP_sortKey , 128 ) ;
fortyS_sortKey_reorder_Length = ucol_getSortKey ( myReorderCollation , fortyS , LEN ( fortyS ) , fortyS_sortKey_reorder , 128 ) ;
fortyThreeP_sortKey_reorder_Length = ucol_getSortKey ( myReorderCollation , fortyThreeP , LEN ( fortyThreeP ) , fortyThreeP_sortKey_reorder , 128 ) ;
if ( fortyS_sortKey_Length < 0 | | fortyThreeP_sortKey_Length < 0 | | fortyS_sortKey_reorder_Length < 0 | | fortyThreeP_sortKey_reorder_Length < 0 ) {
log_err_status ( status , " ERROR: couldn't generate sort keys \n " ) ;
return ;
}
collResult = ucol_strcoll ( myCollation , fortyS , LEN ( fortyS ) , fortyThreeP , LEN ( fortyThreeP ) ) ;
collResultReorder = ucol_strcoll ( myReorderCollation , fortyS , LEN ( fortyS ) , fortyThreeP , LEN ( fortyThreeP ) ) ;
/*
fprintf ( stderr , " \t collResult = %x \n " , collResult ) ;
fprintf ( stderr , " \t collResultReorder = %x \n " , collResultReorder ) ;
fprintf ( stderr , " \n fortyS \n " ) ;
for ( i = 0 ; i < fortyS_sortKey_Length ; i + + ) {
fprintf ( stderr , " %x --- %x \n " , fortyS_sortKey [ i ] , fortyS_sortKey_reorder [ i ] ) ;
}
fprintf ( stderr , " \n fortyThreeP \n " ) ;
for ( i = 0 ; i < fortyThreeP_sortKey_Length ; i + + ) {
fprintf ( stderr , " %x --- %x \n " , fortyThreeP_sortKey [ i ] , fortyThreeP_sortKey_reorder [ i ] ) ;
}
*/
if ( collResult ! = collResultReorder ) {
log_err_status ( status , " ERROR: collation results should have been the same. \n " ) ;
return ;
}
ucol_close ( myCollation ) ;
ucol_close ( myReorderCollation ) ;
}
2010-11-01 22:23:49 +00:00
/**
 * Compares two zero-terminated byte sequences.
 *
 * @param a first sequence; must contain a terminating 0 byte
 * @param b second sequence; must contain a terminating 0 byte
 * @return 0 when both sequences are equal up to and including the 0 byte,
 *         -1 when the first differing byte of a is smaller than that of b,
 *         1 otherwise
 */
static int compare_uint8_t_arrays(const uint8_t *a, const uint8_t *b)
{
    while (*a == *b) {
        if (*a == 0) {
            /* Reached the terminator of both sequences without a difference. */
            return 0;
        }
        ++a;
        ++b;
    }
    return (*a < *b) ? -1 : 1;
}
2011-12-06 00:23:46 +00:00
/**
 * Runs the same three tailorings twice: on their own, and followed by
 * "[import de-u-co-phonebk]". The expectation tables differ only in the
 * second comparison (U+00FC against U+00DC), which flips from UCOL_GREATER
 * to UCOL_LESS once the import is appended.
 */
static void TestImportRulesDeWithPhonebook(void)
{
    /* Tailorings without any import. */
    const char *plainRules[] = {
        " &a< \\ u00e6< \\ u00c6< \\ u00dc< \\ u00fc ",
        " &a<< \\ u00e6<< \\ u00c6<< \\ u00dc<< \\ u00fc ",
        " &a<< \\ u00e6<<< \\ u00c6<< \\ u00dc<< \\ u00fc ",
    };
    /* The same tailorings with the import appended. */
    const char *rulesWithImport[] = {
        " &a< \\ u00e6< \\ u00c6< \\ u00dc< \\ u00fc[import de-u-co-phonebk] ",
        " &a<< \\ u00e6<< \\ u00c6<< \\ u00dc<< \\ u00fc[import de-u-co-phonebk] ",
        " &a<< \\ u00e6<<< \\ u00c6<< \\ u00dc<< \\ u00fc[import de-u-co-phonebk] ",
    };

    /* Each entry: { source, target, expected result of comparing source with target }. */
    const OneTestCase plainExpectations[] = {
        { { 0x00e6 }, { 0x00c6 }, UCOL_LESS },
        { { 0x00fc }, { 0x00dc }, UCOL_GREATER },
    };
    const OneTestCase importExpectations[] = {
        { { 0x00e6 }, { 0x00c6 }, UCOL_LESS },
        { { 0x00fc }, { 0x00dc }, UCOL_LESS },
    };

    doTestOneTestCase(plainExpectations, LEN(plainExpectations), plainRules, LEN(plainRules));
    doTestOneTestCase(importExpectations, LEN(importExpectations), rulesWithImport, LEN(rulesWithImport));
}
2012-08-03 21:51:00 +00:00
#if 0
2011-12-06 00:23:46 +00:00
static void TestImportRulesFiWithEor ( void )
{
/* DUCET. */
const char * defaultRules [ ] = {
" &a<b " , /* Dummy rule. */
} ;
const OneTestCase defaultTests [ ] = {
2011-12-09 21:29:27 +00:00
{ { 0x0110 } , { 0x00F0 } , UCOL_LESS } ,
{ { 0x00a3 } , { 0x00a5 } , UCOL_LESS } ,
{ { 0x0061 } , { 0x0061 , 0x00a3 } , UCOL_LESS } ,
2011-12-06 00:23:46 +00:00
} ;
/* European Ordering rules: ignore currency characters. */
const char * eorRules [ ] = {
" [import root-u-co-eor] " ,
} ;
const OneTestCase eorTests [ ] = {
2011-12-09 21:29:27 +00:00
{ { 0x0110 } , { 0x00F0 } , UCOL_LESS } ,
{ { 0x00a3 } , { 0x00a5 } , UCOL_EQUAL } ,
{ { 0x0061 } , { 0x0061 , 0x00a3 } , UCOL_EQUAL } ,
2011-12-06 00:23:46 +00:00
} ;
const char * fiStdRules [ ] = {
" [import fi-u-co-standard] " ,
} ;
const OneTestCase fiStdTests [ ] = {
2011-12-09 21:29:27 +00:00
{ { 0x0110 } , { 0x00F0 } , UCOL_GREATER } ,
{ { 0x00a3 } , { 0x00a5 } , UCOL_LESS } ,
{ { 0x0061 } , { 0x0061 , 0x00a3 } , UCOL_LESS } ,
2011-12-06 00:23:46 +00:00
} ;
/* Both European Ordering Rules and Fi Standard Rules. */
const char * eorFiStdRules [ ] = {
" [import root-u-co-eor][import fi-u-co-standard] " ,
} ;
/* This is essentially same as the one before once fi.txt is updated with import. */
const char * fiEorRules [ ] = {
" [import fi-u-co-eor] " ,
} ;
const OneTestCase fiEorTests [ ] = {
2011-12-09 21:29:27 +00:00
{ { 0x0110 } , { 0x00F0 } , UCOL_GREATER } ,
{ { 0x00a3 } , { 0x00a5 } , UCOL_EQUAL } ,
{ { 0x0061 } , { 0x0061 , 0x00a3 } , UCOL_EQUAL } ,
2011-12-06 00:23:46 +00:00
} ;
doTestOneTestCase ( defaultTests , LEN ( defaultTests ) , defaultRules , LEN ( defaultRules ) ) ;
doTestOneTestCase ( eorTests , LEN ( eorTests ) , eorRules , LEN ( eorRules ) ) ;
doTestOneTestCase ( fiStdTests , LEN ( fiStdTests ) , fiStdRules , LEN ( fiStdRules ) ) ;
doTestOneTestCase ( fiEorTests , LEN ( fiEorTests ) , eorFiStdRules , LEN ( eorFiStdRules ) ) ;
2013-09-17 06:26:26 +00:00
log_knownIssue ( " 8962 " , NULL ) ;
2011-12-06 00:23:46 +00:00
/* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
eor {
Sequence {
" [import root-u-co-eor][import fi-u-co-standard] "
}
Version { " 21.0 " }
}
*/
/* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
}
2012-08-03 21:51:00 +00:00
# endif
2011-12-06 00:23:46 +00:00
#if 0
/*
* This test case tests inclusion with the unihan rules , but this cannot be included now , unless
* the resource files are built with - includeUnihanColl option .
* TODO : Uncomment this function and make it work when unihan rules are built by default .
*/
static void TestImportRulesCJKWithUnihan ( void )
{
/* DUCET. */
const char * defaultRules [ ] = {
" &a<b " , /* Dummy rule. */
} ;
const OneTestCase defaultTests [ ] = {
2011-12-09 21:29:27 +00:00
{ { 0x3402 } , { 0x4e1e } , UCOL_GREATER } ,
2011-12-06 00:23:46 +00:00
} ;
/* European Ordering rules: ignore currency characters. */
const char * unihanRules [ ] = {
" [import ko-u-co-unihan] " ,
} ;
const OneTestCase unihanTests [ ] = {
2011-12-09 21:29:27 +00:00
{ { 0x3402 } , { 0x4e1e } , UCOL_LESS } ,
2011-12-06 00:23:46 +00:00
} ;
doTestOneTestCase ( defaultTests , LEN ( defaultTests ) , defaultRules , LEN ( defaultRules ) ) ;
doTestOneTestCase ( unihanTests , LEN ( unihanTests ) , unihanRules , LEN ( unihanRules ) ) ;
}
# endif
2010-11-01 22:23:49 +00:00
static void TestImport ( void )
{
UCollator * vicoll ;
UCollator * escoll ;
UCollator * viescoll ;
UCollator * importviescoll ;
UParseError error ;
UErrorCode status = U_ZERO_ERROR ;
UChar * virules ;
int32_t viruleslength ;
UChar * esrules ;
int32_t esruleslength ;
UChar * viesrules ;
int32_t viesruleslength ;
char srules [ 500 ] = " [import vi][import es] " ;
UChar rules [ 500 ] ;
uint32_t length = 0 ;
int32_t itemCount ;
int32_t i , k ;
UChar32 start ;
UChar32 end ;
UChar str [ 500 ] ;
int32_t strLength ;
uint8_t sk1 [ 500 ] ;
uint8_t sk2 [ 500 ] ;
UBool b ;
USet * tailoredSet ;
USet * importTailoredSet ;
vicoll = ucol_open ( " vi " , & status ) ;
2010-11-11 05:37:40 +00:00
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: Call ucol_open( \" vi \" , ...): %s \n " , myErrorName ( status ) ) ;
return ;
}
2010-11-01 22:23:49 +00:00
virules = ( UChar * ) ucol_getRules ( vicoll , & viruleslength ) ;
2014-04-01 19:59:27 +00:00
if ( viruleslength = = 0 ) {
log_data_err ( " missing vi tailoring rule string \n " ) ;
ucol_close ( vicoll ) ;
return ;
}
2010-11-01 22:23:49 +00:00
escoll = ucol_open ( " es " , & status ) ;
esrules = ( UChar * ) ucol_getRules ( escoll , & esruleslength ) ;
viesrules = ( UChar * ) uprv_malloc ( ( viruleslength + esruleslength + 1 ) * sizeof ( UChar * ) ) ;
viesrules [ 0 ] = 0 ;
u_strcat ( viesrules , virules ) ;
u_strcat ( viesrules , esrules ) ;
viesruleslength = viruleslength + esruleslength ;
viescoll = ucol_openRules ( viesrules , viesruleslength , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
/* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
length = u_unescape ( srules , rules , 500 ) ;
importviescoll = ucol_openRules ( rules , length , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
tailoredSet = ucol_getTailoredSet ( viescoll , & status ) ;
importTailoredSet = ucol_getTailoredSet ( importviescoll , & status ) ;
if ( ! uset_equals ( tailoredSet , importTailoredSet ) ) {
log_err ( " Tailored sets not equal " ) ;
}
uset_close ( importTailoredSet ) ;
itemCount = uset_getItemCount ( tailoredSet ) ;
for ( i = 0 ; i < itemCount ; i + + ) {
strLength = uset_getItem ( tailoredSet , i , & start , & end , str , 500 , & status ) ;
if ( strLength < 2 ) {
for ( ; start < = end ; start + + ) {
k = 0 ;
U16_APPEND ( str , k , 500 , start , b ) ;
2013-03-21 01:42:01 +00:00
( void ) b ; /* Suppress set but not used warning. */
2010-11-01 22:23:49 +00:00
ucol_getSortKey ( viescoll , str , 1 , sk1 , 500 ) ;
ucol_getSortKey ( importviescoll , str , 1 , sk2 , 500 ) ;
if ( compare_uint8_t_arrays ( sk1 , sk2 ) ! = 0 ) {
log_err ( " Sort key for %s not equal \n " , str ) ;
break ;
}
}
} else {
ucol_getSortKey ( viescoll , str , strLength , sk1 , 500 ) ;
ucol_getSortKey ( importviescoll , str , strLength , sk2 , 500 ) ;
if ( compare_uint8_t_arrays ( sk1 , sk2 ) ! = 0 ) {
log_err ( " ZZSort key for %s not equal \n " , str ) ;
break ;
}
}
}
uset_close ( tailoredSet ) ;
2010-11-02 16:46:23 +00:00
uprv_free ( viesrules ) ;
ucol_close ( vicoll ) ;
ucol_close ( escoll ) ;
ucol_close ( viescoll ) ;
ucol_close ( importviescoll ) ;
2010-11-01 22:23:49 +00:00
}
static void TestImportWithType ( void )
{
UCollator * vicoll ;
UCollator * decoll ;
UCollator * videcoll ;
UCollator * importvidecoll ;
UParseError error ;
UErrorCode status = U_ZERO_ERROR ;
const UChar * virules ;
int32_t viruleslength ;
const UChar * derules ;
int32_t deruleslength ;
UChar * viderules ;
int32_t videruleslength ;
const char srules [ 500 ] = " [import vi][import de-u-co-phonebk] " ;
UChar rules [ 500 ] ;
uint32_t length = 0 ;
int32_t itemCount ;
int32_t i , k ;
UChar32 start ;
UChar32 end ;
UChar str [ 500 ] ;
int32_t strLength ;
uint8_t sk1 [ 500 ] ;
uint8_t sk2 [ 500 ] ;
USet * tailoredSet ;
USet * importTailoredSet ;
vicoll = ucol_open ( " vi " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
virules = ucol_getRules ( vicoll , & viruleslength ) ;
2014-04-01 19:59:27 +00:00
if ( viruleslength = = 0 ) {
log_data_err ( " missing vi tailoring rule string \n " ) ;
ucol_close ( vicoll ) ;
return ;
}
2010-11-01 22:23:49 +00:00
/* decoll = ucol_open("de@collation=phonebook", &status); */
decoll = ucol_open ( " de-u-co-phonebk " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
derules = ucol_getRules ( decoll , & deruleslength ) ;
viderules = ( UChar * ) uprv_malloc ( ( viruleslength + deruleslength + 1 ) * sizeof ( UChar * ) ) ;
viderules [ 0 ] = 0 ;
u_strcat ( viderules , virules ) ;
u_strcat ( viderules , derules ) ;
videruleslength = viruleslength + deruleslength ;
videcoll = ucol_openRules ( viderules , videruleslength , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
/* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
length = u_unescape ( srules , rules , 500 ) ;
importvidecoll = ucol_openRules ( rules , length , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
tailoredSet = ucol_getTailoredSet ( videcoll , & status ) ;
importTailoredSet = ucol_getTailoredSet ( importvidecoll , & status ) ;
if ( ! uset_equals ( tailoredSet , importTailoredSet ) ) {
log_err ( " Tailored sets not equal " ) ;
}
uset_close ( importTailoredSet ) ;
itemCount = uset_getItemCount ( tailoredSet ) ;
for ( i = 0 ; i < itemCount ; i + + ) {
strLength = uset_getItem ( tailoredSet , i , & start , & end , str , 500 , & status ) ;
if ( strLength < 2 ) {
for ( ; start < = end ; start + + ) {
k = 0 ;
U16_APPEND_UNSAFE ( str , k , start ) ;
ucol_getSortKey ( videcoll , str , 1 , sk1 , 500 ) ;
ucol_getSortKey ( importvidecoll , str , 1 , sk2 , 500 ) ;
if ( compare_uint8_t_arrays ( sk1 , sk2 ) ! = 0 ) {
log_err ( " Sort key for %s not equal \n " , str ) ;
break ;
}
}
} else {
ucol_getSortKey ( videcoll , str , strLength , sk1 , 500 ) ;
ucol_getSortKey ( importvidecoll , str , strLength , sk2 , 500 ) ;
if ( compare_uint8_t_arrays ( sk1 , sk2 ) ! = 0 ) {
log_err ( " Sort key for %s not equal \n " , str ) ;
break ;
}
}
}
uset_close ( tailoredSet ) ;
2010-11-02 16:46:23 +00:00
uprv_free ( viderules ) ;
ucol_close ( videcoll ) ;
ucol_close ( importvidecoll ) ;
ucol_close ( vicoll ) ;
ucol_close ( decoll ) ;
2010-11-01 22:23:49 +00:00
}
2011-04-07 18:33:27 +00:00
/*
 * First long input string referenced from longUpperStrItems, which
 * TestCaseLevelBufferOverflow() walks to generate sort keys.
 * The array is not NUL-terminated; its length is taken with MY_ARRAY_LEN.
 * Text encoded below:
 */
/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
static const UChar longUpperStr1 [ ] = { /* 155 chars */
0x49 , 0x56 , 0x20 , 0x49 , 0x4E , 0x54 , 0x45 , 0x52 , 0x4E , 0x41 , 0x54 , 0x49 , 0x4F , 0x4E , 0x41 , 0x4C ,
0x20 , 0x53 , 0x43 , 0x49 , 0x45 , 0x4E , 0x54 , 0x49 , 0x46 , 0x49 , 0x43 , 0x20 , 0x2D , 0x20 , 0x50 , 0x52 ,
0x41 , 0x43 , 0x54 , 0x49 , 0x43 , 0x41 , 0x4C , 0x20 , 0x43 , 0x4F , 0x4E , 0x46 , 0x45 , 0x52 , 0x45 , 0x4E ,
0x43 , 0x45 , 0x20 , 0x22 , 0x47 , 0x45 , 0x4F , 0x50 , 0x4F , 0x4C , 0x49 , 0x54 , 0x49 , 0x43 , 0x53 , 0x2C ,
0x20 , 0x47 , 0x45 , 0x4F , 0x45 , 0x43 , 0x4F , 0x4E , 0x4F , 0x4D , 0x49 , 0x43 , 0x53 , 0x20 , 0x41 , 0x4E ,
0x44 , 0x20 , 0x49 , 0x4E , 0x54 , 0x45 , 0x52 , 0x4E , 0x41 , 0x54 , 0x49 , 0x4F , 0x4E , 0x41 , 0x4C , 0x20 ,
0x52 , 0x45 , 0x4C , 0x41 , 0x54 , 0x49 , 0x4F , 0x4E , 0x53 , 0x20 , 0x50 , 0x52 , 0x4F , 0x42 , 0x4C , 0x45 ,
0x4D , 0x53 , 0x22 , 0x20 , 0x32 , 0x32 , 0x2D , 0x32 , 0x33 , 0x20 , 0x4A , 0x75 , 0x6E , 0x65 , 0x20 , 0x32 ,
0x30 , 0x31 , 0x30 , 0x2C , 0x20 , 0x53 , 0x74 , 0x2E , 0x20 , 0x50 , 0x65 , 0x74 , 0x65 , 0x72 , 0x73 , 0x62 ,
0x75 , 0x72 , 0x67 , 0x2C , 0x20 , 0x52 , 0x75 , 0x73 , 0x73 , 0x69 , 0x61
} ;
/*
 * Second long input string referenced from longUpperStrItems.
 * Each of the five rows below is one repetition of the same 25 code units
 * (a consonant followed by an accented vowel, twelve times, then a space).
 * The array is not NUL-terminated; its length is taken with MY_ARRAY_LEN.
 */
/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
static const UChar longUpperStr2 [ ] = { /* 125 chars, > 128 collation elements */
0x42 , 0xC1 , 0x43 , 0xC9 , 0x44 , 0xCD , 0x46 , 0xD3 , 0x47 , 0xDA , 0x48 , 0xC0 , 0x4A , 0xC8 , 0x4B , 0xCC , 0x4C , 0xD2 , 0x4D , 0xD9 , 0x4E , 0xC2 , 0x50 , 0xCA , 0x20 ,
0x42 , 0xC1 , 0x43 , 0xC9 , 0x44 , 0xCD , 0x46 , 0xD3 , 0x47 , 0xDA , 0x48 , 0xC0 , 0x4A , 0xC8 , 0x4B , 0xCC , 0x4C , 0xD2 , 0x4D , 0xD9 , 0x4E , 0xC2 , 0x50 , 0xCA , 0x20 ,
0x42 , 0xC1 , 0x43 , 0xC9 , 0x44 , 0xCD , 0x46 , 0xD3 , 0x47 , 0xDA , 0x48 , 0xC0 , 0x4A , 0xC8 , 0x4B , 0xCC , 0x4C , 0xD2 , 0x4D , 0xD9 , 0x4E , 0xC2 , 0x50 , 0xCA , 0x20 ,
0x42 , 0xC1 , 0x43 , 0xC9 , 0x44 , 0xCD , 0x46 , 0xD3 , 0x47 , 0xDA , 0x48 , 0xC0 , 0x4A , 0xC8 , 0x4B , 0xCC , 0x4C , 0xD2 , 0x4D , 0xD9 , 0x4E , 0xC2 , 0x50 , 0xCA , 0x20 ,
0x42 , 0xC1 , 0x43 , 0xC9 , 0x44 , 0xCD , 0x46 , 0xD3 , 0x47 , 0xDA , 0x48 , 0xC0 , 0x4A , 0xC8 , 0x4B , 0xCC , 0x4C , 0xD2 , 0x4D , 0xD9 , 0x4E , 0xC2 , 0x50 , 0xCA , 0x20
} ;
/*
 * Third long input string referenced from longUpperStrItems.
 * Each of the twelve rows below is one repetition of the same 27 code units
 * (0x41 through 0x5A followed by 0x20), giving 12 * 27 = 324 in total.
 * The array is not NUL-terminated; its length is taken with MY_ARRAY_LEN.
 */
/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
static const UChar longUpperStr3 [ ] = { /* 324 chars */
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20 ,
0x41 , 0x42 , 0x43 , 0x44 , 0x45 , 0x46 , 0x47 , 0x48 , 0x49 , 0x4A , 0x4B , 0x4C , 0x4D , 0x4E , 0x4F , 0x50 , 0x51 , 0x52 , 0x53 , 0x54 , 0x55 , 0x56 , 0x57 , 0x58 , 0x59 , 0x5A , 0x20
} ;
/* Number of elements in a true array; not meaningful when given a pointer. */
#define MY_ARRAY_LEN(array) (sizeof(array) / sizeof(array[0]))

/* One input string for TestCaseLevelBufferOverflow(): start pointer plus length in UChars. */
typedef struct {
    const UChar *longUpperStrPtr;
    int32_t      longUpperStrLen;
} LongUpperStrItem;

/*
 * Inputs for TestCaseLevelBufferOverflow(), ended by an entry whose pointer
 * is NULL. The strings must be listed in reverse collation order: the test
 * expects each sort key to compare lower than the one generated before it.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
    { NULL,          0                           }
};
2014-02-25 21:21:49 +00:00
enum { kCollKeyLenMax = 850 } ; /* may change with collation changes */
2011-04-07 18:33:27 +00:00
/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
static void TestCaseLevelBufferOverflow ( void )
{
UErrorCode status = U_ZERO_ERROR ;
UCollator * ucol = ucol_open ( " root " , & status ) ;
if ( U_SUCCESS ( status ) ) {
ucol_setAttribute ( ucol , UCOL_CASE_LEVEL , UCOL_ON , & status ) ;
if ( U_SUCCESS ( status ) ) {
const LongUpperStrItem * itemPtr ;
uint8_t sortKeyA [ kCollKeyLenMax ] , sortKeyB [ kCollKeyLenMax ] ;
for ( itemPtr = longUpperStrItems ; itemPtr - > longUpperStrPtr ! = NULL ; itemPtr + + ) {
int32_t sortKeyLen ;
if ( itemPtr > longUpperStrItems ) {
uprv_strcpy ( ( char * ) sortKeyB , ( char * ) sortKeyA ) ;
}
sortKeyLen = ucol_getSortKey ( ucol , itemPtr - > longUpperStrPtr , itemPtr - > longUpperStrLen , sortKeyA , kCollKeyLenMax ) ;
if ( sortKeyLen < = 0 | | sortKeyLen > kCollKeyLenMax ) {
log_err ( " ERROR sort key length from ucol_getSortKey is %d \n " , sortKeyLen ) ;
break ;
}
if ( itemPtr > longUpperStrItems ) {
int compareResult = uprv_strcmp ( ( char * ) sortKeyA , ( char * ) sortKeyB ) ;
if ( compareResult > = 0 ) {
log_err ( " ERROR in sort key comparison result, expected -1, got %d \n " , compareResult ) ;
}
}
}
} else {
log_err_status ( status , " ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s \n " , myErrorName ( status ) ) ;
}
ucol_close ( ucol ) ;
} else {
log_err_status ( status , " ERROR in ucol_open for root: %s \n " , myErrorName ( status ) ) ;
}
}
2013-12-11 22:01:45 +00:00
/* Test for #10595 */
static const UChar testJapaneseName [ ] = { 0x4F50 , 0x3005 , 0x6728 , 0x002C , 0x6B66 , 0 } ; /* Sa sa Ki, Takeshi */
# define KEY_PART_SIZE 16
static void TestNextSortKeyPartJaIdentical ( void )
{
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll ;
uint8_t keyPart [ KEY_PART_SIZE ] ;
UCharIterator iter ;
uint32_t state [ 2 ] = { 0 , 0 } ;
int32_t keyPartLen ;
coll = ucol_open ( " ja " , & status ) ;
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_IDENTICAL , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of Japanese collator with identical strength: %s \n " , myErrorName ( status ) ) ;
return ;
}
uiter_setString ( & iter , testJapaneseName , 5 ) ;
keyPartLen = KEY_PART_SIZE ;
while ( keyPartLen = = KEY_PART_SIZE ) {
keyPartLen = ucol_nextSortKeyPart ( coll , & iter , state , keyPart , KEY_PART_SIZE , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in iterating next sort key part: %s \n " , myErrorName ( status ) ) ;
break ;
}
}
ucol_close ( coll ) ;
}
2011-04-07 18:33:27 +00:00
2002-10-30 05:44:54 +00:00
/*
 * Registration shorthand used by addMiscCollTest(): calls addTest() with the
 * local variable `root`, the address of function x, and a path string made of
 * the literal prefix followed by the stringified name of x.
 */
# define TEST(x) addTest(root, &x, "tscoll / cmsccoll / " # x)
2001-03-20 07:22:33 +00:00
/*
 * Registers the collation tests of this file with the test tree passed in
 * through `root`, one TEST() call per test function, in the order listed.
 * Entries that are commented out are not registered; where a reason is known
 * it is given next to the entry.
 */
void addMiscCollTest ( TestNode * * root )
2001-05-17 23:09:35 +00:00
{
2004-04-28 05:31:19 +00:00
TEST ( TestRuleOptions ) ;
TEST ( TestBeforePrefixFailure ) ;
TEST ( TestContractionClosure ) ;
TEST ( TestPrefixCompose ) ;
TEST ( TestStrCollIdenticalPrefix ) ;
TEST ( TestPrefix ) ;
2004-11-11 23:34:58 +00:00
TEST ( TestNewJapanese ) ;
2004-04-28 05:31:19 +00:00
/*TEST(TestLimitations);*/
TEST ( TestNonChars ) ;
TEST ( TestExtremeCompression ) ;
TEST ( TestSurrogates ) ;
TEST ( TestVariableTopSetting ) ;
2014-02-25 21:21:49 +00:00
TEST ( TestMaxVariable ) ;
2004-04-28 05:31:19 +00:00
TEST ( TestBocsuCoverage ) ;
TEST ( TestCyrillicTailoring ) ;
TEST ( TestCase ) ;
TEST ( IncompleteCntTest ) ;
TEST ( BlackBirdTest ) ;
TEST ( FunkyATest ) ;
TEST ( BillFairmanTest ) ;
TEST ( TestChMove ) ;
TEST ( TestImplicitTailoring ) ;
TEST ( TestFCDProblem ) ;
TEST ( TestEmptyRule ) ;
2004-05-14 07:10:56 +00:00
/*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
2004-04-28 05:31:19 +00:00
TEST ( TestJ815 ) ;
/*TEST(TestJ831);*/ /* we changed lv locale */
TEST ( TestBefore ) ;
TEST ( TestHangulTailoring ) ;
TEST ( TestUCARules ) ;
TEST ( TestIncrementalNormalize ) ;
TEST ( TestComposeDecompose ) ;
TEST ( TestCompressOverlap ) ;
TEST ( TestContraction ) ;
TEST ( TestExpansion ) ;
/*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
/*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
2002-10-30 06:09:25 +00:00
TEST ( TestOptimize ) ;
2002-10-30 05:44:54 +00:00
TEST ( TestSuppressContractions ) ;
2003-02-20 01:13:36 +00:00
TEST ( Alexis2 ) ;
2003-04-30 23:26:55 +00:00
TEST ( TestHebrewUCA ) ;
TEST ( TestPartialSortKeyTermination ) ;
TEST ( TestSettings ) ;
2003-05-01 00:57:27 +00:00
TEST ( TestEquals ) ;
2003-05-29 21:15:26 +00:00
TEST ( TestJ2726 ) ;
2003-07-22 16:49:56 +00:00
TEST ( NullRule ) ;
2003-08-18 22:12:04 +00:00
TEST ( TestNumericCollation ) ;
2003-11-12 20:45:53 +00:00
TEST ( TestTibetanConformance ) ;
2004-01-16 07:14:08 +00:00
TEST ( TestPinyinProblem ) ;
2004-04-28 05:31:19 +00:00
TEST ( TestSeparateTrees ) ;
2004-05-08 07:59:36 +00:00
TEST ( TestBeforePinyin ) ;
TEST ( TestBeforeTightening ) ;
/*TEST(TestMoreBefore);*/
2004-05-14 07:10:56 +00:00
TEST ( TestTailorNULL ) ;
2005-09-17 06:26:58 +00:00
TEST ( TestUpperFirstQuaternary ) ;
2006-01-28 08:25:52 +00:00
TEST ( TestJ4960 ) ;
2006-07-06 06:30:06 +00:00
TEST ( TestJ5223 ) ;
2006-08-22 17:51:36 +00:00
TEST ( TestJ5232 ) ;
2006-09-07 20:12:11 +00:00
TEST ( TestJ5367 ) ;
2008-04-17 05:19:19 +00:00
TEST ( TestHiragana ) ;
2007-11-30 04:29:20 +00:00
TEST ( TestSortKeyConsistency ) ;
TEST ( TestVI5913 ) ; /* VI, RO tailored rules */
2007-12-21 00:08:12 +00:00
TEST ( TestCroatianSortKey ) ;
2008-04-04 22:47:43 +00:00
TEST ( TestTailor6179 ) ;
TEST ( TestUCAPrecontext ) ;
2008-06-11 21:37:03 +00:00
TEST ( TestOutOfBuffer5468 ) ;
2010-02-09 19:59:06 +00:00
TEST ( TestSameStrengthList ) ;
2010-11-01 22:23:49 +00:00
2010-06-14 21:27:44 +00:00
TEST ( TestSameStrengthListQuoted ) ;
TEST ( TestSameStrengthListSupplemental ) ;
TEST ( TestSameStrengthListQwerty ) ;
2010-10-14 18:44:44 +00:00
TEST ( TestSameStrengthListQuotedQwerty ) ;
2010-06-14 21:27:44 +00:00
TEST ( TestSameStrengthListRanges ) ;
TEST ( TestSameStrengthListSupplementalRanges ) ;
TEST ( TestSpecialCharacters ) ;
2010-10-14 18:44:44 +00:00
TEST ( TestPrivateUseCharacters ) ;
TEST ( TestPrivateUseCharactersInList ) ;
TEST ( TestPrivateUseCharactersInRange ) ;
2010-11-01 22:23:49 +00:00
TEST ( TestInvalidListsAndRanges ) ;
2011-12-06 00:23:46 +00:00
TEST ( TestImportRulesDeWithPhonebook ) ;
2012-02-02 22:53:19 +00:00
/* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
2011-12-06 00:23:46 +00:00
/* TEST(TestImportRulesCJKWithUnihan); */
2010-11-01 22:23:49 +00:00
TEST ( TestImport ) ;
TEST ( TestImportWithType ) ;
2010-10-30 00:42:12 +00:00
2010-11-04 20:12:39 +00:00
TEST ( TestBeforeRuleWithScriptReordering ) ;
TEST ( TestNonLeadBytesDuringCollationReordering ) ;
2010-11-05 20:01:14 +00:00
TEST ( TestReorderingAPI ) ;
2011-03-18 22:52:30 +00:00
TEST ( TestReorderingAPIWithRuleCreatedCollator ) ;
TEST ( TestEquivalentReorderingScripts ) ;
2010-10-30 00:42:12 +00:00
TEST ( TestGreekFirstReorder ) ;
TEST ( TestGreekLastReorder ) ;
TEST ( TestNonScriptReorder ) ;
TEST ( TestHaniReorder ) ;
2012-02-03 00:05:00 +00:00
TEST ( TestHaniReorderWithOtherRules ) ;
2011-01-27 20:50:53 +00:00
TEST ( TestMultipleReorder ) ;
2011-04-12 18:23:27 +00:00
TEST ( TestReorderingAcrossCloning ) ;
2012-08-03 21:51:00 +00:00
TEST ( TestReorderWithNumericCollation ) ;
2011-11-23 19:50:11 +00:00
2011-04-07 18:33:27 +00:00
TEST ( TestCaseLevelBufferOverflow ) ;
2013-12-11 22:01:45 +00:00
TEST ( TestNextSortKeyPartJaIdentical ) ;
2001-03-20 07:22:33 +00:00
}
2002-09-20 17:54:45 +00:00
# endif /* #if !UCONFIG_NO_COLLATION */