2010-06-14 21:27:44 +00:00
2001-03-02 00:42:43 +00:00
/********************************************************************
2001-05-17 23:09:35 +00:00
* COPYRIGHT :
2010-01-06 23:50:03 +00:00
* Copyright ( c ) 2001 - 2010 , International Business Machines Corporation and
2001-03-02 00:42:43 +00:00
* others . All Rights Reserved .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-03-22 19:33:43 +00:00
/*******************************************************************************
2001-03-02 00:42:43 +00:00
*
* File cmsccoll . C
*
2001-03-22 19:33:43 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-03-02 00:42:43 +00:00
/**
2001-03-22 19:33:43 +00:00
* These are the tests specific to ICU 1.8 and above , that I didn ' t know where
* to fit .
2001-03-02 00:42:43 +00:00
*/
2001-03-17 00:59:44 +00:00
# include <stdio.h>
2002-09-20 17:54:45 +00:00
2001-03-02 00:42:43 +00:00
# include "unicode/utypes.h"
2002-09-20 17:54:45 +00:00
# if !UCONFIG_NO_COLLATION
2001-03-02 00:42:43 +00:00
# include "unicode/ucol.h"
2001-03-15 22:29:33 +00:00
# include "unicode/ucoleitr.h"
2001-03-02 00:42:43 +00:00
# include "unicode/uloc.h"
# include "cintltst.h"
# include "ccolltst.h"
2001-03-06 03:42:35 +00:00
# include "callcoll.h"
2001-03-02 00:42:43 +00:00
# include "unicode/ustring.h"
# include "string.h"
2001-03-20 00:56:37 +00:00
# include "ucol_imp.h"
2001-03-20 07:22:33 +00:00
# include "ucol_tok.h"
# include "cmemory.h"
2001-05-25 22:00:24 +00:00
# include "cstring.h"
2003-12-29 03:43:38 +00:00
# include "uassert.h"
2001-08-16 00:58:53 +00:00
# include "unicode/parseerr.h"
2003-02-20 01:13:36 +00:00
# include "unicode/ucnv.h"
2008-01-05 01:27:56 +00:00
# include "unicode/ures.h"
2010-11-04 20:12:39 +00:00
# include "unicode/uscript.h"
2003-04-30 23:26:55 +00:00
# include "uparse.h"
2008-04-04 22:47:43 +00:00
# include "putilimp.h"
2001-03-02 00:42:43 +00:00
2004-04-28 05:31:19 +00:00
/* Element count of a true array (not a pointer). The argument is parenthesized
 * before indexing so that expression arguments such as *ptrToArray work. */
#define LEN(a) (sizeof(a) / sizeof((a)[0]))
/* Row width, in UChars, of the fixed-width UChar test tables in this file. */
#define MAX_TOKEN_LEN 16
2003-05-15 17:38:42 +00:00
2007-07-19 00:19:21 +00:00
typedef UCollationResult tst_strcoll ( void * collator , const int object ,
2001-05-17 23:09:35 +00:00
const UChar * source , const int sLen ,
2001-04-13 00:05:42 +00:00
const UChar * target , const int tLen ) ;
2001-03-30 03:49:29 +00:00
2001-03-06 00:57:48 +00:00
2001-03-15 22:29:33 +00:00
2001-04-30 19:11:32 +00:00
/* First string table walked by IncompleteCntTest; every entry is checked
 * against every later entry with an expected result of UCOL_LESS. */
static const char cnt1[][10] = {
    " AA ",  " AC ",  " AZ ",
    " AQ ",  " AB ",  " ABZ ",
    " ABQ ", " Z ",   " ABC ",
    " Q ",   " B "
};
/* Second string table walked by IncompleteCntTest; every entry is checked
 * against every later entry with an expected result of UCOL_LESS. */
static const char cnt2[][10] = {
    " DA ",    " DAD ",  " DAZ ",
    " MAR ",   " Z ",    " DAVIS ",
    " MARK ",  " DAV ",  " DAVI "
};
2002-03-28 18:26:25 +00:00
static void IncompleteCntTest ( void )
2001-03-14 18:55:56 +00:00
{
UErrorCode status = U_ZERO_ERROR ;
2001-10-17 02:19:48 +00:00
UChar temp [ 90 ] ;
UChar t1 [ 90 ] ;
UChar t2 [ 90 ] ;
2001-03-14 18:55:56 +00:00
UCollator * coll = NULL ;
uint32_t i = 0 , j = 0 ;
uint32_t size = 0 ;
2001-05-17 23:09:35 +00:00
2001-03-14 18:55:56 +00:00
u_uastrcpy ( temp , " & Z < ABC < Q < B " ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( temp , u_strlen ( temp ) , UCOL_OFF , UCOL_DEFAULT_STRENGTH , NULL , & status ) ;
2001-03-15 02:35:49 +00:00
2001-03-14 18:55:56 +00:00
if ( U_SUCCESS ( status ) ) {
size = sizeof ( cnt1 ) / sizeof ( cnt1 [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
2001-03-15 22:29:33 +00:00
UCollationElements * iter ;
2001-03-14 18:55:56 +00:00
u_uastrcpy ( t1 , cnt1 [ i ] ) ;
u_uastrcpy ( t2 , cnt1 [ j ] ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
2001-03-15 22:29:33 +00:00
/* synwee : added collation element iterator test */
iter = ucol_openElements ( coll , t2 , u_strlen ( t2 ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Creation of iterator failed \n " ) ;
break ;
}
backAndForth ( iter ) ;
2002-07-29 21:04:18 +00:00
ucol_closeElements ( iter ) ;
2001-03-14 18:55:56 +00:00
}
}
2001-05-17 23:09:35 +00:00
}
2001-03-02 00:42:43 +00:00
2001-03-14 18:55:56 +00:00
ucol_close ( coll ) ;
2001-03-02 00:42:43 +00:00
2001-03-14 18:55:56 +00:00
u_uastrcpy ( temp , " & Z < DAVIS < MARK <DAV " ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( temp , u_strlen ( temp ) , UCOL_OFF , UCOL_DEFAULT_STRENGTH , NULL , & status ) ;
2001-03-15 02:35:49 +00:00
2001-03-14 18:55:56 +00:00
if ( U_SUCCESS ( status ) ) {
size = sizeof ( cnt2 ) / sizeof ( cnt2 [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
2001-03-15 22:29:33 +00:00
UCollationElements * iter ;
2001-03-14 18:55:56 +00:00
u_uastrcpy ( t1 , cnt2 [ i ] ) ;
u_uastrcpy ( t2 , cnt2 [ j ] ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
2001-03-15 22:29:33 +00:00
/* synwee : added collation element iterator test */
iter = ucol_openElements ( coll , t2 , u_strlen ( t2 ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Creation of iterator failed \n " ) ;
break ;
}
backAndForth ( iter ) ;
2002-07-29 21:04:18 +00:00
ucol_closeElements ( iter ) ;
2001-03-14 18:55:56 +00:00
}
}
2001-05-17 23:09:35 +00:00
}
2001-03-14 18:55:56 +00:00
ucol_close ( coll ) ;
2001-03-02 00:42:43 +00:00
}
2001-03-14 18:55:56 +00:00
2001-03-15 02:35:49 +00:00
/* Strings used by BlackBirdTest for the UCOL_SHIFTED passes, listed in the
 * order the quaternary pass expects them to sort. */
static const char shifted[][20] = {
    " black bird ",  " black-bird ",  " blackbird ",
    " black Bird ",  " black-Bird ",  " blackBird ",
    " black birds ", " black-birds ", " blackbirds "
};
const static UCollationResult shiftedTert [ ] = {
2007-07-19 00:19:21 +00:00
UCOL_EQUAL ,
2001-03-15 02:35:49 +00:00
UCOL_EQUAL ,
UCOL_EQUAL ,
UCOL_LESS ,
UCOL_EQUAL ,
UCOL_EQUAL ,
UCOL_LESS ,
UCOL_EQUAL ,
UCOL_EQUAL
} ;
/* Strings used by BlackBirdTest for the UCOL_NON_IGNORABLE pass, listed in the
 * order that pass expects them to sort. */
static const char nonignorable[][20] = {
    " black bird ", " black Bird ", " black birds ",
    " black-bird ", " black-Bird ", " black-birds ",
    " blackbird ",  " blackBird ",  " blackbirds "
};
2002-03-28 18:26:25 +00:00
static void BlackBirdTest ( void ) {
2001-03-15 02:35:49 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-10-17 02:19:48 +00:00
UChar t1 [ 90 ] ;
UChar t2 [ 90 ] ;
2001-03-15 02:35:49 +00:00
uint32_t i = 0 , j = 0 ;
uint32_t size = 0 ;
2001-03-20 20:11:48 +00:00
UCollator * coll = ucol_open ( " en_US " , & status ) ;
2001-03-15 02:35:49 +00:00
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_OFF , & status ) ;
ucol_setAttribute ( coll , UCOL_ALTERNATE_HANDLING , UCOL_NON_IGNORABLE , & status ) ;
if ( U_SUCCESS ( status ) ) {
size = sizeof ( nonignorable ) / sizeof ( nonignorable [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
u_uastrcpy ( t1 , nonignorable [ i ] ) ;
u_uastrcpy ( t2 , nonignorable [ j ] ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
}
}
2001-05-17 23:09:35 +00:00
}
2001-03-15 02:35:49 +00:00
ucol_setAttribute ( coll , UCOL_ALTERNATE_HANDLING , UCOL_SHIFTED , & status ) ;
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_QUATERNARY , & status ) ;
if ( U_SUCCESS ( status ) ) {
size = sizeof ( shifted ) / sizeof ( shifted [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
u_uastrcpy ( t1 , shifted [ i ] ) ;
u_uastrcpy ( t2 , shifted [ j ] ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
}
}
2001-05-17 23:09:35 +00:00
}
2001-03-15 02:35:49 +00:00
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_TERTIARY , & status ) ;
if ( U_SUCCESS ( status ) ) {
size = sizeof ( shifted ) / sizeof ( shifted [ 0 ] ) ;
for ( i = 1 ; i < size ; i + + ) {
u_uastrcpy ( t1 , shifted [ i - 1 ] ) ;
u_uastrcpy ( t2 , shifted [ i ] ) ;
doTest ( coll , t1 , t2 , shiftedTert [ i ] ) ;
}
2001-05-17 23:09:35 +00:00
}
2001-03-15 02:35:49 +00:00
ucol_close ( coll ) ;
}
2001-03-16 19:03:53 +00:00
/* Source strings for FunkyATest; row i is compared with testTargetCases[i]. */
static const UChar testSourceCases[][MAX_TOKEN_LEN] = {
    { 0x0041 /* 'A' */, 0x0300, 0x0301, 0x0000 },
    { 0x0041 /* 'A' */, 0x0300, 0x0316, 0x0000 },
    { 0x0041 /* 'A' */, 0x0300, 0x0000 },
    { 0x00C0, 0x0301, 0x0000 },
    /* this would work with forced normalization */
    { 0x00C0, 0x0316, 0x0000 }
};

/* Target strings for FunkyATest, parallel to testSourceCases. */
static const UChar testTargetCases[][MAX_TOKEN_LEN] = {
    { 0x0041 /* 'A' */, 0x0301, 0x0300, 0x0000 },
    { 0x0041 /* 'A' */, 0x0316, 0x0300, 0x0000 },
    { 0x00C0, 0 },
    { 0x0041 /* 'A' */, 0x0301, 0x0300, 0x0000 },
    /* this would work with forced normalization */
    { 0x0041 /* 'A' */, 0x0316, 0x0300, 0x0000 }
};

/* Expected comparison result for each source/target row above. */
static const UCollationResult results[] = {
    UCOL_GREATER,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_GREATER,
    UCOL_EQUAL
};
2002-03-28 18:26:25 +00:00
static void FunkyATest ( void )
2001-03-16 19:03:53 +00:00
{
2001-05-17 23:09:35 +00:00
2001-03-16 19:03:53 +00:00
int32_t i ;
UErrorCode status = U_ZERO_ERROR ;
2001-05-04 00:02:24 +00:00
UCollator * myCollation ;
2001-03-20 20:11:48 +00:00
myCollation = ucol_open ( " en_US " , & status ) ;
2001-03-16 19:03:53 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
2001-03-30 03:49:29 +00:00
return ;
2001-03-16 19:03:53 +00:00
}
log_verbose ( " Testing some A letters, for some reason \n " ) ;
ucol_setAttribute ( myCollation , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
for ( i = 0 ; i < 4 ; i + + )
{
doTest ( myCollation , testSourceCases [ i ] , testTargetCases [ i ] , results [ i ] ) ;
}
ucol_close ( myCollation ) ;
}
2001-03-20 00:56:37 +00:00
UColAttributeValue caseFirst [ ] = {
UCOL_OFF ,
UCOL_LOWER_FIRST ,
UCOL_UPPER_FIRST
} ;
2001-03-17 00:59:44 +00:00
UColAttributeValue alternateHandling [ ] = {
2001-03-20 00:56:37 +00:00
UCOL_NON_IGNORABLE ,
2001-03-17 00:59:44 +00:00
UCOL_SHIFTED
} ;
UColAttributeValue caseLevel [ ] = {
2001-03-20 00:56:37 +00:00
UCOL_OFF ,
2001-03-17 00:59:44 +00:00
UCOL_ON
} ;
UColAttributeValue strengths [ ] = {
2001-03-20 00:56:37 +00:00
UCOL_PRIMARY ,
2001-03-17 00:59:44 +00:00
UCOL_SECONDARY ,
UCOL_TERTIARY ,
UCOL_QUATERNARY ,
UCOL_IDENTICAL
} ;
2003-05-12 16:45:40 +00:00
#if 0
/* Printable names, parallel to the `strengths` table. */
static const char *strengthsC[] = {
    " UCOL_PRIMARY ",
    " UCOL_SECONDARY ",
    " UCOL_TERTIARY ",
    " UCOL_QUATERNARY ",
    " UCOL_IDENTICAL "
};

/* Printable names, parallel to the `caseFirst` table. */
static const char *caseFirstC[] = {
    " UCOL_OFF ",
    " UCOL_LOWER_FIRST ",
    " UCOL_UPPER_FIRST "
};

/* Printable names, parallel to the `alternateHandling` table. */
static const char *alternateHandlingC[] = {
    " UCOL_NON_IGNORABLE ",
    " UCOL_SHIFTED "
};

/* Printable names, parallel to the `caseLevel` table. */
static const char *caseLevelC[] = {
    " UCOL_OFF ",
    " UCOL_ON "
};

/*
 * not used currently - does not test only prints
 *
 * Writes to stderr the code units of a sample string and then, for every
 * combination of case-first / alternate-handling / case-level / strength
 * values from the tables above, the sort key produced for that string.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open(" en_US ", &status);
    uint32_t h, i, j, k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose(" PrintMarkDavis ");

    /* Without a collator there is nothing to print. */
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: could not open collator: %s\n", myErrorName(status));
        return;
    }

    u_uastrcpy(m, " Mark Davis ");
    sizem = u_strlen(m);
    m[1] = 0xe4;

    for (i = 0; i < sizem; i++) {
        fprintf(stderr, " \\ u%04X ", m[i]);
    }
    fprintf(stderr, " \n ");

    for (h = 0; h < sizeof(caseFirst) / sizeof(caseFirst[0]); h++) {
        /* Index with h: at this point i still holds the value left by an
         * earlier loop and is out of range for caseFirst. */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, " caseFirst: %s \n ", caseFirstC[h]);

        for (i = 0; i < sizeof(alternateHandling) / sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s \n ", alternateHandlingC[i]);

            for (j = 0; j < sizeof(caseLevel) / sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s \n ", caseLevelC[j]);

                for (k = 0; k < sizeof(strengths) / sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s \n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, " %s \n ", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }
            }
        }
    }

    /* Release the collator opened at the top of the function. */
    ucol_close(coll);
}
#endif
2001-03-17 00:59:44 +00:00
2002-03-28 18:26:25 +00:00
static void BillFairmanTest ( void ) {
2001-03-21 00:24:48 +00:00
/*
* * check for actual locale via ICU resource bundles
* *
* * lp points to the original locale ( " fr_FR_.... " )
*/
2001-03-27 00:40:24 +00:00
UResourceBundle * lr , * cr ;
UErrorCode lec = U_ZERO_ERROR ;
const char * lp = " fr_FR_you_ll_never_find_this_locale " ;
log_verbose ( " BillFairmanTest \n " ) ;
lr = ures_open ( NULL , lp , & lec ) ;
if ( lr ) {
2003-11-11 21:41:47 +00:00
cr = ures_getByKey ( lr , " collations " , 0 , & lec ) ;
2001-03-27 00:40:24 +00:00
if ( cr ) {
2009-04-23 00:23:57 +00:00
lp = ures_getLocaleByType ( cr , ULOC_ACTUAL_LOCALE , & lec ) ;
2001-03-27 00:40:24 +00:00
if ( lp ) {
if ( U_SUCCESS ( lec ) ) {
if ( strcmp ( lp , " fr " ) ! = 0 ) {
log_err ( " Wrong locale for French Collation Data, expected \" fr \" got %s " , lp ) ;
}
}
}
ures_close ( cr ) ;
2001-03-21 22:05:42 +00:00
}
2001-03-27 00:40:24 +00:00
ures_close ( lr ) ;
2001-03-21 22:05:42 +00:00
}
2001-03-20 00:56:37 +00:00
}
2001-03-22 19:33:43 +00:00
/*
 * Primary-difference check for the pair (p, q).
 *
 * First expects p < q as given. Then prepends U+31A3 to p and U+310D to q and
 * expects the prefixed strings to still compare UCOL_LESS.
 */
static void testPrimary(UCollator *col, const UChar *p, const UChar *q) {
    const UChar preP = 0x31a3;
    const UChar preQ = 0x310d;
    UChar prefixedP[256] = { 0 };
    UChar prefixedQ[256] = { 0 };

    doTest(col, p, q, UCOL_LESS);

    prefixedP[0] = preP;
    u_strcpy(prefixedP + 1, p);
    prefixedQ[0] = preQ;
    u_strcpy(prefixedQ + 1, q);

    doTest(col, prefixedP, prefixedQ, UCOL_LESS);
}
2001-05-17 23:09:35 +00:00
2001-03-22 19:33:43 +00:00
/*
 * Secondary-difference check for the pair (p, q).
 *
 *   1. p versus q is expected to be UCOL_LESS.
 *   2. With U+0053 prepended to p and U+0073 prepended to q the result is
 *      still expected to be UCOL_LESS.
 *   3. With U+0062 appended to p and U+0061 appended to q the result is
 *      expected to be UCOL_GREATER.
 */
static void testSecondary(UCollator *col, const UChar *p, const UChar *q) {
    UChar left[256] = { 0 };
    UChar right[256] = { 0 };
    int32_t pLen = 0;
    int32_t qLen = 0;

    /* 1: the plain pair */
    doTest(col, p, q, UCOL_LESS);

    /* 2: prefixed pair */
    left[0] = 0x0053;
    u_strcpy(left + 1, p);
    right[0] = 0x0073;
    u_strcpy(right + 1, q);
    doTest(col, left, right, UCOL_LESS);

    /* 3: suffixed pair */
    u_strcpy(left, p);
    pLen = u_strlen(p);
    left[pLen] = 0x62;
    left[pLen + 1] = 0;

    u_strcpy(right, q);
    qLen = u_strlen(q);
    right[qLen] = 0x61;
    right[qLen + 1] = 0;

    doTest(col, left, right, UCOL_GREATER);
}
2001-03-22 19:33:43 +00:00
/*
 * Tertiary-difference check for the pair (p, q).
 *
 *   1. p versus q is expected to be UCOL_LESS.
 *   2. With U+0020 prepended to p and U+002D prepended to q the result is
 *      still expected to be UCOL_LESS.
 *   3. With U+00E0 appended to p and U+0061 appended to q the result is
 *      expected to be UCOL_GREATER.
 */
static void testTertiary(UCollator *col, const UChar *p, const UChar *q) {
    UChar left[256] = { 0 };
    UChar right[256] = { 0 };
    int32_t pLen = 0;
    int32_t qLen = 0;

    /* 1: the plain pair */
    doTest(col, p, q, UCOL_LESS);

    /* 2: prefixed pair */
    left[0] = 0x0020;
    u_strcpy(left + 1, p);
    right[0] = 0x002D;
    u_strcpy(right + 1, q);
    doTest(col, left, right, UCOL_LESS);

    /* 3: suffixed pair */
    u_strcpy(left, p);
    pLen = u_strlen(p);
    left[pLen] = 0xE0;
    left[pLen + 1] = 0;

    u_strcpy(right, q);
    qLen = u_strlen(q);
    right[qLen] = 0x61;
    right[qLen + 1] = 0;

    doTest(col, left, right, UCOL_GREATER);
}
2001-03-22 19:33:43 +00:00
/* Equality check for the pair (p, q): the two strings are handed to doTest
 * with an expected result of UCOL_EQUAL. */
static void testEquality(UCollator *col, const UChar *p, const UChar *q) {
    doTest(col, p, q, UCOL_EQUAL);
}
2001-03-22 19:33:43 +00:00
/*
 * Re-parses the rule string of `coll` and checks that the collator orders
 * consecutive rule tokens the way the rules say.
 *
 * The rules are copied into a scratch buffer and tokenized with
 * ucol_tok_parseNextToken. For each token, `second` holds the token's
 * characters and `first` holds the string carried over from the previous
 * token (extended by an expansion or a prefix where the token has one). The
 * pair is then handed to testEquality / testPrimary / testSecondary /
 * testTertiary according to the token's strength. A reset token only updates
 * the bookkeeping; a reset carrying a "before" flag causes the following pair
 * to be swapped before it is tested.
 *
 * @param coll    collator whose rules are examined
 * @param status  ICU error code; nothing is done unless it indicates success
 *                and the collator has a non-empty rule string. Set to
 *                U_MEMORY_ALLOCATION_ERROR if the scratch copy cannot be
 *                allocated.
 */
static void testCollator(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL, *current = NULL;
    int32_t ruleLen = 0;
    uint32_t strength = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t exOffset = 0; uint32_t exLen = 0;
    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    uint32_t firstEx = 0;
    uint32_t firstLen = 0;
    UBool top_ = TRUE;
    uint16_t specs = 0;
    UBool startOfRules = TRUE;
    UBool lastReset = FALSE;
    UBool before = FALSE;
    uint32_t beforeStrength = 0;
    UColTokenParser src;
    UColOptionSet opts;
    UChar first[256];
    UChar second[256];
    UChar tempB[256];
    uint32_t tempLen;
    UChar *rulesCopy = NULL;
    UParseError parseError;

    uprv_memset(&src, 0, sizeof(UColTokenParser));
    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);
    if (U_SUCCESS(*status) && ruleLen > 0) {
        rulesCopy = (UChar *)uprv_malloc((ruleLen + UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
        if (rulesCopy == NULL) {
            /* Do not hand a NULL buffer to memcpy / the parser. */
            *status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
        src.current = src.source = rulesCopy;
        src.end = rulesCopy + ruleLen;
        src.extraCurrent = src.end;
        src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
        *first = *second = 0;

        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy.
           For that reason the loop reads through src.source, and src.source is what gets freed. */
        while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError, status)) != NULL) {
            strength = src.parsedToken.strength;
            chOffset = src.parsedToken.charsOffset;
            chLen = src.parsedToken.charsLen;
            exOffset = src.parsedToken.extensionOffset;
            exLen = src.parsedToken.extensionLen;
            prefixOffset = src.parsedToken.prefixOffset;
            prefixLen = src.parsedToken.prefixLen;
            specs = src.parsedToken.flags;

            startOfRules = FALSE;
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
            if (top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
                second[0] = 0;
            } else {
                u_strncpy(second, src.source + chOffset, chLen);
                second[chLen] = 0;

                if (exLen > 0 && firstEx == 0) {
                    u_strncat(first, src.source + exOffset, exLen);
                    first[firstLen + exLen] = 0;
                }

                if (lastReset == TRUE && prefixLen != 0) {
                    /* Shift the existing contents right to make room for the
                     * prefix. Source and destination overlap, so this must be
                     * a memmove-style copy rather than u_strncpy. */
                    u_memmove(first + prefixLen, first, (int32_t)firstLen);
                    u_strncpy(first, src.source + prefixOffset, prefixLen);
                    first[firstLen + prefixLen] = 0;
                    firstLen = firstLen + prefixLen;
                }

                if (before == TRUE) { /* swap first and second */
                    u_strcpy(tempB, first);
                    u_strcpy(first, second);
                    u_strcpy(second, tempB);

                    tempLen = firstLen;
                    firstLen = chLen;
                    chLen = tempLen;

                    tempLen = firstEx;
                    firstEx = exLen;
                    exLen = tempLen;
                    if (beforeStrength < strength) {
                        strength = beforeStrength;
                    }
                }
            }
            lastReset = FALSE;

            switch (strength) {
            case UCOL_IDENTICAL:
                testEquality(coll, first, second);
                break;
            case UCOL_PRIMARY:
                testPrimary(coll, first, second);
                break;
            case UCOL_SECONDARY:
                testSecondary(coll, first, second);
                break;
            case UCOL_TERTIARY:
                testTertiary(coll, first, second);
                break;
            case UCOL_TOK_RESET:
                lastReset = TRUE;
                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
                if (before) {
                    beforeStrength = (specs & UCOL_TOK_BEFORE) - 1;
                }
                break;
            default:
                break;
            }

            if (before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
                before = FALSE;
            } else {
                /* The current token becomes the left-hand side of the next pair. */
                firstLen = chLen;
                firstEx = exLen;
                u_strcpy(first, second);
            }
        }
        uprv_free(src.source);
    }
}
2007-07-19 06:01:57 +00:00
static UCollationResult ucaTest ( void * collator , const int object , const UChar * source , const int sLen , const UChar * target , const int tLen ) {
2001-04-13 00:05:42 +00:00
UCollator * UCA = ( UCollator * ) collator ;
return ucol_strcoll ( UCA , source , sLen , target , tLen ) ;
}
2001-05-04 00:02:24 +00:00
/*
2007-07-19 06:01:57 +00:00
static UCollationResult winTest ( void * collator , const int object , const UChar * source , const int sLen , const UChar * target , const int tLen ) {
2005-02-17 00:19:44 +00:00
# ifdef U_WINDOWS
2001-04-13 00:05:42 +00:00
LCID lcid = ( LCID ) collator ;
2007-07-19 06:01:57 +00:00
return ( UCollationResult ) CompareString ( lcid , 0 , source , sLen , target , tLen ) ;
2001-04-13 00:05:42 +00:00
# else
return 0 ;
# endif
}
2001-05-04 00:02:24 +00:00
*/
2001-04-13 00:05:42 +00:00
2001-05-17 23:09:35 +00:00
/*
 * Compares s and t after putting one extra code unit in FRONT of each:
 * s1 before s, s2 before t. The prefixed strings are built in local 256-unit
 * buffers and passed to `func` with their lengths increased by one.
 *
 * Returns whatever `func` returns for the prefixed pair.
 */
static UCollationResult swampEarlier(tst_strcoll *func, void *collator, int opts,
                                     UChar s1, UChar s2,
                                     const UChar *s, const uint32_t sLen,
                                     const UChar *t, const uint32_t tLen) {
    UChar prefixedS[256] = { 0 };
    UChar prefixedT[256] = { 0 };

    prefixedS[0] = s1;
    u_strcpy(&prefixedS[1], s);

    prefixedT[0] = s2;
    u_strcpy(&prefixedT[1], t);

    return func(collator, opts, prefixedS, sLen + 1, prefixedT, tLen + 1);
}
2001-05-17 23:09:35 +00:00
/*
 * Compares s and t after putting one extra code unit AFTER each: s1 is written
 * at index sLen of the copy of s, s2 at index tLen of the copy of t. The
 * buffers are zero-initialized 256-unit arrays, and the strings are passed to
 * `func` with their lengths increased by one.
 *
 * Returns whatever `func` returns for the suffixed pair.
 */
static UCollationResult swampLater(tst_strcoll *func, void *collator, int opts,
                                   UChar s1, UChar s2,
                                   const UChar *s, const uint32_t sLen,
                                   const UChar *t, const uint32_t tLen) {
    UChar suffixedS[256] = { 0 };
    UChar suffixedT[256] = { 0 };

    u_strcpy(suffixedS, s);
    suffixedS[sLen] = s1;

    u_strcpy(suffixedT, t);
    suffixedT[tLen] = s2;

    return func(collator, opts, suffixedS, sLen + 1, suffixedT, tLen + 1);
}
2001-05-17 23:09:35 +00:00
/*
 * Classifies the level of the difference between s and t, given the `result`
 * that `func` already produced for the plain pair.
 *
 * The pair is re-compared with extra code units added in front of
 * (swampEarlier) or behind (swampLater) both strings, and the outcomes are
 * matched against `result` or its opposite:
 *   - result is UCOL_EQUAL                                  -> UCOL_IDENTICAL
 *   - prefixing with (0x31a3, 0x310d) keeps `result`        -> UCOL_PRIMARY
 *   - prefixing with (0x31b7, 0x310f) in both orders keeps
 *     `result`                                              -> UCOL_SECONDARY
 *   - suffixing with that pair in both orders keeps
 *     `result`                                              -> UCOL_TERTIARY
 *   - suffixing with that pair in both orders yields the
 *     opposite of `result`                                  -> UCOL_QUATERNARY
 *   - anything else                                         -> UCOL_IDENTICAL
 * The probes run in the order listed and stop at the first match.
 */
static uint32_t probeStrength(tst_strcoll *func, void *collator, int opts,
                              const UChar *s, const uint32_t sLen,
                              const UChar *t, const uint32_t tLen,
                              UCollationResult result) {
    const UChar fSecondary = 0x310d;
    const UChar sSecondary = 0x31a3;
    const UChar fTertiary = 0x310f;
    const UChar sTertiary = 0x31b7;
    UCollationResult opposite;

    if (result == UCOL_EQUAL) {
        return UCOL_IDENTICAL;
    }
    opposite = (result == UCOL_GREATER) ? UCOL_LESS : UCOL_GREATER;

    if (swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
        return UCOL_PRIMARY;
    }

    if (swampEarlier(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result &&
        swampEarlier(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result) {
        return UCOL_SECONDARY;
    }

    if (swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result &&
        swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result) {
        return UCOL_TERTIARY;
    }

    if (swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == opposite &&
        swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == opposite) {
        return UCOL_QUATERNARY;
    }

    return UCOL_IDENTICAL;
}
2001-04-04 23:44:22 +00:00
2001-04-13 00:05:42 +00:00
/*
 * Writes a textual relation symbol into `buffer` and returns `buffer`.
 *
 *   - "==" when `res` is UCOL_EQUAL or `strength` is the sentinel 0xdeadbeef;
 *   - otherwise (strength + 1) copies of '>' for UCOL_GREATER, or of '<' for
 *     any other result.
 *
 * `buffer` must have room for strength + 2 chars (symbols plus terminator);
 * no bound is checked here.
 *
 * Single-character literals are used on purpose: a multi-character constant
 * has an implementation-defined int value and does not store the intended
 * character when assigned to a char.
 */
static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    uint32_t i = 0;

    if (res == UCOL_EQUAL || strength == 0xdeadbeef) {
        buffer[0] = '=';
        buffer[1] = '=';
        buffer[2] = '\0';
    } else {
        const char mark = (res == UCOL_GREATER) ? '>' : '<';

        for (i = 0; i < strength + 1; i++) {
            buffer[i] = mark;
        }
        buffer[strength + 1] = '\0';
    }
    return buffer;
}
2001-04-13 00:05:42 +00:00
2001-05-17 23:09:35 +00:00
static void logFailure ( const char * platform , const char * test ,
2001-04-13 00:05:42 +00:00
const UChar * source , const uint32_t sLen ,
const UChar * target , const uint32_t tLen ,
UCollationResult realRes , uint32_t realStrength ,
2001-05-29 22:53:07 +00:00
UCollationResult expRes , uint32_t expStrength , UBool error ) {
2001-04-04 23:44:22 +00:00
uint32_t i = 0 ;
2003-12-29 03:43:38 +00:00
char sEsc [ 256 ] , s [ 256 ] , tEsc [ 256 ] , t [ 256 ] , b [ 256 ] , output [ 512 ] , relation [ 256 ] ;
2004-12-08 23:02:08 +00:00
static int32_t maxOutputLength = 0 ;
int32_t outputLength ;
2001-04-04 23:44:22 +00:00
2001-04-13 00:05:42 +00:00
* sEsc = * tEsc = * s = * t = 0 ;
2001-05-29 22:53:07 +00:00
if ( error = = TRUE ) {
log_err ( " Difference between expected and generated order. Run test with -v for more info \n " ) ;
2010-04-07 16:18:38 +00:00
} else if ( getTestOption ( VERBOSITY_OPTION ) = = 0 ) {
2003-12-29 03:43:38 +00:00
return ;
2001-05-29 22:53:07 +00:00
}
2001-04-04 23:44:22 +00:00
for ( i = 0 ; i < sLen ; i + + ) {
2001-04-13 00:05:42 +00:00
sprintf ( b , " %04X " , source [ i ] ) ;
strcat ( sEsc , " \\ u " ) ;
2001-04-04 23:44:22 +00:00
strcat ( sEsc , b ) ;
2001-04-13 00:05:42 +00:00
strcat ( s , b ) ;
strcat ( s , " " ) ;
2001-05-24 22:37:38 +00:00
if ( source [ i ] < 0x80 ) {
sprintf ( b , " (%c) " , source [ i ] ) ;
strcat ( sEsc , b ) ;
}
2001-04-04 23:44:22 +00:00
}
for ( i = 0 ; i < tLen ; i + + ) {
2001-04-13 00:05:42 +00:00
sprintf ( b , " %04X " , target [ i ] ) ;
strcat ( tEsc , " \\ u " ) ;
2001-04-04 23:44:22 +00:00
strcat ( tEsc , b ) ;
2001-04-13 00:05:42 +00:00
strcat ( t , b ) ;
strcat ( t , " " ) ;
2001-05-24 22:37:38 +00:00
if ( target [ i ] < 0x80 ) {
sprintf ( b , " (%c) " , target [ i ] ) ;
strcat ( tEsc , b ) ;
}
2001-04-04 23:44:22 +00:00
}
2001-04-13 00:05:42 +00:00
/*
strcpy ( output , " [[ " ) ;
strcat ( output , sEsc ) ;
strcat ( output , getRelationSymbol ( expRes , expStrength , relation ) ) ;
strcat ( output , tEsc ) ;
strcat ( output , " : " ) ;
strcat ( output , sEsc ) ;
strcat ( output , getRelationSymbol ( realRes , realStrength , relation ) ) ;
strcat ( output , tEsc ) ;
strcat ( output , " ]] " ) ;
2001-04-04 23:44:22 +00:00
2001-04-13 00:05:42 +00:00
log_verbose ( " %s " , output ) ;
*/
strcpy ( output , " DIFF: " ) ;
2001-04-30 19:11:32 +00:00
strcat ( output , s ) ;
strcat ( output , " : " ) ;
strcat ( output , t ) ;
2001-04-13 00:05:42 +00:00
strcat ( output , test ) ;
strcat ( output , " : " ) ;
strcat ( output , sEsc ) ;
strcat ( output , getRelationSymbol ( expRes , expStrength , relation ) ) ;
strcat ( output , tEsc ) ;
strcat ( output , " " ) ;
strcat ( output , platform ) ;
strcat ( output , " : " ) ;
strcat ( output , sEsc ) ;
strcat ( output , getRelationSymbol ( realRes , realStrength , relation ) ) ;
strcat ( output , tEsc ) ;
2004-12-08 23:02:08 +00:00
outputLength = ( int32_t ) strlen ( output ) ;
2003-12-29 03:43:38 +00:00
if ( outputLength > maxOutputLength ) {
maxOutputLength = outputLength ;
U_ASSERT ( outputLength < sizeof ( output ) ) ;
}
2001-04-30 19:11:32 +00:00
log_verbose ( " %s \n " , output ) ;
2001-04-13 00:05:42 +00:00
2001-04-30 19:11:32 +00:00
}
2001-04-13 00:05:42 +00:00
2001-05-30 16:09:09 +00:00
/*
2001-05-04 00:02:24 +00:00
static void printOutRules ( const UChar * rules ) {
2001-04-30 19:11:32 +00:00
uint32_t len = u_strlen ( rules ) ;
uint32_t i = 0 ;
char toPrint ;
uint32_t line = 0 ;
fprintf ( stdout , " Rules: " ) ;
for ( i = 0 ; i < len ; i + + ) {
if ( rules [ i ] < 0x7f & & rules [ i ] > = 0x20 ) {
toPrint = ( char ) rules [ i ] ;
if ( toPrint = = ' & ' ) {
line = 1 ;
fprintf ( stdout , " \n & " ) ;
} else if ( toPrint = = ' ; ' ) {
fprintf ( stdout , " << " ) ;
line + = 2 ;
} else if ( toPrint = = ' , ' ) {
fprintf ( stdout , " <<< " ) ;
line + = 3 ;
} else {
fprintf ( stdout , " %c " , toPrint ) ;
line + + ;
}
} else if ( rules [ i ] < 0x3400 | | rules [ i ] > = 0xa000 ) {
fprintf ( stdout , " \\ u%04X " , rules [ i ] ) ;
line + = 6 ;
}
if ( line > 72 ) {
fprintf ( stdout , " \n " ) ;
line = 0 ;
}
}
log_verbose ( " \n " ) ;
2001-04-13 00:05:42 +00:00
}
2001-05-30 16:09:09 +00:00
*/
2001-04-30 19:11:32 +00:00
2001-05-29 22:53:07 +00:00
static uint32_t testSwitch ( tst_strcoll * func , void * collator , int opts , uint32_t strength , const UChar * first , const UChar * second , const char * msg , UBool error ) {
2001-04-13 00:05:42 +00:00
uint32_t diffs = 0 ;
UCollationResult realResult ;
uint32_t realStrength ;
uint32_t sLen = u_strlen ( first ) ;
uint32_t tLen = u_strlen ( second ) ;
realResult = func ( collator , opts , first , sLen , second , tLen ) ;
realStrength = probeStrength ( func , collator , opts , first , sLen , second , tLen , realResult ) ;
if ( strength = = UCOL_IDENTICAL & & realResult ! = UCOL_IDENTICAL ) {
2001-05-29 22:53:07 +00:00
logFailure ( msg , " tailoring " , first , sLen , second , tLen , realResult , realStrength , UCOL_EQUAL , strength , error ) ;
2001-04-13 00:05:42 +00:00
diffs + + ;
} else if ( realResult ! = UCOL_LESS | | realStrength ! = strength ) {
2001-05-29 22:53:07 +00:00
logFailure ( msg , " tailoring " , first , sLen , second , tLen , realResult , realStrength , UCOL_LESS , strength , error ) ;
2001-04-13 00:05:42 +00:00
diffs + + ;
2001-05-17 23:09:35 +00:00
}
2001-04-13 00:05:42 +00:00
return diffs ;
2001-04-04 23:44:22 +00:00
}
2001-04-13 00:05:42 +00:00
2001-05-30 16:09:09 +00:00
static void testAgainstUCA ( UCollator * coll , UCollator * UCA , const char * refName , UBool error , UErrorCode * status ) {
2001-04-04 06:42:13 +00:00
const UChar * rules = NULL , * current = NULL ;
int32_t ruleLen = 0 ;
uint32_t strength = 0 ;
uint32_t chOffset = 0 ; uint32_t chLen = 0 ;
uint32_t exOffset = 0 ; uint32_t exLen = 0 ;
2001-09-27 23:19:12 +00:00
uint32_t prefixOffset = 0 ; uint32_t prefixLen = 0 ;
2001-04-04 06:42:13 +00:00
/* uint32_t rExpsLen = 0; */
uint32_t firstLen = 0 , secondLen = 0 ;
UBool varT = FALSE ; UBool top_ = TRUE ;
2002-03-15 23:51:22 +00:00
uint16_t specs = 0 ;
2001-04-04 06:42:13 +00:00
UBool startOfRules = TRUE ;
UColTokenParser src ;
UColOptionSet opts ;
UChar first [ 256 ] ;
UChar second [ 256 ] ;
UChar * rulesCopy = NULL ;
uint32_t UCAdiff = 0 ;
2001-04-13 00:05:42 +00:00
uint32_t Windiff = 1 ;
2001-08-16 00:58:53 +00:00
UParseError parseError ;
2001-04-04 06:42:13 +00:00
2010-06-14 21:27:44 +00:00
uprv_memset ( & src , 0 , sizeof ( UColTokenParser ) ) ;
2001-04-04 06:42:13 +00:00
src . opts = & opts ;
rules = ucol_getRules ( coll , & ruleLen ) ;
2001-04-30 19:11:32 +00:00
/*printOutRules(rules);*/
2001-04-04 06:42:13 +00:00
if ( U_SUCCESS ( * status ) & & ruleLen > 0 ) {
2010-11-02 01:20:29 +00:00
rulesCopy = ( UChar * ) uprv_malloc ( ( ruleLen + UCOL_TOK_EXTRA_RULE_SPACE_SIZE ) * sizeof ( UChar ) ) ;
2001-04-04 06:42:13 +00:00
uprv_memcpy ( rulesCopy , rules , ruleLen * sizeof ( UChar ) ) ;
2002-10-17 23:12:43 +00:00
src . current = src . source = rulesCopy ;
2001-04-04 06:42:13 +00:00
src . end = rulesCopy + ruleLen ;
src . extraCurrent = src . end ;
src . extraEnd = src . end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE ;
* first = * second = 0 ;
2010-10-30 00:42:12 +00:00
/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
the rules copy in src . source to get reallocated , freeing the original pointer in rulesCopy */
2002-03-07 19:06:00 +00:00
while ( ( current = ucol_tok_parseNextToken ( & src , startOfRules , & parseError , status ) ) ! = NULL ) {
strength = src . parsedToken . strength ;
chOffset = src . parsedToken . charsOffset ;
chLen = src . parsedToken . charsLen ;
exOffset = src . parsedToken . extensionOffset ;
exLen = src . parsedToken . extensionLen ;
prefixOffset = src . parsedToken . prefixOffset ;
prefixLen = src . parsedToken . prefixLen ;
specs = src . parsedToken . flags ;
2001-04-04 06:42:13 +00:00
startOfRules = FALSE ;
2001-05-18 20:53:01 +00:00
varT = ( UBool ) ( ( specs & UCOL_TOK_VARIABLE_TOP ) ! = 0 ) ;
top_ = ( UBool ) ( ( specs & UCOL_TOK_TOP ) ! = 0 ) ;
2001-04-04 06:42:13 +00:00
2010-10-25 04:29:10 +00:00
u_strncpy ( second , src . source + chOffset , chLen ) ;
2001-04-04 06:42:13 +00:00
second [ chLen ] = 0 ;
secondLen = chLen ;
if ( exLen > 0 ) {
2010-10-25 04:29:10 +00:00
u_strncat ( first , src . source + exOffset , exLen ) ;
2001-04-04 06:42:13 +00:00
first [ firstLen + exLen ] = 0 ;
firstLen + = exLen ;
2001-05-17 23:09:35 +00:00
}
2001-04-04 06:42:13 +00:00
2001-04-13 00:05:42 +00:00
if ( strength ! = UCOL_TOK_RESET ) {
if ( ( * first < 0x3400 | | * first > = 0xa000 ) & & ( * second < 0x3400 | | * second > = 0xa000 ) ) {
2001-05-29 22:53:07 +00:00
UCAdiff + = testSwitch ( & ucaTest , ( void * ) UCA , 0 , strength , first , second , refName , error ) ;
2001-04-13 00:05:42 +00:00
/*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
}
2001-04-04 06:42:13 +00:00
}
2001-04-13 00:05:42 +00:00
2001-04-04 06:42:13 +00:00
firstLen = chLen ;
u_strcpy ( first , second ) ;
}
2001-04-13 00:05:42 +00:00
if ( UCAdiff ! = 0 & & Windiff ! = 0 ) {
log_verbose ( " \n " ) ;
}
2001-04-04 06:42:13 +00:00
if ( UCAdiff = = 0 ) {
2001-05-17 23:16:34 +00:00
log_verbose ( " No immediate difference with %s! \n " , refName ) ;
2001-04-13 00:05:42 +00:00
}
if ( Windiff = = 0 ) {
log_verbose ( " No immediate difference with Win32! \n " ) ;
2001-04-04 06:42:13 +00:00
}
2010-11-02 01:20:29 +00:00
uprv_free ( src . source ) ;
2001-04-04 06:42:13 +00:00
}
}
2004-11-11 23:34:58 +00:00
/*
 * Orders two collation elements, each supplied as a lead CE plus its
 * continuation CE, level by level:
 *   primary   - upper 16 bits of lead, then upper 16 bits of continuation
 *   secondary - bits 8..15 of lead, then bits 8..15 of continuation
 *   tertiary  - low 8 bits of lead, then low 8 bits of continuation
 *
 * s1, s2 - lead and continuation of the first element
 * t1, t2 - lead and continuation of the second element
 *
 * Returns 0 when both pairs are bitwise equal, -1 when the first element
 * sorts before the second and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t sKey = 0;
    uint32_t tKey = 0;

    if (s1 == t1 && s2 == t2) {
        return 0;
    }

    /* primary weights */
    sKey = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    tKey = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if (sKey != tKey) {
        return (sKey < tKey) ? -1 : 1;
    }

    /* secondary weights */
    sKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    tKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if (sKey != tKey) {
        return (sKey < tKey) ? -1 : 1;
    }

    /* tertiary weights; the pairs differ, so the result is never 0 here */
    sKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    tKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (sKey < tKey) ? -1 : 1;
}
2003-08-06 17:29:38 +00:00
/* CE boundaries recorded for one indirect (symbolic) reset position. */
typedef struct {
/* lead CE of the start of the position (start[0] in setIndirectBoundaries) */
uint32_t startCE ;
/* continuation CE of the start (start[1] in setIndirectBoundaries) */
uint32_t startContCE ;
/* lead CE of the limit, or 0 when no limit was supplied */
uint32_t limitCE ;
/* continuation CE of the limit, or 0 when no limit was supplied */
uint32_t limitContCE ;
} indirectBoundaries ;
/* these values are used for finding CE values for indirect positioning. */
/* Indirect positioning is a mechanism for allowing resets on symbolic */
/* values. It only works for resets and you cannot tailor indirect names. */
/* An indirect name can define either an anchor point or a range. An */
/* anchor point behaves in exactly the same way as a code point in reset */
/* would, except that it cannot be tailored. A range (we currently only */
/* know of the [top] range) will explicitly set the upper bound for */
/* generated CEs, thus allowing for better control over how many CEs can */
/* be squeezed between in the range without performance penalty. */
/* In that respect, we use [top] for tailoring of locales that use CJK */
/* characters. Other indirect values are currently a pure convenience, */
/* they can be used to assure that the CEs will be always positioned in */
/* the same place relative to a point with known properties (e.g. first */
/* primary ignorable). */
/* Boundary table for the indirect positions; testCEs fills slots 0..14 */
/* through setIndirectBoundaries the first time it runs. */
static indirectBoundaries ucolIndirectBoundaries [ 15 ] ;
/* Set to TRUE by testCEs once the table above has been filled, so that the */
/* table is populated only once. */
static UBool indirectBoundariesSet = FALSE ;
2004-11-11 23:34:58 +00:00
static void setIndirectBoundaries ( uint32_t indexR , uint32_t * start , uint32_t * end ) {
2008-02-10 20:17:14 +00:00
/* Set values for the top - TODO: once we have values for all the indirects, we are going */
/* to initalize here. */
ucolIndirectBoundaries [ indexR ] . startCE = start [ 0 ] ;
ucolIndirectBoundaries [ indexR ] . startContCE = start [ 1 ] ;
if ( end ) {
ucolIndirectBoundaries [ indexR ] . limitCE = end [ 0 ] ;
ucolIndirectBoundaries [ indexR ] . limitContCE = end [ 1 ] ;
} else {
ucolIndirectBoundaries [ indexR ] . limitCE = 0 ;
ucolIndirectBoundaries [ indexR ] . limitContCE = 0 ;
}
2003-08-06 17:29:38 +00:00
}
2001-03-23 23:58:13 +00:00
/*
 * Parses the rules of coll token by token, fetches the first CE (and its
 * continuation, if any) that coll produces for each token's characters, and
 * checks those CEs against the reset base, the upper bound obtained from the
 * inverse UCA table (or the [top] constants) and the previous token's CEs.
 * Violations are reported with log_err().
 *
 * coll   - collator under test
 * status - in/out error code; also used for the root collator and the
 *          collation iterator created here. Set to
 *          U_MEMORY_ALLOCATION_ERROR if the rules copy cannot be allocated.
 *
 * Every exit goes through the cleanup label, so the rules copy, the root
 * collator and the collation iterator are released on all paths.
 */
static void testCEs(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL;
    int32_t ruleLen = 0;
    uint32_t strength = 0;
    uint32_t maxStrength = UCOL_IDENTICAL;
    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
    uint32_t lastCE, lastContCE;
    int32_t result = 0;
    uint32_t chOffset = 0, chLen = 0;
    uint32_t oldOffset = 0;
    uint16_t specs = 0;
    UBool top_ = TRUE;
    UBool startOfRules = TRUE;
    UBool before = FALSE;
    UColTokenParser src;
    UColOptionSet opts;
    UParseError parseError;
    UChar *rulesCopy = NULL;
    collIterate *c = NULL;
    UCAConstants *consts = NULL;
    uint32_t UCOL_RESET_TOP_VALUE, UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
    const char *colLoc = NULL;
    UCollator *UCA = NULL;

    /* Zero the parser first so that src.source is NULL on every path into cleanup. */
    uprv_memset(&src, 0, sizeof(UColTokenParser));

    c = uprv_new_collIterate(status);
    UCA = ucol_open(" root ", status);
    if (U_FAILURE(*status)) {
        log_err(" Could not open root collator %s \n ", u_errorName(*status));
        goto cleanup;
    }

    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
    if (U_FAILURE(*status)) {
        log_err(" Could not get collator name: %s \n ", u_errorName(*status));
        goto cleanup;
    }

    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
    baseCE = baseContCE = nextCE = nextContCE = currCE = currContCE = lastCE = lastContCE = UCOL_NOT_FOUND;
    src.opts = &opts;
    rules = ucol_getRules(coll, &ruleLen);
    src.invUCA = ucol_initInverseUCA(status);

    if (indirectBoundariesSet == FALSE) {
        /* UCOL_RESET_TOP_VALUE */
        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
        /* UCOL_LAST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
        /* UCOL_FIRST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
        /* UCOL_LAST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
        /* UCOL_FIRST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
        /* UCOL_LAST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
        /* UCOL_FIRST_VARIABLE */
        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
        /* UCOL_LAST_VARIABLE */
        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
        /* UCOL_FIRST_NON_VARIABLE */
        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
        /* UCOL_LAST_NON_VARIABLE */
        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_IMPLICIT */
        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
        /* UCOL_LAST_IMPLICIT */
        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
        /* UCOL_FIRST_TRAILING */
        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
        /* UCOL_LAST_TRAILING */
        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN << 24);
        indirectBoundariesSet = TRUE;
    }

    if (U_FAILURE(*status) || ruleLen <= 0) {
        goto cleanup;
    }

    rulesCopy = (UChar *)uprv_malloc((ruleLen + UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
    if (rulesCopy == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        log_err("%s: out of memory while copying the rules\n", colLoc);
        goto cleanup;
    }
    uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
    src.current = src.source = rulesCopy;
    src.end = rulesCopy + ruleLen;
    src.extraCurrent = src.end;
    src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy.
       Always address the rules through src.source from here on. */
    while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
        strength = src.parsedToken.strength;
        chOffset = src.parsedToken.charsOffset;
        chLen = src.parsedToken.charsLen;
        specs = src.parsedToken.flags;
        startOfRules = FALSE;
        top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

        uprv_init_collIterate(coll, src.source + chOffset, chLen, c, status);

        currCE = ucol_getNextCE(coll, c, status);
        if (currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source + chOffset))) {
            log_verbose(" Thai prevowel detected. Will pick next CE \n ");
            currCE = ucol_getNextCE(coll, c, status);
        }

        currContCE = ucol_getNextCE(coll, c, status);
        if (!isContinuation(currContCE)) {
            currContCE = 0;
        }

        /* we need to repack CEs here */

        if (strength == UCOL_TOK_RESET) {
            before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
            if (top_ == TRUE) {
                /* An indirect reset takes its base from the boundary table. */
                int32_t tokenIndex = src.parsedToken.indirectIndex;

                nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
                nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
            } else {
                nextCE = baseCE = currCE;
                nextContCE = baseContCE = currContCE;
            }
            maxStrength = UCOL_IDENTICAL;
        } else {
            if (strength < maxStrength) {
                /* A stronger relation than seen so far: recompute the upper bound. */
                maxStrength = strength;
                if (baseCE == UCOL_RESET_TOP_VALUE) {
                    log_verbose(" Resetting to [top] \n ");
                    nextCE = UCOL_NEXT_TOP_VALUE;
                    nextContCE = UCOL_NEXT_TOP_CONT;
                } else {
                    result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
                }
                if (result < 0) {
                    if (ucol_isTailored(coll, *(src.source + oldOffset), status)) {
                        log_verbose(" Reset is tailored codepoint %04X, don't know how to continue, taking next test \n ", *(src.source + oldOffset));
                    } else {
                        log_err(" %s: couldn't find the CE \n ", colLoc);
                    }
                    /* Give up on this collator, but release everything first. */
                    goto cleanup;
                }
            }

            currCE &= 0xFFFFFF3F;
            currContCE &= 0xFFFFFFBF;

            if (maxStrength == UCOL_IDENTICAL) {
                if (baseCE != currCE || baseContCE != currContCE) {
                    log_err(" %s: current CE (initial strength UCOL_EQUAL) \n ", colLoc);
                }
            } else if (strength == UCOL_IDENTICAL) {
                if (lastCE != currCE || lastContCE != currContCE) {
                    log_err(" %s: current CE (initial strength UCOL_EQUAL) \n ", colLoc);
                }
            } else {
                if (compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
                    log_err(" %s: current CE is not less than base CE \n ", colLoc);
                }
                if (!before) {
                    if (compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
                        log_err(" %s: sequence of generated CEs is broken \n ", colLoc);
                    }
                } else {
                    /* Only the first token after a [before] reset is compared in reverse. */
                    before = FALSE;
                    if (compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
                        log_err(" %s: sequence of generated CEs is broken \n ", colLoc);
                    }
                }
            }
        }

        oldOffset = chOffset;
        lastCE = currCE & 0xFFFFFF3F;
        lastContCE = currContCE & 0xFFFFFFBF;
    }

cleanup:
    /* Free through src.source: the parser may have replaced the original allocation. */
    uprv_free(src.source);
    ucol_close(UCA);
    uprv_delete_collIterate(c);
}
2001-06-06 20:48:57 +00:00
/* Disabled by the preprocessor: a fixed list of locale names kept for reference only. */
#if 0
/* these locales are now picked from index RB */
2001-03-22 19:33:43 +00:00
static const char * localesToTest [ ] = {
2001-04-02 18:49:30 +00:00
" ar " , " bg " , " ca " , " cs " , " da " ,
2001-05-17 23:09:35 +00:00
" el " , " en_BE " , " en_US_POSIX " ,
" es " , " et " , " fi " , " fr " , " hi " ,
" hr " , " hu " , " is " , " iw " , " ja " ,
" ko " , " lt " , " lv " , " mk " , " mt " ,
" nb " , " nn " , " nn_NO " , " pl " , " ro " ,
2001-04-06 22:54:29 +00:00
" ru " , " sh " , " sk " , " sl " , " sq " ,
2001-05-17 23:09:35 +00:00
" sr " , " sv " , " th " , " tr " , " uk " ,
2001-03-21 22:05:42 +00:00
" vi " , " zh " , " zh_TW "
} ;
2001-06-06 20:48:57 +00:00
# endif
2001-03-20 07:22:33 +00:00
2001-03-22 19:33:43 +00:00
/*
 * Hand-written tailoring rules that RamsRulesTest runs through
 * testCollator() and testCEs() after the locale loop.
 * The comment above each [top] rule shows the earlier form of that rule.
 */
static const char *rulesToTest[] = {
    /* Funky fa rule */
    " & \\ u0622 < \\ u0627 << \\ u0671 < \\ u0621 ",

    /* Disabled: "& Z < p, P" */

    /* Cui Mins rules */
    /* was: "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
    " &[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U ",
    /* was: "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    " &[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U ",
    /* was: "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
    " &[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U ",
    /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    " &[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U ",
    /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu" */
    " &[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U ",
    /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
    " &[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U ",
    /* was: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
    " &[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U "
};
2001-03-20 07:22:33 +00:00
2001-04-06 22:54:29 +00:00
2002-03-28 18:26:25 +00:00
static void TestCollations ( void ) {
2008-02-19 07:09:38 +00:00
int32_t noOfLoc = uloc_countAvailable ( ) ;
int32_t i = 0 , j = 0 ;
2001-05-17 23:09:35 +00:00
2008-02-19 07:09:38 +00:00
UErrorCode status = U_ZERO_ERROR ;
char cName [ 256 ] ;
UChar name [ 256 ] ;
int32_t nameSize ;
2001-04-04 23:44:22 +00:00
2008-02-19 07:09:38 +00:00
const char * locName = NULL ;
UCollator * coll = NULL ;
UCollator * UCA = ucol_open ( " " , & status ) ;
UColAttributeValue oldStrength = ucol_getAttribute ( UCA , UCOL_STRENGTH , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Could not open UCA collator %s \n " , u_errorName ( status ) ) ;
2008-02-19 07:09:38 +00:00
return ;
2001-10-31 23:59:35 +00:00
}
2008-02-19 07:09:38 +00:00
ucol_setAttribute ( UCA , UCOL_STRENGTH , UCOL_QUATERNARY , & status ) ;
2010-11-02 08:27:22 +00:00
for ( i = 0 ; i < noOfLoc ; i + + ) {
2008-02-19 07:09:38 +00:00
status = U_ZERO_ERROR ;
locName = uloc_getAvailable ( i ) ;
if ( uprv_strcmp ( " ja " , locName ) = = 0 ) {
log_verbose ( " Don't know how to test prefixes \n " ) ;
continue ;
2001-04-13 00:05:42 +00:00
}
2008-02-19 07:09:38 +00:00
if ( hasCollationElements ( locName ) ) {
nameSize = uloc_getDisplayName ( locName , NULL , name , 256 , & status ) ;
for ( j = 0 ; j < nameSize ; j + + ) {
cName [ j ] = ( char ) name [ j ] ;
}
cName [ nameSize ] = 0 ;
log_verbose ( " \n Testing locale %s (%s) \n " , locName , cName ) ;
coll = ucol_open ( locName , & status ) ;
if ( U_SUCCESS ( status ) ) {
testAgainstUCA ( coll , UCA , " UCA " , FALSE , & status ) ;
ucol_close ( coll ) ;
} else {
log_err ( " Couldn't instantiate collator for locale %s, error: %s \n " , locName , u_errorName ( status ) ) ;
status = U_ZERO_ERROR ;
}
2003-11-11 21:41:47 +00:00
}
2001-04-04 23:44:22 +00:00
}
2008-02-19 07:09:38 +00:00
ucol_setAttribute ( UCA , UCOL_STRENGTH , oldStrength , & status ) ;
ucol_close ( UCA ) ;
2001-04-04 23:44:22 +00:00
}
2002-03-28 18:26:25 +00:00
static void RamsRulesTest ( void ) {
2008-02-19 07:09:38 +00:00
UErrorCode status = U_ZERO_ERROR ;
int32_t i = 0 ;
UCollator * coll = NULL ;
UChar rule [ 2048 ] ;
uint32_t ruleLen ;
int32_t noOfLoc = uloc_countAvailable ( ) ;
const char * locName = NULL ;
log_verbose ( " RamsRulesTest \n " ) ;
2010-07-13 19:43:25 +00:00
if ( uprv_strcmp ( " km " , uloc_getDefault ( ) ) = = 0 | | uprv_strcmp ( " km_KH " , uloc_getDefault ( ) ) = = 0 ) {
/* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
return ;
}
2008-02-19 07:09:38 +00:00
for ( i = 0 ; i < noOfLoc ; i + + ) {
locName = uloc_getAvailable ( i ) ;
if ( hasCollationElements ( locName ) ) {
if ( uprv_strcmp ( " ja " , locName ) = = 0 ) {
log_verbose ( " Don't know how to test Japanese because of prefixes \n " ) ;
continue ;
}
if ( uprv_strcmp ( " de__PHONEBOOK " , locName ) = = 0 ) {
log_verbose ( " Don't know how to test Phonebook because the reset is on an expanding character \n " ) ;
continue ;
}
2010-10-19 03:31:50 +00:00
if ( uprv_strcmp ( " bn " , locName ) = = 0 | |
uprv_strcmp ( " en_US_POSIX " , locName ) = = 0 | |
uprv_strcmp ( " km " , locName ) = = 0 | |
2008-02-19 07:09:38 +00:00
uprv_strcmp ( " km_KH " , locName ) = = 0 | |
2010-10-19 04:09:15 +00:00
uprv_strcmp ( " my " , locName ) = = 0 | |
2009-01-28 22:01:32 +00:00
uprv_strcmp ( " si " , locName ) = = 0 | |
uprv_strcmp ( " si_LK " , locName ) = = 0 | |
2008-02-19 07:09:38 +00:00
uprv_strcmp ( " zh " , locName ) = = 0 | |
2010-10-19 03:31:50 +00:00
uprv_strcmp ( " zh_Hant " , locName ) = = 0
) {
log_verbose ( " Don't know how to test %s. "
2010-11-04 19:15:00 +00:00
" TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale. \n " , locName ) ;
2010-10-19 03:31:50 +00:00
continue ;
2008-02-19 07:09:38 +00:00
}
log_verbose ( " Testing locale %s \n " , locName ) ;
2010-11-04 19:15:00 +00:00
status = U_ZERO_ERROR ;
2008-02-19 07:09:38 +00:00
coll = ucol_open ( locName , & status ) ;
if ( U_SUCCESS ( status ) ) {
2010-11-04 19:15:00 +00:00
if ( ( status ! = U_USING_DEFAULT_WARNING ) & & ( status ! = U_USING_FALLBACK_WARNING ) ) {
2008-02-19 07:09:38 +00:00
if ( coll - > image - > jamoSpecial = = TRUE ) {
2010-11-04 19:15:00 +00:00
log_err ( " %s has special JAMOs \n " , locName ) ;
2008-02-19 07:09:38 +00:00
}
ucol_setAttribute ( coll , UCOL_CASE_FIRST , UCOL_OFF , & status ) ;
testCollator ( coll , & status ) ;
testCEs ( coll , & status ) ;
2010-11-04 19:15:00 +00:00
} else {
log_verbose ( " Skipping %s: %s \n " , locName , u_errorName ( status ) ) ;
}
ucol_close ( coll ) ;
} else {
log_err ( " Could not open %s: %s \n " , locName , u_errorName ( status ) ) ;
2008-02-19 07:09:38 +00:00
}
2001-06-09 01:23:05 +00:00
}
2001-04-06 22:54:29 +00:00
}
2008-02-19 07:09:38 +00:00
for ( i = 0 ; i < sizeof ( rulesToTest ) / sizeof ( rulesToTest [ 0 ] ) ; i + + ) {
log_verbose ( " Testing rule: %s \n " , rulesToTest [ i ] ) ;
ruleLen = u_unescape ( rulesToTest [ i ] , rule , 2048 ) ;
2010-11-04 19:15:00 +00:00
status = U_ZERO_ERROR ;
2008-02-19 07:09:38 +00:00
coll = ucol_openRules ( rule , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
if ( U_SUCCESS ( status ) ) {
testCollator ( coll , & status ) ;
testCEs ( coll , & status ) ;
ucol_close ( coll ) ;
2010-11-04 19:15:00 +00:00
} else {
log_err ( " Could not test rule: %s: '%s' \n " , u_errorName ( status ) , rulesToTest [ i ] ) ;
2008-02-19 07:09:38 +00:00
}
2001-03-20 07:22:33 +00:00
}
2001-04-04 06:42:13 +00:00
2001-03-22 18:12:36 +00:00
}
2002-03-28 18:26:25 +00:00
static void IsTailoredTest ( void ) {
2008-02-19 07:09:38 +00:00
UErrorCode status = U_ZERO_ERROR ;
uint32_t i = 0 ;
UCollator * coll = NULL ;
UChar rule [ 2048 ] ;
UChar tailored [ 2048 ] ;
UChar notTailored [ 2048 ] ;
uint32_t ruleLen , tailoredLen , notTailoredLen ;
2001-03-21 22:05:42 +00:00
2008-02-19 07:09:38 +00:00
log_verbose ( " IsTailoredTest \n " ) ;
2001-05-17 23:09:35 +00:00
2008-02-19 07:09:38 +00:00
u_uastrcpy ( rule , " &Z < A, B, C;c < d " ) ;
ruleLen = u_strlen ( rule ) ;
2001-03-22 18:12:36 +00:00
2008-02-19 07:09:38 +00:00
u_uastrcpy ( tailored , " ABCcd " ) ;
tailoredLen = u_strlen ( tailored ) ;
2001-03-22 18:12:36 +00:00
2008-02-19 07:09:38 +00:00
u_uastrcpy ( notTailored , " ZabD " ) ;
notTailoredLen = u_strlen ( notTailored ) ;
2001-03-21 22:05:42 +00:00
2008-02-19 07:09:38 +00:00
coll = ucol_openRules ( rule , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
if ( U_SUCCESS ( status ) ) {
for ( i = 0 ; i < tailoredLen ; i + + ) {
if ( ! ucol_isTailored ( coll , tailored [ i ] , & status ) ) {
log_err ( " %i: %04X should be tailored - it is reported as not \n " , i , tailored [ i ] ) ;
}
}
for ( i = 0 ; i < notTailoredLen ; i + + ) {
if ( ucol_isTailored ( coll , notTailored [ i ] , & status ) ) {
log_err ( " %i: %04X should not be tailored - it is reported as it is \n " , i , notTailored [ i ] ) ;
}
}
ucol_close ( coll ) ;
2001-03-22 18:12:36 +00:00
}
2008-02-19 07:09:38 +00:00
else {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Can't tailor rules \n " ) ;
2008-02-19 07:09:38 +00:00
}
/* Code coverage */
status = U_ZERO_ERROR ;
coll = ucol_open ( " ja " , & status ) ;
if ( ! ucol_isTailored ( coll , 0x4E9C , & status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " 0x4E9C should be tailored - it is reported as not \n " ) ;
2001-03-22 18:12:36 +00:00
}
ucol_close ( coll ) ;
2001-03-20 07:22:33 +00:00
}
2001-10-08 19:32:09 +00:00
2001-04-13 00:05:42 +00:00
/*
 * Escaped test strings for TestChMove, listed in the order in which the
 * collator opened there is expected to sort them: every entry must compare
 * less than every entry that follows it.
 */
static const char chTest[][20] = {
    " c ",
    " C ",
    " ca ",
    " cb ",
    " cx ",
    " cy ",
    " CZ ",
    " c \\ u030C ",
    " C \\ u030C ",
    " h ",
    " H ",
    " ha ",
    " Ha ",
    " harly ",
    " hb ",
    " HB ",
    " hx ",
    " HX ",
    " hy ",
    " HY ",
    " ch ",
    " cH ",
    " Ch ",
    " CH ",
    " cha ",
    " charly ",
    " che ",
    " chh ",
    " chch ",
    " chr ",
    " i ",
    " I ",
    " iarly ",
    " r ",
    " R ",
    " r \\ u030C ",
    " R \\ u030C ",
    " s ",
    " S ",
    " s \\ u030C ",
    " S \\ u030C ",
    " z ",
    " Z ",
    " z \\ u030C ",
    " Z \\ u030C "
};
static void TestChMove ( void ) {
2008-02-19 07:09:38 +00:00
UChar t1 [ 256 ] = { 0 } ;
UChar t2 [ 256 ] = { 0 } ;
2001-04-13 00:05:42 +00:00
2008-02-19 07:09:38 +00:00
uint32_t i = 0 , j = 0 ;
uint32_t size = 0 ;
UErrorCode status = U_ZERO_ERROR ;
2001-04-13 00:05:42 +00:00
2008-02-19 07:09:38 +00:00
UCollator * coll = ucol_open ( " cs " , & status ) ;
2001-04-13 00:05:42 +00:00
2008-02-19 07:09:38 +00:00
if ( U_SUCCESS ( status ) ) {
size = sizeof ( chTest ) / sizeof ( chTest [ 0 ] ) ;
for ( i = 0 ; i < size - 1 ; i + + ) {
for ( j = i + 1 ; j < size ; j + + ) {
u_unescape ( chTest [ i ] , t1 , 256 ) ;
u_unescape ( chTest [ j ] , t2 , 256 ) ;
doTest ( coll , t1 , t2 , UCOL_LESS ) ;
}
}
2001-04-13 00:05:42 +00:00
}
2008-02-19 07:09:38 +00:00
else {
2010-07-14 16:09:03 +00:00
log_data_err ( " Can't open collator " ) ;
2008-02-19 07:09:38 +00:00
}
ucol_close ( coll ) ;
2001-04-13 00:05:42 +00:00
}
2004-01-16 23:44:58 +00:00
2001-04-23 03:50:15 +00:00
/*
 * Escaped strings in expected sort order. Within the visible part of this
 * file they are referenced only from the retired code inside
 * TestImplicitTailoring.
 */
static const char impTest[][20] = {
    " \\ u4e00 ",
    " a ",
    " A ",
    " b ",
    " B ",
    " \\ u4e01 "
};
/*
 * Feeds each rule string below, together with its list of test strings and
 * the number of strings in that list, to genericRulesStarter().
 *
 * An older variant of this test (open one rule with ucol_openRules() and
 * compare all pairs of impTest with doTest()) was retired in favour of the
 * table.
 */
static void TestImplicitTailoring(void) {
    static const struct {
        const char *rules;      /* tailoring rule, escaped */
        const char *data[10];   /* test strings passed along with the rule */
        const uint32_t len;     /* number of entries used in data */
    } cases[] = {
        {
            " &[before 1] \\ u4e00 < b < c &[before 1] \\ u4e00 < d < e ",
            { " d ", " e ", " b ", " c ", " \\ u4e00 " },
            5
        },
        {
            " & \\ u4e00 < a <<< A < b <<< B ",
            { " \\ u4e00 ", " a ", " A ", " b ", " B ", " \\ u4e01 " },
            6
        },
        {
            " &[before 1] \\ u4e00 < \\ u4e01 < \\ u4e02 ",
            { " \\ u4e01 ", " \\ u4e02 ", " \\ u4e00 " },
            3
        },
        {
            " &[before 1] \\ u4e01 < \\ u4e02 < \\ u4e03 ",
            { " \\ u4e02 ", " \\ u4e03 ", " \\ u4e01 " },
            3
        }
    };
    const int32_t caseCount = (int32_t)(sizeof(cases) / sizeof(cases[0]));
    int32_t n = 0;

    for (n = 0; n < caseCount; n++) {
        genericRulesStarter(cases[n].rules, cases[n].data, cases[n].len);
    }
}
static void TestFCDProblem ( void ) {
UChar t1 [ 256 ] = { 0 } ;
UChar t2 [ 256 ] = { 0 } ;
2001-05-04 00:02:24 +00:00
const char * s1 = " \\ u0430 \\ u0306 \\ u0325 " ;
const char * s2 = " \\ u04D1 \\ u0325 " ;
2001-04-23 03:50:15 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " " , & status ) ;
u_unescape ( s1 , t1 , 256 ) ;
u_unescape ( s2 , t2 , 256 ) ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_OFF , & status ) ;
doTest ( coll , t1 , t2 , UCOL_EQUAL ) ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
doTest ( coll , t1 , t2 , UCOL_EQUAL ) ;
2001-10-17 02:19:48 +00:00
ucol_close ( coll ) ;
2001-04-23 03:50:15 +00:00
}
2007-12-15 10:01:09 +00:00
/*
The largest normalization form is 18 for NFKC / NFKD , 4 for NFD and 3 for NFC
We ' re only using NFC / NFD in this test .
*/
# define NORM_BUFFER_TEST_LEN 18
2001-05-08 23:38:16 +00:00
typedef struct {
2001-11-10 06:54:28 +00:00
UChar32 u ;
2001-05-08 23:38:16 +00:00
UChar NFC [ NORM_BUFFER_TEST_LEN ] ;
UChar NFD [ NORM_BUFFER_TEST_LEN ] ;
} tester ;
2001-04-19 19:01:39 +00:00
static void TestComposeDecompose ( void ) {
2007-12-15 10:01:09 +00:00
/* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
static const UChar UNICODESET_STR [ ] = {
0x5B , 0x5B , 0x3A , 0x4E , 0x46 , 0x44 , 0x5F , 0x49 , 0x6E , 0x65 , 0x72 , 0x74 , 0x3D , 0x66 , 0x61 ,
0x6C , 0x73 , 0x65 , 0x3A , 0x5D , 0x5B , 0x3A , 0x4E , 0x46 , 0x43 , 0x5F , 0x49 , 0x6E , 0x65 , 0x72 ,
0x74 , 0x3D , 0x66 , 0x61 , 0x6C , 0x73 , 0x65 , 0x3A , 0x5D , 0x5D , 0
} ;
2002-05-31 01:18:14 +00:00
int32_t noOfLoc ;
2001-05-08 23:38:16 +00:00
int32_t i = 0 , j = 0 ;
2001-05-17 23:09:35 +00:00
2001-05-08 23:38:16 +00:00
UErrorCode status = U_ZERO_ERROR ;
const char * locName = NULL ;
uint32_t nfcSize ;
uint32_t nfdSize ;
2002-05-31 01:18:14 +00:00
tester * * t ;
2001-05-08 23:38:16 +00:00
uint32_t noCases = 0 ;
UCollator * coll = NULL ;
2001-11-10 06:54:28 +00:00
UChar32 u = 0 ;
UChar comp [ NORM_BUFFER_TEST_LEN ] ;
uint32_t len = 0 ;
2003-07-24 23:23:19 +00:00
UCollationElements * iter ;
2007-12-15 10:01:09 +00:00
USet * charsToTest = uset_openPattern ( UNICODESET_STR , - 1 , & status ) ;
2008-02-08 03:39:40 +00:00
int32_t charsToTestSize ;
2001-05-17 23:09:35 +00:00
2002-05-31 01:18:14 +00:00
noOfLoc = uloc_countAvailable ( ) ;
2003-06-04 19:02:41 +00:00
coll = ucol_open ( " " , & status ) ;
2009-06-09 21:28:13 +00:00
if ( U_FAILURE ( status ) ) {
log_data_err ( " Error opening collator -> %s (Are you missing data?) \n " , u_errorName ( status ) ) ;
2007-12-15 10:01:09 +00:00
return ;
2003-06-04 19:02:41 +00:00
}
2008-02-08 03:39:40 +00:00
charsToTestSize = uset_size ( charsToTest ) ;
2008-02-10 20:17:14 +00:00
if ( charsToTestSize < = 0 ) {
log_err ( " Set was zero. Missing data? \n " ) ;
return ;
}
2008-02-08 03:39:40 +00:00
t = malloc ( charsToTestSize * sizeof ( tester * ) ) ;
t [ 0 ] = ( tester * ) malloc ( sizeof ( tester ) ) ;
log_verbose ( " Testing UCA extensively for %d characters \n " , charsToTestSize ) ;
2001-05-17 23:09:35 +00:00
2007-12-15 10:01:09 +00:00
for ( u = 0 ; u < charsToTestSize ; u + + ) {
UChar32 ch = uset_charAt ( charsToTest , u ) ;
len = 0 ;
UTF_APPEND_CHAR_UNSAFE ( comp , len , ch ) ;
2001-11-10 06:54:28 +00:00
nfcSize = unorm_normalize ( comp , len , UNORM_NFC , 0 , t [ noCases ] - > NFC , NORM_BUFFER_TEST_LEN , & status ) ;
nfdSize = unorm_normalize ( comp , len , UNORM_NFD , 0 , t [ noCases ] - > NFD , NORM_BUFFER_TEST_LEN , & status ) ;
2001-05-17 23:09:35 +00:00
2004-11-11 23:34:58 +00:00
if ( nfcSize ! = nfdSize | | ( uprv_memcmp ( t [ noCases ] - > NFC , t [ noCases ] - > NFD , nfcSize * sizeof ( UChar ) ) ! = 0 )
2001-11-10 06:54:28 +00:00
| | ( len ! = nfdSize | | ( uprv_memcmp ( comp , t [ noCases ] - > NFD , nfdSize * sizeof ( UChar ) ) ! = 0 ) ) ) {
2007-12-15 10:01:09 +00:00
t [ noCases ] - > u = ch ;
2001-11-10 06:54:28 +00:00
if ( len ! = nfdSize | | ( uprv_memcmp ( comp , t [ noCases ] - > NFD , nfdSize * sizeof ( UChar ) ) ! = 0 ) ) {
2007-12-15 10:01:09 +00:00
u_strncpy ( t [ noCases ] - > NFC , comp , len ) ;
t [ noCases ] - > NFC [ len ] = 0 ;
2001-11-10 06:54:28 +00:00
}
2001-05-08 23:38:16 +00:00
noCases + + ;
2002-07-29 21:04:18 +00:00
t [ noCases ] = ( tester * ) malloc ( sizeof ( tester ) ) ;
2001-06-22 18:35:01 +00:00
uprv_memset ( t [ noCases ] , 0 , sizeof ( tester ) ) ;
2004-11-11 23:34:58 +00:00
}
2001-05-08 23:38:16 +00:00
}
2007-12-15 10:01:09 +00:00
log_verbose ( " Testing %d/%d of possible test cases \n " , noCases , charsToTestSize ) ;
uset_close ( charsToTest ) ;
charsToTest = NULL ;
2001-05-17 23:09:35 +00:00
2002-09-20 16:02:16 +00:00
for ( u = 0 ; u < ( UChar32 ) noCases ; u + + ) {
2007-12-15 10:01:09 +00:00
if ( ! ucol_equal ( coll , t [ u ] - > NFC , - 1 , t [ u ] - > NFD , - 1 ) ) {
log_err ( " Failure: codePoint %05X fails TestComposeDecompose in the UCA \n " , t [ u ] - > u ) ;
doTest ( coll , t [ u ] - > NFC , t [ u ] - > NFD , UCOL_EQUAL ) ;
}
2001-06-22 18:35:01 +00:00
}
2001-11-10 06:54:28 +00:00
/*
2007-12-15 10:01:09 +00:00
for ( u = 0 ; u < charsToTestSize ; u + + ) {
2001-11-10 06:54:28 +00:00
if ( ! ( u & 0xFFFF ) ) {
log_verbose ( " %08X " , u ) ;
}
uprv_memset ( t [ noCases ] , 0 , sizeof ( tester ) ) ;
t [ noCases ] - > u = u ;
len = 0 ;
UTF_APPEND_CHAR_UNSAFE ( comp , len , u ) ;
comp [ len ] = 0 ;
nfcSize = unorm_normalize ( comp , len , UNORM_NFC , 0 , t [ noCases ] - > NFC , NORM_BUFFER_TEST_LEN , & status ) ;
nfdSize = unorm_normalize ( comp , len , UNORM_NFD , 0 , t [ noCases ] - > NFD , NORM_BUFFER_TEST_LEN , & status ) ;
doTest ( coll , comp , t [ noCases ] - > NFD , UCOL_EQUAL ) ;
doTest ( coll , comp , t [ noCases ] - > NFC , UCOL_EQUAL ) ;
}
*/
2001-06-22 18:35:01 +00:00
ucol_close ( coll ) ;
2001-11-10 06:54:28 +00:00
log_verbose ( " Testing locales, number of cases = %i \n " , noCases ) ;
2001-05-08 23:38:16 +00:00
for ( i = 0 ; i < noOfLoc ; i + + ) {
status = U_ZERO_ERROR ;
locName = uloc_getAvailable ( i ) ;
if ( hasCollationElements ( locName ) ) {
2001-06-22 18:35:01 +00:00
char cName [ 256 ] ;
UChar name [ 256 ] ;
int32_t nameSize = uloc_getDisplayName ( locName , NULL , name , sizeof ( cName ) , & status ) ;
2001-05-08 23:38:16 +00:00
2001-06-22 18:35:01 +00:00
for ( j = 0 ; j < nameSize ; j + + ) {
cName [ j ] = ( char ) name [ j ] ;
2001-05-08 23:38:16 +00:00
}
2001-06-22 18:35:01 +00:00
cName [ nameSize ] = 0 ;
log_verbose ( " \n Testing locale %s (%s) \n " , locName , cName ) ;
2001-05-17 23:09:35 +00:00
2001-05-08 23:38:16 +00:00
coll = ucol_open ( locName , & status ) ;
2001-10-31 23:59:35 +00:00
ucol_setStrength ( coll , UCOL_IDENTICAL ) ;
2003-12-10 23:56:55 +00:00
iter = ucol_openElements ( coll , t [ u ] - > NFD , u_strlen ( t [ u ] - > NFD ) , & status ) ;
2001-05-17 23:09:35 +00:00
2002-09-20 16:02:16 +00:00
for ( u = 0 ; u < ( UChar32 ) noCases ; u + + ) {
2007-12-15 10:01:09 +00:00
if ( ! ucol_equal ( coll , t [ u ] - > NFC , - 1 , t [ u ] - > NFD , - 1 ) ) {
log_err ( " Failure: codePoint %05X fails TestComposeDecompose for locale %s \n " , t [ u ] - > u , cName ) ;
doTest ( coll , t [ u ] - > NFC , t [ u ] - > NFD , UCOL_EQUAL ) ;
log_verbose ( " Testing NFC \n " ) ;
ucol_setText ( iter , t [ u ] - > NFC , u_strlen ( t [ u ] - > NFC ) , & status ) ;
backAndForth ( iter ) ;
log_verbose ( " Testing NFD \n " ) ;
ucol_setText ( iter , t [ u ] - > NFD , u_strlen ( t [ u ] - > NFD ) , & status ) ;
backAndForth ( iter ) ;
}
2001-05-08 23:38:16 +00:00
}
2003-12-10 23:56:55 +00:00
ucol_closeElements ( iter ) ;
2001-05-08 23:38:16 +00:00
ucol_close ( coll ) ;
2001-04-19 19:01:39 +00:00
}
}
2002-09-20 16:02:16 +00:00
for ( u = 0 ; u < = ( UChar32 ) noCases ; u + + ) {
2002-07-29 21:04:18 +00:00
free ( t [ u ] ) ;
2001-05-08 23:38:16 +00:00
}
2002-07-29 21:04:18 +00:00
free ( t ) ;
2001-04-19 19:01:39 +00:00
}
2002-03-28 18:26:25 +00:00
static void TestEmptyRule ( void ) {
2001-04-23 03:50:15 +00:00
UErrorCode status = U_ZERO_ERROR ;
UChar rulez [ ] = { 0 } ;
2001-09-22 01:24:03 +00:00
UCollator * coll = ucol_openRules ( rulez , 0 , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
2001-04-23 03:50:15 +00:00
ucol_close ( coll ) ;
}
2001-04-30 19:11:32 +00:00
2002-03-28 18:26:25 +00:00
static void TestUCARules ( void ) {
2001-04-30 19:11:32 +00:00
UErrorCode status = U_ZERO_ERROR ;
UChar b [ 256 ] ;
2001-05-31 06:07:06 +00:00
UChar * rules = b ;
2003-06-04 19:02:41 +00:00
uint32_t ruleLen = 0 ;
2001-05-31 06:07:06 +00:00
UCollator * UCAfromRules = NULL ;
2001-04-30 19:11:32 +00:00
UCollator * coll = ucol_open ( " " , & status ) ;
2003-06-04 19:02:41 +00:00
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
ruleLen = ucol_getRulesEx ( coll , UCOL_FULL_RULES , rules , 256 ) ;
2001-05-31 17:27:44 +00:00
log_verbose ( " TestUCARules \n " ) ;
2001-05-31 06:07:06 +00:00
if ( ruleLen > 256 ) {
rules = ( UChar * ) malloc ( ( ruleLen + 1 ) * sizeof ( UChar ) ) ;
ruleLen = ucol_getRulesEx ( coll , UCOL_FULL_RULES , rules , ruleLen ) ;
}
log_verbose ( " Rules length is %d \n " , ruleLen ) ;
2001-09-22 01:24:03 +00:00
UCAfromRules = ucol_openRules ( rules , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
2001-05-31 06:07:06 +00:00
if ( U_SUCCESS ( status ) ) {
ucol_close ( UCAfromRules ) ;
} else {
log_verbose ( " Unable to create a collator from UCARules! \n " ) ;
}
/*
2001-04-30 19:11:32 +00:00
u_unescape ( blah , b , 256 ) ;
ucol_getSortKey ( coll , b , 1 , res , 256 ) ;
2001-05-31 06:07:06 +00:00
*/
2001-04-30 19:11:32 +00:00
ucol_close ( coll ) ;
2001-09-28 16:34:05 +00:00
if ( rules ! = b ) {
2001-05-31 06:07:06 +00:00
free ( rules ) ;
}
2001-04-30 19:11:32 +00:00
}
/* Pinyin tonal order */
/*
A < . . ( \ u0101 ) < . . ( \ u00e1 ) < . . ( \ u01ce ) < . . ( \ u00e0 )
( w / macron ) < ( w / acute ) < ( w / caron ) < ( w / grave )
E < . . ( \ u0113 ) < . . ( \ u00e9 ) < . . ( \ u011b ) < . . ( \ u00e8 )
I < . . ( \ u012b ) < . . ( \ u00ed ) < . . ( \ u01d0 ) < . . ( \ u00ec )
O < . . ( \ u014d ) < . . ( \ u00f3 ) < . . ( \ u01d2 ) < . . ( \ u00f2 )
2001-05-17 23:09:35 +00:00
U < . . ( \ u016b ) < . . ( \ u00fa ) < . . ( \ u01d4 ) < . . ( \ u00f9 )
2001-04-30 19:11:32 +00:00
< . . ( \ u01d6 ) < . . ( \ u01d8 ) < . . ( \ u01da ) < . . ( \ u01dc ) <
. . ( \ u00fc )
However , in testing we got the following order :
A < . . ( \ u00e1 ) < . . ( \ u00e0 ) < . . ( \ u01ce ) < . . ( \ u0101 )
( w / acute ) < ( w / grave ) < ( w / caron ) < ( w / macron )
E < . . ( \ u00e9 ) < . . ( \ u00e8 ) < . . ( \ u00ea ) < . . ( \ u011b ) <
. . ( \ u0113 )
I < . . ( \ u00ed ) < . . ( \ u00ec ) < . . ( \ u01d0 ) < . . ( \ u012b )
O < . . ( \ u00f3 ) < . . ( \ u00f2 ) < . . ( \ u01d2 ) < . . ( \ u014d )
U < . . ( \ u00fa ) < . . ( \ u00f9 ) < . . ( \ u01d4 ) < . . ( \ u00fc ) <
2001-05-17 23:09:35 +00:00
. . ( \ u01d8 )
2001-04-30 19:11:32 +00:00
< . . ( \ u01dc ) < . . ( \ u01da ) < . . ( \ u01d6 ) < . . ( \ u016b )
*/
2001-05-02 05:05:06 +00:00
2002-03-28 18:26:25 +00:00
/*
 * Hands a six-part tailoring (five "[before 1]" resets on a, e, i, o, u plus
 * a plain reset on u) and a 30-entry data list to genericRulesStarter().
 */
static void TestBefore(void) {
    static const char *data[] = {
        " \\ u0101 ", " \\ u00e1 ", " \\ u01ce ", " \\ u00e0 ", " A ",
        " \\ u0113 ", " \\ u00e9 ", " \\ u011b ", " \\ u00e8 ", " E ",
        " \\ u012b ", " \\ u00ed ", " \\ u01d0 ", " \\ u00ec ", " I ",
        " \\ u014d ", " \\ u00f3 ", " \\ u01d2 ", " \\ u00f2 ", " O ",
        " \\ u016b ", " \\ u00fa ", " \\ u01d4 ", " \\ u00f9 ", " U ",
        " \\ u01d6 ", " \\ u01d8 ", " \\ u01da ", " \\ u01dc ", " \\ u00fc "
    };
    /* Adjacent literals are concatenated by the compiler into one rule string. */
    const char *tonalRules =
        " &[before 1]a< \\ u0101< \\ u00e1< \\ u01ce< \\ u00e0 "
        " &[before 1]e< \\ u0113< \\ u00e9< \\ u011b< \\ u00e8 "
        " &[before 1]i< \\ u012b< \\ u00ed< \\ u01d0< \\ u00ec "
        " &[before 1]o< \\ u014d< \\ u00f3< \\ u01d2< \\ u00f2 "
        " &[before 1]u< \\ u016b< \\ u00fa< \\ u01d4< \\ u00f9 "
        " &u< \\ u01d6< \\ u01d8< \\ u01da< \\ u01dc< \\ u00fc ";

    genericRulesStarter(tonalRules, data, sizeof(data) / sizeof(data[0]));
}
2004-06-03 22:08:39 +00:00
#if 0
/* Disabled: superseded by TestBeforePinyin. */
/* Passes a 30-entry vowel/tone data list to genericLocaleStarter() for the locale string below. */
static void TestJ784(void) {
    static const char *data[] = {
        " A ", " \\ u0101 ", " \\ u00e1 ", " \\ u01ce ", " \\ u00e0 ",
        " E ", " \\ u0113 ", " \\ u00e9 ", " \\ u011b ", " \\ u00e8 ",
        " I ", " \\ u012b ", " \\ u00ed ", " \\ u01d0 ", " \\ u00ec ",
        " O ", " \\ u014d ", " \\ u00f3 ", " \\ u01d2 ", " \\ u00f2 ",
        " U ", " \\ u016b ", " \\ u00fa ", " \\ u01d4 ", " \\ u00f9 ",
        " \\ u00fc ",
        " \\ u01d6 ", " \\ u01d8 ", " \\ u01da ", " \\ u01dc "
    };

    genericLocaleStarter(" zh ", data, sizeof(data) / sizeof(data[0]));
}
#endif
2001-04-30 19:11:32 +00:00
2004-06-03 22:08:39 +00:00
#if 0
/* Disabled: superseded by the changes to the lv locale. */
/* Passes the four strings below to genericLocaleStarter() for the locale string used in the call. */
static void TestJ831(void) {
    static const char *data[] = { " I ", " i ", " Y ", " y " };

    genericLocaleStarter(" lv ", data, sizeof(data) / sizeof(data[0]));
}
#endif
2001-04-30 19:11:32 +00:00
2002-03-28 18:26:25 +00:00
/*
 * Runs one 14-entry data list twice: once through genericLocaleStarter()
 * with a locale string, and once through genericRulesStarter() with a
 * "[backwards 2]" rule that places the ae ligatures after A.
 */
static void TestJ815(void) {
    static const char *data[] = {
        " aa ", " Aa ",
        " ab ", " Ab ",
        " ad ", " Ad ",
        " ae ", " Ae ",
        " \\ u00e6 ", " \\ u00c6 ",
        " af ", " Af ",
        " b ", " B "
    };

    genericLocaleStarter(" fr ", data, sizeof(data) / sizeof(data[0]));
    genericRulesStarter(" [backwards 2]&A<< \\ u00e6/e<<< \\ u00c6/E ", data, sizeof(data) / sizeof(data[0]));
}
2001-05-17 23:16:34 +00:00
/*
2001-05-18 20:53:01 +00:00
" & a < b < c < d& r < c " , " & a < b < d& r < c " ,
" & a < b < c < d& c < m " , " & a < b < c < m < d " ,
" & a < b < c < d& a < m " , " & a < m < b < c < d " ,
" & a <<< b << c < d& a < m " , " & a <<< b << c < m < d " ,
" & a < b < c < d& [before 1] c < m " , " & a < b < m < c < d " ,
2001-05-29 22:53:07 +00:00
" & a < b <<< c << d <<< e& [before 3] e <<< x " , " & a < b <<< c << d <<< x <<< e " ,
" & a < b <<< c << d <<< e& [before 2] e <<< x " , " & a < b <<< c <<< x << d <<< e " ,
" & a < b <<< c << d <<< e& [before 1] e <<< x " , " & a <<< x < b <<< c << d <<< e " ,
" & a < b <<< c << d <<< e <<< f < g& [before 1] g < x " , " & a < b <<< c << d <<< e <<< f < x < g " ,
2001-05-17 23:16:34 +00:00
*/
2002-03-28 18:26:25 +00:00
static void TestRedundantRules ( void ) {
2001-05-17 23:16:34 +00:00
int32_t i ;
2006-09-28 08:41:37 +00:00
static const struct {
2004-05-08 07:59:36 +00:00
const char * rules ;
const char * expectedRules ;
const char * testdata [ 8 ] ;
uint32_t testdatalen ;
} tests [ ] = {
/* this test conflicts with positioning of CODAN placeholder */
2004-11-11 23:34:58 +00:00
/*{
" & a <<< b <<< c << d <<< e& [before 1] e <<< x " ,
2004-05-08 07:59:36 +00:00
" & \\ u2089<<<x " ,
{ " \\ u2089 " , " x " } , 2
} , */
/* this test conflicts with the [before x] syntax tightening */
/*{
" & b <<< c <<< d << e <<< f& [before 1] f <<< x " ,
" & \\ u0252<<<x " ,
{ " \\ u0252 " , " x " } , 2
} , */
/* this test conflicts with the [before x] syntax tightening */
/*{
" & a < b <<< c << d <<< e& [before 1] e <<< x " ,
" & a <<< x < b <<< c << d <<< e " ,
{ " a " , " x " , " b " , " c " , " d " , " e " } , 6
} , */
{
" & a < b < c < d& [before 1] c < m " ,
" & a < b < m < c < d " ,
{ " a " , " b " , " m " , " c " , " d " } , 5
} ,
{
" & a < b <<< c << d <<< e& [before 3] e <<< x " ,
" & a < b <<< c << d <<< x <<< e " ,
{ " a " , " b " , " c " , " d " , " x " , " e " } , 6
} ,
/* this test conflicts with the [before x] syntax tightening */
/* {
" & a < b <<< c << d <<< e& [before 2] e <<< x " ,
" & a < b <<< c <<< x << d <<< e " ,
{ " a " , " b " , " c " , " x " , " d " , " e " } , , 6
} , */
{
" & a < b <<< c << d <<< e <<< f < g& [before 1] g < x " ,
" & a < b <<< c << d <<< e <<< f < x < g " ,
{ " a " , " b " , " c " , " d " , " e " , " f " , " x " , " g " } , 8
} ,
{
" & a <<< b << c < d& a < m " ,
" & a <<< b << c < m < d " ,
{ " a " , " b " , " c " , " m " , " d " } , 5
} ,
{
" &a<b<<b \\ u0301 &z<b " ,
" &a<b \\ u0301 &z<b " ,
{ " a " , " b \\ u0301 " , " z " , " b " } , 4
} ,
{
" &z<m<<<q<<<m " ,
" &z<q<<<m " ,
{ " z " , " q " , " m " } , 3
} ,
{
" &z<<<m<q<<<m " ,
" &z<q<<<m " ,
{ " z " , " q " , " m " } , 3
} ,
{
" & a < b < c < d& r < c " ,
" & a < b < d& r < c " ,
{ " a " , " b " , " d " } , 3
} ,
{
" & a < b < c < d& r < c " ,
" & a < b < d& r < c " ,
{ " r " , " c " } , 2
} ,
{
" & a < b < c < d& c < m " ,
" & a < b < c < m < d " ,
{ " a " , " b " , " c " , " m " , " d " } , 5
} ,
{
" & a < b < c < d& a < m " ,
" & a < m < b < c < d " ,
{ " a " , " m " , " b " , " c " , " d " } , 5
}
2001-05-17 23:16:34 +00:00
} ;
UCollator * credundant = NULL ;
UCollator * cresulting = NULL ;
UErrorCode status = U_ZERO_ERROR ;
UChar rlz [ 2048 ] = { 0 } ;
uint32_t rlen = 0 ;
2004-05-08 07:59:36 +00:00
for ( i = 0 ; i < sizeof ( tests ) / sizeof ( tests [ 0 ] ) ; i + + ) {
log_verbose ( " testing rule %s, expected to be %s \n " , tests [ i ] . rules , tests [ i ] . expectedRules ) ;
rlen = u_unescape ( tests [ i ] . rules , rlz , 2048 ) ;
2001-05-31 17:27:44 +00:00
2001-09-22 01:24:03 +00:00
credundant = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2003-06-04 19:02:41 +00:00
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
2004-05-08 07:59:36 +00:00
rlen = u_unescape ( tests [ i ] . expectedRules , rlz , 2048 ) ;
2001-09-22 01:24:03 +00:00
cresulting = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2001-05-17 23:16:34 +00:00
2001-05-30 16:09:09 +00:00
testAgainstUCA ( cresulting , credundant , " expected " , TRUE , & status ) ;
2001-05-17 23:16:34 +00:00
ucol_close ( credundant ) ;
ucol_close ( cresulting ) ;
log_verbose ( " testing using data \n " ) ;
2004-05-08 07:59:36 +00:00
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . testdata , tests [ i ] . testdatalen ) ;
2001-05-17 23:16:34 +00:00
}
}
2002-03-28 18:26:25 +00:00
static void TestExpansionSyntax ( void ) {
2001-05-29 22:53:07 +00:00
int32_t i ;
const static char * rules [ ] = {
" &AE <<< a << b <<< c &d <<< f " ,
" &AE <<< a <<< b << c << d < e < f <<< g " ,
" &AE <<< B <<< C / D <<< F "
} ;
const static char * expectedRules [ ] = {
" &A <<< a / E << b / E <<< c /E &d <<< f " ,
" &A <<< a / E <<< b / E << c / E << d / E < e < f <<< g " ,
" &A <<< B / E <<< C / ED <<< F / E "
} ;
const static char * testdata [ ] [ 8 ] = {
{ " AE " , " a " , " b " , " c " } ,
{ " AE " , " a " , " b " , " c " , " d " , " e " , " f " , " g " } ,
{ " AE " , " B " , " C " } /* / ED <<< F / E"},*/
} ;
const static uint32_t testdatalen [ ] = {
4 ,
8 ,
3
} ;
UCollator * credundant = NULL ;
UCollator * cresulting = NULL ;
UErrorCode status = U_ZERO_ERROR ;
UChar rlz [ 2048 ] = { 0 } ;
uint32_t rlen = 0 ;
for ( i = 0 ; i < sizeof ( rules ) / sizeof ( rules [ 0 ] ) ; i + + ) {
log_verbose ( " testing rule %s, expected to be %s \n " , rules [ i ] , expectedRules [ i ] ) ;
rlen = u_unescape ( rules [ i ] , rlz , 2048 ) ;
2001-06-08 16:24:55 +00:00
2001-09-22 01:24:03 +00:00
credundant = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2003-06-04 19:02:41 +00:00
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
2001-05-29 22:53:07 +00:00
rlen = u_unescape ( expectedRules [ i ] , rlz , 2048 ) ;
2001-09-22 01:24:03 +00:00
cresulting = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2001-05-29 22:53:07 +00:00
2001-05-30 15:13:04 +00:00
/* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
/* as a hard error test, but only in information mode */
2001-05-30 16:09:09 +00:00
testAgainstUCA ( cresulting , credundant , " expected " , FALSE , & status ) ;
2001-05-29 22:53:07 +00:00
ucol_close ( credundant ) ;
ucol_close ( cresulting ) ;
log_verbose ( " testing using data \n " ) ;
genericRulesStarter ( rules [ i ] , testdata [ i ] , testdatalen [ i ] ) ;
}
}
2002-03-28 18:26:25 +00:00
static void TestCase ( void )
2001-05-17 23:16:34 +00:00
{
const static UChar gRules [ MAX_TOKEN_LEN ] =
/*" & 0 < 1,\u2461<a,A"*/
{ 0x0026 , 0x0030 , 0x003C , 0x0031 , 0x002C , 0x2460 , 0x003C , 0x0061 , 0x002C , 0x0041 , 0x0000 } ;
const static UChar testCase [ ] [ MAX_TOKEN_LEN ] =
{
/*0*/ { 0x0031 /*'1'*/ , 0x0061 /*'a'*/ , 0x0000 } ,
/*1*/ { 0x0031 /*'1'*/ , 0x0041 /*'A'*/ , 0x0000 } ,
/*2*/ { 0x2460 /*circ'1'*/ , 0x0061 /*'a'*/ , 0x0000 } ,
/*3*/ { 0x2460 /*circ'1'*/ , 0x0041 /*'A'*/ , 0x0000 }
} ;
const static UCollationResult caseTestResults [ ] [ 9 ] =
{
2007-07-19 00:19:21 +00:00
{ UCOL_LESS , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_EQUAL , UCOL_LESS } ,
{ UCOL_GREATER , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_EQUAL , UCOL_GREATER } ,
{ UCOL_LESS , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_GREATER , UCOL_LESS , UCOL_EQUAL , UCOL_EQUAL , UCOL_LESS } ,
{ UCOL_GREATER , UCOL_LESS , UCOL_GREATER , UCOL_EQUAL , UCOL_LESS , UCOL_LESS , UCOL_EQUAL , UCOL_EQUAL , UCOL_GREATER }
2001-05-17 23:16:34 +00:00
} ;
const static UColAttributeValue caseTestAttributes [ ] [ 2 ] =
{
2007-07-19 00:19:21 +00:00
{ UCOL_LOWER_FIRST , UCOL_OFF } ,
{ UCOL_UPPER_FIRST , UCOL_OFF } ,
{ UCOL_LOWER_FIRST , UCOL_ON } ,
{ UCOL_UPPER_FIRST , UCOL_ON }
2001-05-17 23:16:34 +00:00
} ;
int32_t i , j , k ;
UErrorCode status = U_ZERO_ERROR ;
2003-11-12 20:45:53 +00:00
UCollationElements * iter ;
2001-05-17 23:16:34 +00:00
UCollator * myCollation ;
myCollation = ucol_open ( " en_US " , & status ) ;
2003-11-12 20:45:53 +00:00
2001-05-17 23:16:34 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
2001-05-17 23:16:34 +00:00
return ;
}
log_verbose ( " Testing different case settings \n " ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
for ( k = 0 ; k < 4 ; k + + ) {
ucol_setAttribute ( myCollation , UCOL_CASE_FIRST , caseTestAttributes [ k ] [ 0 ] , & status ) ;
ucol_setAttribute ( myCollation , UCOL_CASE_LEVEL , caseTestAttributes [ k ] [ 1 ] , & status ) ;
log_verbose ( " Case first = %d, Case level = %d \n " , caseTestAttributes [ k ] [ 0 ] , caseTestAttributes [ k ] [ 1 ] ) ;
for ( i = 0 ; i < 3 ; i + + ) {
for ( j = i + 1 ; j < 4 ; j + + ) {
doTest ( myCollation , testCase [ i ] , testCase [ j ] , caseTestResults [ k ] [ 3 * i + j - 1 ] ) ;
}
}
}
ucol_close ( myCollation ) ;
2001-09-22 01:24:03 +00:00
myCollation = ucol_openRules ( gRules , u_strlen ( gRules ) , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
2001-05-17 23:16:34 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
log_verbose ( " Testing different case settings with custom rules \n " ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
2004-11-11 23:34:58 +00:00
2001-05-17 23:16:34 +00:00
for ( k = 0 ; k < 4 ; k + + ) {
ucol_setAttribute ( myCollation , UCOL_CASE_FIRST , caseTestAttributes [ k ] [ 0 ] , & status ) ;
ucol_setAttribute ( myCollation , UCOL_CASE_LEVEL , caseTestAttributes [ k ] [ 1 ] , & status ) ;
for ( i = 0 ; i < 3 ; i + + ) {
for ( j = i + 1 ; j < 4 ; j + + ) {
2001-09-21 21:22:44 +00:00
log_verbose ( " k:%d, i:%d, j:%d \n " , k , i , j ) ;
2001-05-17 23:16:34 +00:00
doTest ( myCollation , testCase [ i ] , testCase [ j ] , caseTestResults [ k ] [ 3 * i + j - 1 ] ) ;
2003-11-12 20:45:53 +00:00
iter = ucol_openElements ( myCollation , testCase [ i ] , u_strlen ( testCase [ i ] ) , & status ) ;
backAndForth ( iter ) ;
ucol_closeElements ( iter ) ;
iter = ucol_openElements ( myCollation , testCase [ j ] , u_strlen ( testCase [ j ] ) , & status ) ;
backAndForth ( iter ) ;
ucol_closeElements ( iter ) ;
2001-05-17 23:16:34 +00:00
}
}
}
2001-09-28 16:34:05 +00:00
ucol_close ( myCollation ) ;
2001-05-17 23:16:34 +00:00
{
const static char * lowerFirst [ ] = {
" h " ,
" H " ,
" ch " ,
" Ch " ,
" CH " ,
" cha " ,
" chA " ,
" Cha " ,
" ChA " ,
" CHa " ,
" CHA " ,
" i " ,
" I "
} ;
const static char * upperFirst [ ] = {
" H " ,
" h " ,
" CH " ,
" Ch " ,
" ch " ,
" CHA " ,
" CHa " ,
" ChA " ,
" Cha " ,
" chA " ,
" cha " ,
" I " ,
" i "
} ;
log_verbose ( " mixed case test \n " ) ;
log_verbose ( " lower first, case level off \n " ) ;
genericRulesStarter ( " [casefirst lower]&H<ch<<<Ch<<<CH " , lowerFirst , sizeof ( lowerFirst ) / sizeof ( lowerFirst [ 0 ] ) ) ;
log_verbose ( " upper first, case level off \n " ) ;
genericRulesStarter ( " [casefirst upper]&H<ch<<<Ch<<<CH " , upperFirst , sizeof ( upperFirst ) / sizeof ( upperFirst [ 0 ] ) ) ;
log_verbose ( " lower first, case level on \n " ) ;
genericRulesStarter ( " [casefirst lower][caselevel on]&H<ch<<<Ch<<<CH " , lowerFirst , sizeof ( lowerFirst ) / sizeof ( lowerFirst [ 0 ] ) ) ;
log_verbose ( " upper first, case level on \n " ) ;
genericRulesStarter ( " [casefirst upper][caselevel on]&H<ch<<<Ch<<<CH " , upperFirst , sizeof ( upperFirst ) / sizeof ( upperFirst [ 0 ] ) ) ;
}
}
2002-03-28 18:26:25 +00:00
static void TestIncrementalNormalize ( void ) {
2001-05-17 23:09:35 +00:00
2003-01-20 07:42:58 +00:00
/*UChar baseA =0x61;*/
2001-05-17 23:09:35 +00:00
UChar baseA = 0x41 ;
2001-05-18 20:53:01 +00:00
/* UChar baseB = 0x42;*/
2006-10-11 08:31:55 +00:00
static const UChar ccMix [ ] = { 0x316 , 0x321 , 0x300 } ;
2003-01-20 07:42:58 +00:00
/*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2001-05-18 20:53:01 +00:00
/*
0x316 is combining grave accent below , cc = 220
0x321 is combining palatalized hook below , cc = 202
0x300 is combining grave accent , cc = 230
*/
2001-05-17 23:09:35 +00:00
2008-02-17 19:32:34 +00:00
# define MAXSLEN 2000
2006-10-11 08:31:55 +00:00
/*int maxSLen = 64000;*/
2001-05-17 23:09:35 +00:00
int sLen ;
int i ;
2001-05-25 19:30:01 +00:00
UCollator * coll ;
UErrorCode status = U_ZERO_ERROR ;
UCollationResult result ;
2010-04-07 16:18:38 +00:00
int32_t myQ = getTestOption ( QUICK_OPTION ) ;
2003-02-20 01:13:36 +00:00
2010-04-07 16:18:38 +00:00
if ( getTestOption ( QUICK_OPTION ) < 0 ) {
setTestOption ( QUICK_OPTION , 1 ) ;
2003-02-20 01:13:36 +00:00
}
2001-05-25 19:30:01 +00:00
{
2001-05-28 20:32:29 +00:00
/* Test 1. Run very long unnormalized strings, to force overflow of*/
/* most buffers along the way.*/
2008-02-17 19:32:34 +00:00
UChar strA [ MAXSLEN + 1 ] ;
UChar strB [ MAXSLEN + 1 ] ;
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
coll = ucol_open ( " en_US " , & status ) ;
2003-06-04 19:02:41 +00:00
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
2002-08-21 19:09:33 +00:00
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
2001-09-28 16:34:05 +00:00
2008-02-17 19:32:34 +00:00
/*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
/*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2003-02-20 01:13:36 +00:00
/*for (sLen = 1000; sLen<1001; sLen++) {*/
for ( sLen = 500 ; sLen < 501 ; sLen + + ) {
2003-04-30 23:26:55 +00:00
/*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2001-05-25 19:30:01 +00:00
strA [ 0 ] = baseA ;
strB [ 0 ] = baseA ;
for ( i = 1 ; i < = sLen - 1 ; i + + ) {
strA [ i ] = ccMix [ i % 3 ] ;
strB [ sLen - i ] = ccMix [ i % 3 ] ;
}
strA [ sLen ] = 0 ;
strB [ sLen ] = 0 ;
2001-09-28 16:34:05 +00:00
2001-05-28 20:32:29 +00:00
ucol_setStrength ( coll , UCOL_TERTIARY ) ; /* Do test with default strength, which runs*/
doTest ( coll , strA , strB , UCOL_EQUAL ) ; /* optimized functions in the impl*/
ucol_setStrength ( coll , UCOL_IDENTICAL ) ; /* Do again with the slow, general impl.*/
2001-05-25 19:30:01 +00:00
doTest ( coll , strA , strB , UCOL_EQUAL ) ;
2001-05-17 23:09:35 +00:00
}
2001-05-25 19:30:01 +00:00
}
2010-04-07 16:18:38 +00:00
setTestOption ( QUICK_OPTION , myQ ) ;
2003-02-20 01:13:36 +00:00
2001-05-17 23:09:35 +00:00
2001-05-28 20:32:29 +00:00
/* Test 2: Non-normal sequence in a string that extends to the last character*/
/* of the string. Checks a couple of edge cases.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x41 , 0x300 , 0x316 , 0 } ;
static const UChar strB [ ] = { 0x41 , 0xc0 , 0x316 , 0 } ;
2001-05-25 19:30:01 +00:00
ucol_setStrength ( coll , UCOL_TERTIARY ) ;
2001-05-17 23:09:35 +00:00
doTest ( coll , strA , strB , UCOL_EQUAL ) ;
}
2001-05-28 20:32:29 +00:00
/* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2004-11-11 23:34:58 +00:00
/* New UCA 3.1.1.
* test below used a code point from Desseret , which sorts differently
2002-06-13 18:35:27 +00:00
* than d800 dc00
*/
/*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x41 , 0x300 , 0x316 , 0xD800 , 0xDC01 , 0 } ;
static const UChar strB [ ] = { 0x41 , 0xc0 , 0x316 , 0xD800 , 0xDC00 , 0 } ;
2001-05-25 19:30:01 +00:00
ucol_setStrength ( coll , UCOL_TERTIARY ) ;
doTest ( coll , strA , strB , UCOL_GREATER ) ;
}
2001-05-28 20:32:29 +00:00
/* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x00 , 0x42 , 0x00 } ;
static const UChar strB [ ] = { 0x41 , 0x00 , 0x00 , 0x00 } ;
2001-05-25 19:30:01 +00:00
char sortKeyA [ 50 ] ;
char sortKeyAz [ 50 ] ;
char sortKeyB [ 50 ] ;
char sortKeyBz [ 50 ] ;
int r ;
2002-07-02 22:36:04 +00:00
/* there used to be -3 here. Hmmmm.... */
/*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
result = ucol_strcoll ( coll , strA , 3 , strB , 3 ) ;
2001-05-25 19:30:01 +00:00
if ( result ! = UCOL_GREATER ) {
log_err ( " ERROR 1 in test 4 \n " ) ;
}
result = ucol_strcoll ( coll , strA , - 1 , strB , - 1 ) ;
if ( result ! = UCOL_EQUAL ) {
log_err ( " ERROR 2 in test 4 \n " ) ;
}
2001-05-30 16:09:09 +00:00
ucol_getSortKey ( coll , strA , 3 , ( uint8_t * ) sortKeyA , sizeof ( sortKeyA ) ) ;
ucol_getSortKey ( coll , strA , - 1 , ( uint8_t * ) sortKeyAz , sizeof ( sortKeyAz ) ) ;
ucol_getSortKey ( coll , strB , 3 , ( uint8_t * ) sortKeyB , sizeof ( sortKeyB ) ) ;
ucol_getSortKey ( coll , strB , - 1 , ( uint8_t * ) sortKeyBz , sizeof ( sortKeyBz ) ) ;
2001-05-25 19:30:01 +00:00
r = strcmp ( sortKeyA , sortKeyAz ) ;
if ( r < = 0 ) {
log_err ( " Error 3 in test 4 \n " ) ;
}
r = strcmp ( sortKeyA , sortKeyB ) ;
if ( r < = 0 ) {
log_err ( " Error 4 in test 4 \n " ) ;
}
r = strcmp ( sortKeyAz , sortKeyBz ) ;
if ( r ! = 0 ) {
log_err ( " Error 5 in test 4 \n " ) ;
}
ucol_setStrength ( coll , UCOL_IDENTICAL ) ;
2001-05-30 16:09:09 +00:00
ucol_getSortKey ( coll , strA , 3 , ( uint8_t * ) sortKeyA , sizeof ( sortKeyA ) ) ;
ucol_getSortKey ( coll , strA , - 1 , ( uint8_t * ) sortKeyAz , sizeof ( sortKeyAz ) ) ;
ucol_getSortKey ( coll , strB , 3 , ( uint8_t * ) sortKeyB , sizeof ( sortKeyB ) ) ;
ucol_getSortKey ( coll , strB , - 1 , ( uint8_t * ) sortKeyBz , sizeof ( sortKeyBz ) ) ;
2001-05-25 19:30:01 +00:00
r = strcmp ( sortKeyA , sortKeyAz ) ;
if ( r < = 0 ) {
log_err ( " Error 6 in test 4 \n " ) ;
}
r = strcmp ( sortKeyA , sortKeyB ) ;
if ( r < = 0 ) {
log_err ( " Error 7 in test 4 \n " ) ;
}
r = strcmp ( sortKeyAz , sortKeyBz ) ;
if ( r ! = 0 ) {
log_err ( " Error 8 in test 4 \n " ) ;
}
ucol_setStrength ( coll , UCOL_TERTIARY ) ;
}
2001-09-28 16:34:05 +00:00
2001-05-28 20:32:29 +00:00
/* Test 5: Null characters in non-normal source strings.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x41 , 0x300 , 0x316 , 0x00 , 0x42 , 0x00 } ;
static const UChar strB [ ] = { 0x41 , 0x41 , 0x300 , 0x316 , 0x00 , 0x00 , 0x00 } ;
2001-05-25 19:30:01 +00:00
char sortKeyA [ 50 ] ;
char sortKeyAz [ 50 ] ;
char sortKeyB [ 50 ] ;
char sortKeyBz [ 50 ] ;
int r ;
result = ucol_strcoll ( coll , strA , 6 , strB , 6 ) ;
if ( result ! = UCOL_GREATER ) {
log_err ( " ERROR 1 in test 5 \n " ) ;
}
result = ucol_strcoll ( coll , strA , - 1 , strB , - 1 ) ;
if ( result ! = UCOL_EQUAL ) {
log_err ( " ERROR 2 in test 5 \n " ) ;
}
2001-05-30 16:09:09 +00:00
ucol_getSortKey ( coll , strA , 6 , ( uint8_t * ) sortKeyA , sizeof ( sortKeyA ) ) ;
ucol_getSortKey ( coll , strA , - 1 , ( uint8_t * ) sortKeyAz , sizeof ( sortKeyAz ) ) ;
ucol_getSortKey ( coll , strB , 6 , ( uint8_t * ) sortKeyB , sizeof ( sortKeyB ) ) ;
ucol_getSortKey ( coll , strB , - 1 , ( uint8_t * ) sortKeyBz , sizeof ( sortKeyBz ) ) ;
2001-05-25 19:30:01 +00:00
r = strcmp ( sortKeyA , sortKeyAz ) ;
if ( r < = 0 ) {
log_err ( " Error 3 in test 5 \n " ) ;
}
r = strcmp ( sortKeyA , sortKeyB ) ;
if ( r < = 0 ) {
log_err ( " Error 4 in test 5 \n " ) ;
}
r = strcmp ( sortKeyAz , sortKeyBz ) ;
if ( r ! = 0 ) {
log_err ( " Error 5 in test 5 \n " ) ;
}
ucol_setStrength ( coll , UCOL_IDENTICAL ) ;
2001-05-30 16:09:09 +00:00
ucol_getSortKey ( coll , strA , 6 , ( uint8_t * ) sortKeyA , sizeof ( sortKeyA ) ) ;
ucol_getSortKey ( coll , strA , - 1 , ( uint8_t * ) sortKeyAz , sizeof ( sortKeyAz ) ) ;
ucol_getSortKey ( coll , strB , 6 , ( uint8_t * ) sortKeyB , sizeof ( sortKeyB ) ) ;
ucol_getSortKey ( coll , strB , - 1 , ( uint8_t * ) sortKeyBz , sizeof ( sortKeyBz ) ) ;
2001-05-25 19:30:01 +00:00
r = strcmp ( sortKeyA , sortKeyAz ) ;
if ( r < = 0 ) {
log_err ( " Error 6 in test 5 \n " ) ;
}
r = strcmp ( sortKeyA , sortKeyB ) ;
if ( r < = 0 ) {
log_err ( " Error 7 in test 5 \n " ) ;
}
r = strcmp ( sortKeyAz , sortKeyBz ) ;
if ( r ! = 0 ) {
log_err ( " Error 8 in test 5 \n " ) ;
}
ucol_setStrength ( coll , UCOL_TERTIARY ) ;
}
2001-09-28 16:34:05 +00:00
2001-05-28 20:32:29 +00:00
/* Test 6: Null character as base of a non-normal combining sequence.*/
2001-09-28 16:34:05 +00:00
2001-05-25 19:30:01 +00:00
{
2006-09-28 08:41:37 +00:00
static const UChar strA [ ] = { 0x41 , 0x0 , 0x300 , 0x316 , 0x41 , 0x302 , 0x00 } ;
static const UChar strB [ ] = { 0x41 , 0x0 , 0x302 , 0x316 , 0x41 , 0x300 , 0x00 } ;
2001-05-25 19:30:01 +00:00
result = ucol_strcoll ( coll , strA , 5 , strB , 5 ) ;
if ( result ! = UCOL_LESS ) {
log_err ( " Error 1 in test 6 \n " ) ;
}
result = ucol_strcoll ( coll , strA , - 1 , strB , - 1 ) ;
if ( result ! = UCOL_EQUAL ) {
log_err ( " Error 2 in test 6 \n " ) ;
}
}
2001-05-17 23:09:35 +00:00
ucol_close ( coll ) ;
}
2001-05-25 19:30:01 +00:00
2001-05-22 22:26:58 +00:00
#if 0
2002-03-28 18:26:25 +00:00
static void TestGetCaseBit ( void ) {
2001-05-22 22:26:58 +00:00
static const char * caseBitData [ ] = {
" a " , " A " , " ch " , " Ch " , " CH " ,
" \\ uFF9E " , " \\ u0009 "
} ;
static const uint8_t results [ ] = {
UCOL_LOWER_CASE , UCOL_UPPER_CASE , UCOL_LOWER_CASE , UCOL_MIXED_CASE , UCOL_UPPER_CASE ,
UCOL_UPPER_CASE , UCOL_LOWER_CASE
} ;
2001-05-17 23:09:35 +00:00
2001-05-22 22:26:58 +00:00
uint32_t i , blen = 0 ;
UChar b [ 256 ] = { 0 } ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * UCA = ucol_open ( " " , & status ) ;
uint8_t res = 0 ;
2001-09-28 16:34:05 +00:00
2001-05-22 22:26:58 +00:00
for ( i = 0 ; i < sizeof ( results ) / sizeof ( results [ 0 ] ) ; i + + ) {
blen = u_unescape ( caseBitData [ i ] , b , 256 ) ;
res = ucol_uprv_getCaseBits ( UCA , b , blen , & status ) ;
if ( results [ i ] ! = res ) {
log_err ( " Expected case = %02X, got %02X for %04X \n " , results [ i ] , res , b [ 0 ] ) ;
}
}
}
# endif
2002-03-28 18:26:25 +00:00
static void TestHangulTailoring ( void ) {
2001-05-31 17:27:44 +00:00
static const char * koreanData [ ] = {
2001-09-28 16:34:05 +00:00
" \\ uac00 " , " \\ u4f3d " , " \\ u4f73 " , " \\ u5047 " , " \\ u50f9 " , " \\ u52a0 " , " \\ u53ef " , " \\ u5475 " ,
" \\ u54e5 " , " \\ u5609 " , " \\ u5ac1 " , " \\ u5bb6 " , " \\ u6687 " , " \\ u67b6 " , " \\ u67b7 " , " \\ u67ef " ,
" \\ u6b4c " , " \\ u73c2 " , " \\ u75c2 " , " \\ u7a3c " , " \\ u82db " , " \\ u8304 " , " \\ u8857 " , " \\ u8888 " ,
" \\ u8a36 " , " \\ u8cc8 " , " \\ u8dcf " , " \\ u8efb " , " \\ u8fe6 " , " \\ u99d5 " ,
" \\ u4EEE " , " \\ u50A2 " , " \\ u5496 " , " \\ u54FF " , " \\ u5777 " , " \\ u5B8A " , " \\ u659D " , " \\ u698E " ,
2001-05-31 17:27:44 +00:00
" \\ u6A9F " , " \\ u73C8 " , " \\ u7B33 " , " \\ u801E " , " \\ u8238 " , " \\ u846D " , " \\ u8B0C "
} ;
2001-05-22 22:26:58 +00:00
2001-09-28 16:34:05 +00:00
const char * rules =
" & \\ uac00 <<< \\ u4f3d <<< \\ u4f73 <<< \\ u5047 <<< \\ u50f9 <<< \\ u52a0 <<< \\ u53ef <<< \\ u5475 "
" <<< \\ u54e5 <<< \\ u5609 <<< \\ u5ac1 <<< \\ u5bb6 <<< \\ u6687 <<< \\ u67b6 <<< \\ u67b7 <<< \\ u67ef "
" <<< \\ u6b4c <<< \\ u73c2 <<< \\ u75c2 <<< \\ u7a3c <<< \\ u82db <<< \\ u8304 <<< \\ u8857 <<< \\ u8888 "
" <<< \\ u8a36 <<< \\ u8cc8 <<< \\ u8dcf <<< \\ u8efb <<< \\ u8fe6 <<< \\ u99d5 "
2001-05-31 17:27:44 +00:00
" <<< \\ u4EEE <<< \\ u50A2 <<< \\ u5496 <<< \\ u54FF <<< \\ u5777 <<< \\ u5B8A <<< \\ u659D <<< \\ u698E "
" <<< \\ u6A9F <<< \\ u73C8 <<< \\ u7B33 <<< \\ u801E <<< \\ u8238 <<< \\ u846D <<< \\ u8B0C " ;
2001-05-22 22:26:58 +00:00
2001-09-27 23:19:12 +00:00
UErrorCode status = U_ZERO_ERROR ;
UChar rlz [ 2048 ] = { 0 } ;
uint32_t rlen = u_unescape ( rules , rlz , 2048 ) ;
UCollator * coll = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2003-06-04 19:02:41 +00:00
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
2001-09-27 23:19:12 +00:00
log_verbose ( " Using start of korean rules \n " ) ;
if ( U_SUCCESS ( status ) ) {
genericOrderingTest ( coll , koreanData , sizeof ( koreanData ) / sizeof ( koreanData [ 0 ] ) ) ;
} else {
log_err ( " Unable to open collator with rules %s \n " , rules ) ;
}
log_verbose ( " Setting jamoSpecial to TRUE and testing once more \n " ) ;
2001-09-28 16:34:05 +00:00
( ( UCATableHeader * ) coll - > image ) - > jamoSpecial = TRUE ; /* don't try this at home */
2001-09-27 23:19:12 +00:00
genericOrderingTest ( coll , koreanData , sizeof ( koreanData ) / sizeof ( koreanData [ 0 ] ) ) ;
ucol_close ( coll ) ;
log_verbose ( " Using ko__LOTUS locale \n " ) ;
genericLocaleStarter ( " ko__LOTUS " , koreanData , sizeof ( koreanData ) / sizeof ( koreanData [ 0 ] ) ) ;
2001-05-22 22:26:58 +00:00
}
2001-05-17 23:09:35 +00:00
2002-03-28 18:26:25 +00:00
static void TestCompressOverlap ( void ) {
2001-05-25 22:00:24 +00:00
UChar secstr [ 150 ] ;
UChar tertstr [ 150 ] ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll ;
2001-05-30 16:09:09 +00:00
char result [ 200 ] ;
2001-05-25 22:00:24 +00:00
uint32_t resultlen ;
int count = 0 ;
2001-05-30 16:09:09 +00:00
char * tempptr ;
2001-05-25 22:00:24 +00:00
coll = ucol_open ( " " , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Collator can't be created -> %s \n " , u_errorName ( status ) ) ;
2001-05-25 22:00:24 +00:00
return ;
}
while ( count < 149 ) {
secstr [ count ] = 0x0020 ; /* [06, 05, 05] */
tertstr [ count ] = 0x0020 ;
count + + ;
}
/* top down compression ----------------------------------- */
secstr [ count ] = 0x0332 ; /* [, 87, 05] */
tertstr [ count ] = 0x3000 ; /* [06, 05, 07] */
2001-09-28 16:34:05 +00:00
/* no compression secstr should have 150 secondary bytes, tertstr should
2001-05-25 22:00:24 +00:00
have 150 tertiary bytes .
2001-09-28 16:34:05 +00:00
with correct overlapping compression , secstr should have 4 secondary
2001-05-25 22:00:24 +00:00
bytes , tertstr should have > 2 tertiary bytes */
2001-05-30 16:09:09 +00:00
resultlen = ucol_getSortKey ( coll , secstr , 150 , ( uint8_t * ) result , 250 ) ;
2001-05-25 22:00:24 +00:00
tempptr = uprv_strchr ( result , 1 ) + 1 ;
while ( * ( tempptr + 1 ) ! = 1 ) {
2001-09-28 16:34:05 +00:00
/* the last secondary collation element is not checked since it is not
2001-05-25 22:00:24 +00:00
part of the compression */
if ( * tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2 ) {
log_err ( " Secondary compression overlapped \n " ) ;
}
tempptr + + ;
}
2001-09-28 16:34:05 +00:00
2001-05-25 22:00:24 +00:00
/* tertiary top/bottom/common for en_US is similar to the secondary
top / bottom / common */
2001-05-30 16:09:09 +00:00
resultlen = ucol_getSortKey ( coll , tertstr , 150 , ( uint8_t * ) result , 250 ) ;
2001-05-25 22:00:24 +00:00
tempptr = uprv_strrchr ( result , 1 ) + 1 ;
while ( * ( tempptr + 1 ) ! = 0 ) {
2001-09-28 16:34:05 +00:00
/* the last secondary collation element is not checked since it is not
2001-05-25 22:00:24 +00:00
part of the compression */
if ( * tempptr < coll - > tertiaryTop - coll - > tertiaryTopCount ) {
log_err ( " Tertiary compression overlapped \n " ) ;
}
tempptr + + ;
}
/* bottom up compression ------------------------------------- */
secstr [ count ] = 0 ;
tertstr [ count ] = 0 ;
2001-05-30 16:09:09 +00:00
resultlen = ucol_getSortKey ( coll , secstr , 150 , ( uint8_t * ) result , 250 ) ;
2001-05-25 22:00:24 +00:00
tempptr = uprv_strchr ( result , 1 ) + 1 ;
while ( * ( tempptr + 1 ) ! = 1 ) {
2001-09-28 16:34:05 +00:00
/* the last secondary collation element is not checked since it is not
2001-05-25 22:00:24 +00:00
part of the compression */
if ( * tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2 ) {
log_err ( " Secondary compression overlapped \n " ) ;
}
tempptr + + ;
}
2001-09-28 16:34:05 +00:00
2001-05-25 22:00:24 +00:00
/* tertiary top/bottom/common for en_US is similar to the secondary
top / bottom / common */
2001-05-30 16:09:09 +00:00
resultlen = ucol_getSortKey ( coll , tertstr , 150 , ( uint8_t * ) result , 250 ) ;
2001-05-25 22:00:24 +00:00
tempptr = uprv_strrchr ( result , 1 ) + 1 ;
while ( * ( tempptr + 1 ) ! = 0 ) {
2001-09-28 16:34:05 +00:00
/* the last secondary collation element is not checked since it is not
2001-05-25 22:00:24 +00:00
part of the compression */
if ( * tempptr > coll - > tertiaryBottom + coll - > tertiaryBottomCount ) {
log_err ( " Tertiary compression overlapped \n " ) ;
}
tempptr + + ;
}
2001-10-17 02:19:48 +00:00
ucol_close ( coll ) ;
2001-05-25 22:00:24 +00:00
}
2001-06-06 20:48:57 +00:00
static void TestCyrillicTailoring ( void ) {
static const char * test [ ] = {
2001-06-06 20:38:43 +00:00
" \\ u0410b " ,
" \\ u0410 \\ u0306a " ,
" \\ u04d0A "
2001-06-05 22:52:56 +00:00
} ;
2002-10-30 05:44:54 +00:00
2002-10-17 23:12:43 +00:00
/* Russian overrides contractions, so this test is not valid anymore */
2004-11-11 23:34:58 +00:00
/*genericLocaleStarter("ru", test, 3);*/
2002-10-17 23:12:43 +00:00
genericLocaleStarter ( " root " , test , 3 ) ;
2001-06-05 22:52:56 +00:00
genericRulesStarter ( " & \\ u0410 = \\ u0410 " , test , 3 ) ;
genericRulesStarter ( " &Z < \\ u0410 " , test , 3 ) ;
2001-06-06 20:38:43 +00:00
genericRulesStarter ( " & \\ u0410 = \\ u0410 < \\ u04d0 " , test , 3 ) ;
genericRulesStarter ( " &Z < \\ u0410 < \\ u04d0 " , test , 3 ) ;
genericRulesStarter ( " & \\ u0410 = \\ u0410 < \\ u0410 \\ u0301 " , test , 3 ) ;
genericRulesStarter ( " &Z < \\ u0410 < \\ u0410 \\ u0301 " , test , 3 ) ;
2001-06-05 22:52:56 +00:00
}
2002-10-30 05:44:54 +00:00
/*
 * Feeds two three-string orderings to genericRulesStarter under a rule that
 * consists only of a [suppressContractions ...] option for a Cyrillic range.
 */
static void TestSuppressContractions(void) {
    static const char *testNoCont2[] = {
        " \\ u0410 \\ u0302a ",
        " \\ u0410 \\ u0306b ",
        " \\ u0410c "
    };
    static const char *testNoCont[] = {
        " a \\ u0410 ",
        " A \\ u0410 \\ u0306 ",
        " \\ uFF21 \\ u0410 \\ u0302 "
    };
    /* Both runs use the identical rule text. */
    const char *suppressRule = " [suppressContractions [ \\ u0400- \\ u047f]] ";

    genericRulesStarter(suppressRule, testNoCont, 3);
    genericRulesStarter(suppressRule, testNoCont2, 3);
}
2002-03-28 18:26:25 +00:00
static void TestContraction ( void ) {
2001-06-05 18:09:06 +00:00
const static char * testrules [ ] = {
" &A = AB / B " ,
" &A = A \\ u0306/ \\ u0306 " ,
" &c = ch / h "
} ;
const static UChar testdata [ ] [ 2 ] = {
2001-06-11 18:38:05 +00:00
{ 0x0041 /* 'A' */ , 0x0042 /* 'B' */ } ,
{ 0x0041 /* 'A' */ , 0x0306 /* combining breve */ } ,
{ 0x0063 /* 'c' */ , 0x0068 /* 'h' */ }
2001-06-05 18:09:06 +00:00
} ;
const static UChar testdata2 [ ] [ 2 ] = {
2001-06-11 18:38:05 +00:00
{ 0x0063 /* 'c' */ , 0x0067 /* 'g' */ } ,
{ 0x0063 /* 'c' */ , 0x0068 /* 'h' */ } ,
{ 0x0063 /* 'c' */ , 0x006C /* 'l' */ }
2001-06-05 18:09:06 +00:00
} ;
const static char * testrules3 [ ] = {
2001-06-05 22:52:56 +00:00
" &z < xyz &xyzw << B " ,
" &z < xyz &xyz << B / w " ,
" &z < ch &achm << B " ,
" &z < ch &a << B / chm " ,
" & \\ ud800 \\ udc00w << B " ,
" & \\ ud800 \\ udc00 << B / w " ,
" &a \\ ud800 \\ udc00m << B " ,
" &a << B / \\ ud800 \\ udc00m " ,
2001-06-05 18:09:06 +00:00
} ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll ;
2001-06-05 22:52:56 +00:00
UChar rule [ 256 ] = { 0 } ;
2001-06-05 18:09:06 +00:00
uint32_t rlen = 0 ;
int i ;
for ( i = 0 ; i < sizeof ( testrules ) / sizeof ( testrules [ 0 ] ) ; i + + ) {
UCollationElements * iter1 ;
int j = 0 ;
log_verbose ( " Rule %s for testing \n " , testrules [ i ] ) ;
rlen = u_unescape ( testrules [ i ] , rule , 32 ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-05 18:09:06 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Collator creation failed %s -> %s \n " , testrules [ i ] , u_errorName ( status ) ) ;
2001-06-05 18:09:06 +00:00
return ;
}
iter1 = ucol_openElements ( coll , testdata [ i ] , 2 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Collation iterator creation failed \n " ) ;
return ;
}
while ( j < 2 ) {
2001-09-28 16:34:05 +00:00
UCollationElements * iter2 = ucol_openElements ( coll ,
& ( testdata [ i ] [ j ] ) ,
2001-06-05 18:09:06 +00:00
1 , & status ) ;
uint32_t ce ;
if ( U_FAILURE ( status ) ) {
log_err ( " Collation iterator creation failed \n " ) ;
return ;
}
ce = ucol_next ( iter2 , & status ) ;
while ( ce ! = UCOL_NULLORDER ) {
if ( ( uint32_t ) ucol_next ( iter1 , & status ) ! = ce ) {
log_err ( " Collation elements in contraction split does not match \n " ) ;
return ;
}
ce = ucol_next ( iter2 , & status ) ;
}
j + + ;
ucol_closeElements ( iter2 ) ;
}
if ( ucol_next ( iter1 , & status ) ! = UCOL_NULLORDER ) {
log_err ( " Collation elements not exhausted \n " ) ;
return ;
}
ucol_closeElements ( iter1 ) ;
ucol_close ( coll ) ;
}
2001-06-05 22:52:56 +00:00
rlen = u_unescape ( " & a < b < c < ch < d & c = ch / h " , rule , 256 ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-05 18:09:06 +00:00
if ( ucol_strcoll ( coll , testdata2 [ 0 ] , 2 , testdata2 [ 1 ] , 2 ) ! = UCOL_LESS ) {
log_err ( " Expected \\ u%04x \\ u%04x < \\ u%04x \\ u%04x \n " ,
2001-09-28 16:34:05 +00:00
testdata2 [ 0 ] [ 0 ] , testdata2 [ 0 ] [ 1 ] , testdata2 [ 1 ] [ 0 ] ,
2001-06-05 18:09:06 +00:00
testdata2 [ 1 ] [ 1 ] ) ;
return ;
}
if ( ucol_strcoll ( coll , testdata2 [ 1 ] , 2 , testdata2 [ 2 ] , 2 ) ! = UCOL_LESS ) {
log_err ( " Expected \\ u%04x \\ u%04x < \\ u%04x \\ u%04x \n " ,
2001-09-28 16:34:05 +00:00
testdata2 [ 1 ] [ 0 ] , testdata2 [ 1 ] [ 1 ] , testdata2 [ 2 ] [ 0 ] ,
2001-06-05 18:09:06 +00:00
testdata2 [ 2 ] [ 1 ] ) ;
return ;
}
ucol_close ( coll ) ;
for ( i = 0 ; i < sizeof ( testrules3 ) / sizeof ( testrules3 [ 0 ] ) ; i + = 2 ) {
UCollator * coll1 ,
* coll2 ;
UCollationElements * iter1 ,
* iter2 ;
2001-06-11 18:38:05 +00:00
UChar ch = 0x0042 /* 'B' */ ;
2001-06-05 18:09:06 +00:00
uint32_t ce ;
2001-06-05 22:52:56 +00:00
rlen = u_unescape ( testrules3 [ i ] , rule , 32 ) ;
2001-09-22 01:24:03 +00:00
coll1 = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-05 22:52:56 +00:00
rlen = u_unescape ( testrules3 [ i + 1 ] , rule , 32 ) ;
2001-09-22 01:24:03 +00:00
coll2 = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-05 18:09:06 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " Collator creation failed %s \n " , testrules [ i ] ) ;
return ;
}
iter1 = ucol_openElements ( coll1 , & ch , 1 , & status ) ;
iter2 = ucol_openElements ( coll2 , & ch , 1 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Collation iterator creation failed \n " ) ;
return ;
}
ce = ucol_next ( iter1 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Retrieving ces failed \n " ) ;
return ;
}
while ( ce ! = UCOL_NULLORDER ) {
if ( ce ! = ( uint32_t ) ucol_next ( iter2 , & status ) ) {
log_err ( " CEs does not match \n " ) ;
return ;
}
ce = ucol_next ( iter1 , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Retrieving ces failed \n " ) ;
return ;
}
}
if ( ucol_next ( iter2 , & status ) ! = UCOL_NULLORDER ) {
log_err ( " CEs not exhausted \n " ) ;
return ;
}
ucol_closeElements ( iter1 ) ;
ucol_closeElements ( iter2 ) ;
ucol_close ( coll1 ) ;
ucol_close ( coll2 ) ;
}
}
2002-03-28 18:26:25 +00:00
static void TestExpansion ( void ) {
2001-06-08 02:11:28 +00:00
const static char * testrules [ ] = {
" &J << K / B & K << M " ,
" &J << K / B << M "
} ;
const static UChar testdata [ ] [ 3 ] = {
2001-06-11 18:38:05 +00:00
{ 0x004A /*'J'*/ , 0x0041 /*'A'*/ , 0 } ,
{ 0x004D /*'M'*/ , 0x0041 /*'A'*/ , 0 } ,
{ 0x004B /*'K'*/ , 0x0041 /*'A'*/ , 0 } ,
{ 0x004B /*'K'*/ , 0x0043 /*'C'*/ , 0 } ,
{ 0x004A /*'J'*/ , 0x0043 /*'C'*/ , 0 } ,
{ 0x004D /*'M'*/ , 0x0043 /*'C'*/ , 0 }
2001-06-08 02:11:28 +00:00
} ;
2001-09-28 16:34:05 +00:00
2001-06-08 02:11:28 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll ;
UChar rule [ 256 ] = { 0 } ;
uint32_t rlen = 0 ;
int i ;
for ( i = 0 ; i < sizeof ( testrules ) / sizeof ( testrules [ 0 ] ) ; i + + ) {
int j = 0 ;
log_verbose ( " Rule %s for testing \n " , testrules [ i ] ) ;
rlen = u_unescape ( testrules [ i ] , rule , 32 ) ;
2001-09-22 01:24:03 +00:00
coll = ucol_openRules ( rule , rlen , UCOL_ON , UCOL_TERTIARY , NULL , & status ) ;
2001-06-08 02:11:28 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Collator creation failed %s -> %s \n " , testrules [ i ] , u_errorName ( status ) ) ;
2001-06-08 02:11:28 +00:00
return ;
}
2001-09-28 16:34:05 +00:00
2001-06-08 02:11:28 +00:00
for ( j = 0 ; j < 5 ; j + + ) {
doTest ( coll , testdata [ j ] , testdata [ j + 1 ] , UCOL_LESS ) ;
}
ucol_close ( coll ) ;
}
}
2001-11-14 06:55:15 +00:00
#if 0
/* this test exercises the current limitations of the engine */
/* it always fails, so it is disabled by default */
2002-03-28 18:26:25 +00:00
static void TestLimitations ( void ) {
2001-06-26 22:26:13 +00:00
/* recursive expansions */
{
static const char * rule = " &a=b/c&d=c/e " ;
static const char * tlimit01 [ ] = { " add " , " b " , " adf " } ;
static const char * tlimit02 [ ] = { " aa " , " b " , " af " } ;
log_verbose ( " recursive expansions \n " ) ;
genericRulesStarter ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) ) ;
genericRulesStarter ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) ) ;
}
/* contractions spanning expansions */
{
static const char * rule = " &a<<<c/e&g<<<eh " ;
static const char * tlimit01 [ ] = { " ad " , " c " , " af " , " f " , " ch " , " h " } ;
static const char * tlimit02 [ ] = { " ad " , " c " , " ch " , " af " , " f " , " h " } ;
log_verbose ( " contractions spanning expansions \n " ) ;
genericRulesStarter ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) ) ;
genericRulesStarter ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) ) ;
}
/* normalization: nulls in contractions */
{
static const char * rule = " &a<<< \\ u0000 \\ u0302 " ;
static const char * tlimit01 [ ] = { " a " , " \\ u0000 \\ u0302 \\ u0327 " } ;
static const char * tlimit02 [ ] = { " \\ u0000 \\ u0302 \\ u0327 " , " a " } ;
static const UColAttribute att [ ] = { UCOL_DECOMPOSITION_MODE } ;
2001-06-27 01:19:04 +00:00
static const UColAttributeValue valOn [ ] = { UCOL_ON } ;
static const UColAttributeValue valOff [ ] = { UCOL_OFF } ;
2001-06-26 22:26:13 +00:00
log_verbose ( " NULL in contractions \n " ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , 2 , att , valOn , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , 2 , att , valOn , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , 2 , att , valOff , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , 2 , att , valOff , 1 ) ;
}
/* normalization: contractions spanning normalization */
{
static const char * rule = " &a<<< \\ u0000 \\ u0302 " ;
static const char * tlimit01 [ ] = { " a " , " \\ u0000 \\ u0302 \\ u0327 " } ;
static const char * tlimit02 [ ] = { " \\ u0000 \\ u0302 \\ u0327 " , " a " } ;
static const UColAttribute att [ ] = { UCOL_DECOMPOSITION_MODE } ;
2001-06-27 01:19:04 +00:00
static const UColAttributeValue valOn [ ] = { UCOL_ON } ;
static const UColAttributeValue valOff [ ] = { UCOL_OFF } ;
2001-06-26 22:26:13 +00:00
log_verbose ( " contractions spanning normalization \n " ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , 2 , att , valOn , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , 2 , att , valOn , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , 2 , att , valOff , 1 ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , 2 , att , valOff , 1 ) ;
}
/* variable top: */
{
2001-11-10 06:54:28 +00:00
/*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2001-06-26 22:26:13 +00:00
static const char * rule = " & \\ u2010<x<[variable top]=z " ;
2001-11-10 06:54:28 +00:00
/*static const char *rule3 = "&' '<x<[variable top]=z";*/
2001-06-26 22:26:13 +00:00
static const char * tlimit01 [ ] = { " " , " z " , " zb " , " a " , " b " , " xb " , " b " , " c " } ;
static const char * tlimit02 [ ] = { " - " , " -x " , " x " , " xb " , " -z " , " z " , " zb " , " -a " , " a " , " -b " , " b " , " c " } ;
static const char * tlimit03 [ ] = { " " , " xb " , " z " , " zb " , " a " , " b " , " b " , " c " } ;
static const UColAttribute att [ ] = { UCOL_ALTERNATE_HANDLING , UCOL_STRENGTH } ;
2001-06-27 01:19:04 +00:00
static const UColAttributeValue valOn [ ] = { UCOL_SHIFTED , UCOL_QUATERNARY } ;
static const UColAttributeValue valOff [ ] = { UCOL_NON_IGNORABLE , UCOL_TERTIARY } ;
2001-06-26 22:26:13 +00:00
log_verbose ( " variable top \n " ) ;
genericRulesStarterWithOptions ( rule , tlimit03 , sizeof ( tlimit03 ) / sizeof ( tlimit03 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) , att , valOff , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) , att , valOff , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
}
/* case level */
{
static const char * rule = " &c<ch<<<cH<<<Ch<<<CH " ;
static const char * tlimit01 [ ] = { " c " , " CH " , " Ch " , " cH " , " ch " } ;
static const char * tlimit02 [ ] = { " c " , " CH " , " cH " , " Ch " , " ch " } ;
static const UColAttribute att [ ] = { UCOL_CASE_FIRST } ;
2001-06-27 01:19:04 +00:00
static const UColAttributeValue valOn [ ] = { UCOL_UPPER_FIRST } ;
2001-11-10 06:54:28 +00:00
/*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2001-06-26 22:26:13 +00:00
log_verbose ( " case level \n " ) ;
genericRulesStarterWithOptions ( rule , tlimit01 , sizeof ( tlimit01 ) / sizeof ( tlimit01 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
genericRulesStarterWithOptions ( rule , tlimit02 , sizeof ( tlimit02 ) / sizeof ( tlimit02 [ 0 ] ) , att , valOn , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
/*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
/*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
}
}
2001-11-14 06:55:15 +00:00
# endif
2001-06-26 22:26:13 +00:00
2002-03-28 18:26:25 +00:00
static void TestBocsuCoverage ( void ) {
2001-06-22 18:35:01 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-06-27 01:19:04 +00:00
const char * testString = " \\ u0041 \\ u0441 \\ u4441 \\ U00044441 \\ u4441 \\ u0441 \\ u0041 " ;
2001-06-22 18:35:01 +00:00
UChar test [ 256 ] = { 0 } ;
uint32_t tlen = u_unescape ( testString , test , 32 ) ;
uint8_t key [ 256 ] = { 0 } ;
uint32_t klen = 0 ;
UCollator * coll = ucol_open ( " " , & status ) ;
2003-06-04 06:53:23 +00:00
if ( U_SUCCESS ( status ) ) {
2003-07-22 16:45:58 +00:00
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_IDENTICAL , & status ) ;
2001-06-22 18:35:01 +00:00
2003-07-22 16:45:58 +00:00
klen = ucol_getSortKey ( coll , test , tlen , key , 256 ) ;
2001-06-22 18:35:01 +00:00
2003-07-22 16:45:58 +00:00
ucol_close ( coll ) ;
2003-06-04 06:53:23 +00:00
} else {
log_data_err ( " Couldn't open UCA \n " ) ;
}
2001-06-26 22:26:13 +00:00
}
2001-06-22 18:35:01 +00:00
2002-03-28 18:26:25 +00:00
static void TestVariableTopSetting ( void ) {
2001-06-26 22:26:13 +00:00
UErrorCode status = U_ZERO_ERROR ;
const UChar * current = NULL ;
2001-06-29 22:54:56 +00:00
uint32_t varTopOriginal = 0 , varTop1 , varTop2 ;
2001-06-26 22:26:13 +00:00
UCollator * coll = ucol_open ( " " , & status ) ;
2003-06-04 06:53:23 +00:00
if ( U_SUCCESS ( status ) ) {
2001-06-22 18:35:01 +00:00
2003-07-22 16:45:58 +00:00
uint32_t strength = 0 ;
uint16_t specs = 0 ;
uint32_t chOffset = 0 ;
uint32_t chLen = 0 ;
uint32_t exOffset = 0 ;
uint32_t exLen = 0 ;
uint32_t oldChOffset = 0 ;
uint32_t oldChLen = 0 ;
uint32_t oldExOffset = 0 ;
uint32_t oldExLen = 0 ;
uint32_t prefixOffset = 0 ;
uint32_t prefixLen = 0 ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
UBool startOfRules = TRUE ;
UColTokenParser src ;
UColOptionSet opts ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
UChar * rulesCopy = NULL ;
uint32_t rulesLen ;
2002-06-13 18:35:27 +00:00
2003-07-22 16:45:58 +00:00
UCollationResult result ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
UChar first [ 256 ] = { 0 } ;
UChar second [ 256 ] = { 0 } ;
UParseError parseError ;
2010-04-07 16:18:38 +00:00
int32_t myQ = getTestOption ( QUICK_OPTION ) ;
2003-02-20 08:19:04 +00:00
2010-06-14 22:07:11 +00:00
uprv_memset ( & src , 0 , sizeof ( UColTokenParser ) ) ;
2003-07-22 16:45:58 +00:00
src . opts = & opts ;
2003-06-04 06:53:23 +00:00
2010-04-07 16:18:38 +00:00
if ( getTestOption ( QUICK_OPTION ) < = 0 ) {
setTestOption ( QUICK_OPTION , 1 ) ;
2003-07-22 16:45:58 +00:00
}
/* this test will fail when normalization is turned on */
/* therefore we always turn off exhaustive mode for it */
2006-12-19 06:06:30 +00:00
{ /* QUICK > 0*/
2003-07-22 16:45:58 +00:00
log_verbose ( " Slide variable top over UCARules \n " ) ;
rulesLen = ucol_getRulesEx ( coll , UCOL_FULL_RULES , rulesCopy , 0 ) ;
2010-11-02 08:27:22 +00:00
rulesCopy = ( UChar * ) uprv_malloc ( ( rulesLen + UCOL_TOK_EXTRA_RULE_SPACE_SIZE ) * sizeof ( UChar ) ) ;
2003-07-22 16:45:58 +00:00
rulesLen = ucol_getRulesEx ( coll , UCOL_FULL_RULES , rulesCopy , rulesLen + UCOL_TOK_EXTRA_RULE_SPACE_SIZE ) ;
if ( U_SUCCESS ( status ) & & rulesLen > 0 ) {
ucol_setAttribute ( coll , UCOL_ALTERNATE_HANDLING , UCOL_SHIFTED , & status ) ;
src . current = src . source = rulesCopy ;
src . end = rulesCopy + rulesLen ;
src . extraCurrent = src . end ;
2004-11-11 23:34:58 +00:00
src . extraEnd = src . end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE ;
2003-07-22 16:45:58 +00:00
2010-10-28 17:28:06 +00:00
/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
the rules copy in src . source to get reallocated , freeing the original pointer in rulesCopy */
2003-07-22 16:45:58 +00:00
while ( ( current = ucol_tok_parseNextToken ( & src , startOfRules , & parseError , & status ) ) ! = NULL ) {
strength = src . parsedToken . strength ;
chOffset = src . parsedToken . charsOffset ;
chLen = src . parsedToken . charsLen ;
exOffset = src . parsedToken . extensionOffset ;
exLen = src . parsedToken . extensionLen ;
prefixOffset = src . parsedToken . prefixOffset ;
prefixLen = src . parsedToken . prefixLen ;
specs = src . parsedToken . flags ;
startOfRules = FALSE ;
2006-12-19 06:06:30 +00:00
{
2010-10-28 17:28:06 +00:00
log_verbose ( " %04X %d " , * ( src . source + chOffset ) , chLen ) ;
2003-07-22 16:45:58 +00:00
}
if ( strength = = UCOL_PRIMARY ) {
status = U_ZERO_ERROR ;
varTopOriginal = ucol_getVariableTop ( coll , & status ) ;
2010-10-28 17:28:06 +00:00
varTop1 = ucol_setVariableTop ( coll , src . source + oldChOffset , oldChLen , & status ) ;
2003-07-22 16:45:58 +00:00
if ( U_FAILURE ( status ) ) {
char buffer [ 256 ] ;
char * buf = buffer ;
uint32_t i = 0 , j ;
uint32_t CE = UCOL_NO_MORE_CES ;
/* before we start screaming, let's see if there is a problem with the rules */
2010-01-06 23:50:03 +00:00
UErrorCode collIterateStatus = U_ZERO_ERROR ;
collIterate * s = uprv_new_collIterate ( & collIterateStatus ) ;
2010-10-28 17:28:06 +00:00
uprv_init_collIterate ( coll , src . source + oldChOffset , oldChLen , s , & collIterateStatus ) ;
2003-07-22 16:45:58 +00:00
2010-01-06 23:50:03 +00:00
CE = ucol_getNextCE ( coll , s , & status ) ;
2003-07-22 16:45:58 +00:00
for ( i = 0 ; i < oldChLen ; i + + ) {
2010-10-28 17:28:06 +00:00
j = sprintf ( buf , " %04X " , * ( src . source + oldChOffset + i ) ) ;
2003-07-22 16:45:58 +00:00
buf + = j ;
}
if ( status = = U_PRIMARY_TOO_LONG_ERROR ) {
log_verbose ( " = Expected failure for %s = " , buffer ) ;
} else {
2010-01-06 23:50:03 +00:00
if ( uprv_collIterateAtEnd ( s ) ) {
2004-11-11 23:34:58 +00:00
log_err ( " Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s \n " ,
2003-07-22 16:45:58 +00:00
oldChOffset , u_errorName ( status ) , buffer ) ;
2003-06-04 06:53:23 +00:00
} else {
2004-11-11 23:34:58 +00:00
log_verbose ( " There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s \n " ,
2003-07-22 16:45:58 +00:00
buffer ) ;
2003-06-04 06:53:23 +00:00
}
2003-01-20 07:42:58 +00:00
}
2010-01-06 23:50:03 +00:00
uprv_delete_collIterate ( s ) ;
2001-06-26 22:26:13 +00:00
}
2003-07-22 16:45:58 +00:00
varTop2 = ucol_getVariableTop ( coll , & status ) ;
if ( ( varTop1 & 0xFFFF0000 ) ! = ( varTop2 & 0xFFFF0000 ) ) {
log_err ( " cannot retrieve set varTop value! \n " ) ;
continue ;
}
if ( ( varTop1 & 0xFFFF0000 ) > 0 & & oldExLen = = 0 ) {
2010-10-28 17:28:06 +00:00
u_strncpy ( first , src . source + oldChOffset , oldChLen ) ;
u_strncpy ( first + oldChLen , src . source + chOffset , chLen ) ;
u_strncpy ( first + oldChLen + chLen , src . source + oldChOffset , oldChLen ) ;
2003-07-22 16:45:58 +00:00
first [ 2 * oldChLen + chLen ] = 0 ;
if ( oldExLen = = 0 ) {
2010-10-28 17:28:06 +00:00
u_strncpy ( second , src . source + chOffset , chLen ) ;
2003-07-22 16:45:58 +00:00
second [ chLen ] = 0 ;
} else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
2010-10-28 17:28:06 +00:00
u_strncpy ( second , src . source + oldExOffset , oldExLen ) ;
u_strncpy ( second + oldChLen , src . source + chOffset , chLen ) ;
u_strncpy ( second + oldChLen + chLen , src . source + oldExOffset , oldExLen ) ;
2003-07-22 16:45:58 +00:00
second [ 2 * oldExLen + chLen ] = 0 ;
}
result = ucol_strcoll ( coll , first , - 1 , second , - 1 ) ;
if ( result = = UCOL_EQUAL ) {
doTest ( coll , first , second , UCOL_EQUAL ) ;
} else {
2010-10-28 17:28:06 +00:00
log_verbose ( " Suspicious strcoll result for %04X and %04X \n " , * ( src . source + oldChOffset ) , * ( src . source + chOffset ) ) ;
2003-07-22 16:45:58 +00:00
}
2003-06-04 06:53:23 +00:00
}
2001-06-26 22:26:13 +00:00
}
2003-07-22 16:45:58 +00:00
if ( strength ! = UCOL_TOK_RESET ) {
oldChOffset = chOffset ;
oldChLen = chLen ;
oldExOffset = exOffset ;
oldExLen = exLen ;
}
2003-06-04 06:53:23 +00:00
}
status = U_ZERO_ERROR ;
2003-01-20 07:42:58 +00:00
}
2003-07-22 16:45:58 +00:00
else {
log_err ( " Unexpected failure getting rules %s \n " , u_errorName ( status ) ) ;
return ;
}
if ( U_FAILURE ( status ) ) {
log_err ( " Error parsing rules %s \n " , u_errorName ( status ) ) ;
return ;
}
status = U_ZERO_ERROR ;
}
2001-06-26 22:26:13 +00:00
2010-04-07 16:18:38 +00:00
setTestOption ( QUICK_OPTION , myQ ) ;
2003-02-20 08:19:04 +00:00
2003-07-22 16:45:58 +00:00
log_verbose ( " Testing setting variable top to contractions \n " ) ;
{
/* uint32_t tailoredCE = UCOL_NOT_FOUND; */
/*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
UChar * conts = ( UChar * ) ( ( uint8_t * ) coll - > image + coll - > image - > contractionUCACombos ) ;
while ( * conts ! = 0 ) {
2008-04-04 22:47:43 +00:00
if ( ( * ( conts + 2 ) = = 0 ) | | ( * ( conts + 1 ) = = 0 ) ) { /* contracts or pre-context contractions */
2003-07-22 16:45:58 +00:00
varTop1 = ucol_setVariableTop ( coll , conts , - 1 , & status ) ;
} else {
varTop1 = ucol_setVariableTop ( coll , conts , 3 , & status ) ;
2001-06-26 22:26:13 +00:00
}
2003-07-22 16:45:58 +00:00
if ( U_FAILURE ( status ) ) {
2010-10-19 21:48:04 +00:00
if ( status = = U_PRIMARY_TOO_LONG_ERROR ) {
/* ucol_setVariableTop() is documented to not accept 3-byte primaries,
* therefore it is not an error when it complains about them . */
log_verbose ( " Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR \n " ,
* conts , * ( conts + 1 ) , * ( conts + 2 ) ) ;
} else {
log_err ( " Couldn't set variable top to a contraction %04X %04X %04X - %s \n " ,
* conts , * ( conts + 1 ) , * ( conts + 2 ) , u_errorName ( status ) ) ;
}
2003-07-22 16:45:58 +00:00
status = U_ZERO_ERROR ;
}
conts + = 3 ;
}
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
status = U_ZERO_ERROR ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
first [ 0 ] = 0x0040 ;
first [ 1 ] = 0x0050 ;
first [ 2 ] = 0x0000 ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
ucol_setVariableTop ( coll , first , - 1 , & status ) ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
if ( U_SUCCESS ( status ) ) {
log_err ( " Invalid contraction succeded in setting variable top! \n " ) ;
2001-06-26 22:26:13 +00:00
}
2003-07-22 16:45:58 +00:00
}
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
log_verbose ( " Test restoring variable top \n " ) ;
2001-06-26 22:26:13 +00:00
2003-07-22 16:45:58 +00:00
status = U_ZERO_ERROR ;
ucol_restoreVariableTop ( coll , varTopOriginal , & status ) ;
if ( varTopOriginal ! = ucol_getVariableTop ( coll , & status ) ) {
log_err ( " Couldn't restore old variable top \n " ) ;
}
log_verbose ( " Testing calling with error set \n " ) ;
status = U_INTERNAL_PROGRAM_ERROR ;
varTop1 = ucol_setVariableTop ( coll , first , 1 , & status ) ;
varTop2 = ucol_getVariableTop ( coll , & status ) ;
ucol_restoreVariableTop ( coll , varTop2 , & status ) ;
varTop1 = ucol_setVariableTop ( NULL , first , 1 , & status ) ;
varTop2 = ucol_getVariableTop ( NULL , & status ) ;
ucol_restoreVariableTop ( NULL , varTop2 , & status ) ;
if ( status ! = U_INTERNAL_PROGRAM_ERROR ) {
log_err ( " Bad reaction to passed error! \n " ) ;
}
2010-11-02 08:27:22 +00:00
uprv_free ( src . source ) ;
2003-07-22 16:45:58 +00:00
ucol_close ( coll ) ;
2003-06-04 06:53:23 +00:00
} else {
log_data_err ( " Couldn't open UCA collator \n " ) ;
2001-06-26 22:26:13 +00:00
}
2003-06-04 06:53:23 +00:00
2001-06-22 18:35:01 +00:00
}
2002-03-28 18:26:25 +00:00
/**
 * Checks the relative collation order of U+0000, U+FFFE, the noncharacters
 * and U+FFFF.
 *
 * A collator is opened with ucol_open() and the table below is handed to
 * genericOrderingTestWithResult() with UCOL_LESS as the expected relation.
 * If the collator cannot be opened, the failure is reported through
 * log_err_status() and the ordering check is skipped.
 */
static void TestNonChars ( void ) {
2001-09-21 21:22:44 +00:00
static const char * test [ ] = {
2010-10-19 21:48:04 +00:00
" \\ u0000 " , /* ignorable */
" \\ uFFFE " , /* special merge-sort character with minimum non-ignorable weights */
" \\ uFDD0 " , " \\ uFDEF " ,
" \\ U0001FFFE " , " \\ U0001FFFF " , /* UCA 6.0: noncharacters are treated like unassigned, */
" \\ U0002FFFE " , " \\ U0002FFFF " , /* not like ignorable. */
2001-09-21 21:22:44 +00:00
" \\ U0003FFFE " , " \\ U0003FFFF " ,
" \\ U0004FFFE " , " \\ U0004FFFF " ,
" \\ U0005FFFE " , " \\ U0005FFFF " ,
" \\ U0006FFFE " , " \\ U0006FFFF " ,
" \\ U0007FFFE " , " \\ U0007FFFF " ,
" \\ U0008FFFE " , " \\ U0008FFFF " ,
" \\ U0009FFFE " , " \\ U0009FFFF " ,
" \\ U000AFFFE " , " \\ U000AFFFF " ,
" \\ U000BFFFE " , " \\ U000BFFFF " ,
" \\ U000CFFFE " , " \\ U000CFFFF " ,
" \\ U000DFFFE " , " \\ U000DFFFF " ,
" \\ U000EFFFE " , " \\ U000EFFFF " ,
" \\ U000FFFFE " , " \\ U000FFFFF " ,
2010-10-19 21:48:04 +00:00
" \\ U0010FFFE " , " \\ U0010FFFF " ,
" \\ uFFFF " /* special character with maximum primary weight */
2001-09-21 21:22:44 +00:00
} ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " en_US " , & status ) ;
log_verbose ( " Test non characters \n " ) ;
if ( U_SUCCESS ( status ) ) {
2010-10-19 21:48:04 +00:00
/* NOTE(review): the table above holds 37 strings but the hard-coded count
 * passed here is 35, so the last two entries (the plane-16 U+10FFFF string
 * and the final U+FFFF string) are presumably never compared -- confirm
 * whether that is intentional. */
genericOrderingTestWithResult ( coll , test , 35 , UCOL_LESS ) ;
2001-09-21 21:22:44 +00:00
} else {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Unable to open collator \n " ) ;
2001-09-21 21:22:44 +00:00
}
2001-09-28 16:34:05 +00:00
2001-09-25 21:49:30 +00:00
/* Runs on both the success and the failure path. */
ucol_close ( coll ) ;
2001-09-21 21:22:44 +00:00
}
2002-03-28 18:26:25 +00:00
/**
 * Runs genericLocaleStarter() on four strings that share a long run of
 * identical characters and differ only in their final character.
 *
 * For every length j from 20 to 499, string i consists of (j - 1) copies of
 * 'a' followed by the single character 'a' + i, so the four strings are
 * "aa...aa", "aa...ab", "aa...ac" and "aa...ad". Judging by the test name this
 * presumably targets the handling of long common prefixes (compression) in
 * the collator -- that intent is not visible in this function itself.
 */
static void TestExtremeCompression(void) {
    /*
     * Fixed storage instead of malloc(): the strings never need more than
     * 500 bytes, and the previous code passed the malloc() result straight
     * to uprv_memset() without checking it for NULL. Using static buffers
     * removes that failure mode together with the matching free() loop.
     */
    static char buffers[4][2048];
    static char *test[4];
    int32_t j = 0, i = 0;

    for (i = 0; i < 4; i++) {
        test[i] = buffers[i];
    }

    for (j = 20; j < 500; j++) {
        for (i = 0; i < 4; i++) {
            /* j - 1 leading 'a's, one distinguishing character, terminator. */
            uprv_memset(test[i], 'a', (j - 1) * sizeof(char));
            test[i][j - 1] = (char)('a' + i);
            test[i][j] = 0;
        }
        genericLocaleStarter("en_US", (const char **)test, 4);
    }
}
/*
 * Disabled variant of TestExtremeCompression(): everything between the
 * "#if 0" and the matching "#endif" is excluded from compilation.
 * Kept for reference only.
 */
#if 0
static void TestExtremeCompression ( void ) {
static char * test [ 4 ] ;
int32_t j = 0 , i = 0 ;
UErrorCode status = U_ZERO_ERROR ;
/* NOTE(review): status is passed by value here, whereas the other
 * ucol_open() calls in this file pass its address; coll is also never used
 * or closed in this block. Fix both before re-enabling. */
UCollator * coll = ucol_open ( " en_US " , status ) ;
for ( i = 0 ; i < 4 ; i + + ) {
test [ i ] = ( char * ) malloc ( 2048 * sizeof ( char ) ) ;
}
/* Unlike the enabled version, the strings are rebuilt for every length
 * before genericLocaleStarter() is called once, after the loop. */
for ( j = 10 ; j < 2048 ; j + + ) {
for ( i = 0 ; i < 4 ; i + + ) {
uprv_memset ( test [ i ] , ' a ' , ( j - 2 ) * sizeof ( char ) ) ;
test [ i ] [ j - 1 ] = ( char ) ( ' a ' + i ) ;
test [ i ] [ j ] = 0 ;
}
}
2001-11-14 06:55:15 +00:00
genericLocaleStarter ( " en_US " , ( const char * * ) test , 4 ) ;
2001-11-13 23:41:11 +00:00
2003-01-20 07:42:58 +00:00
/* Only test[0] is rewritten here, and the result is not used afterwards. */
for ( j = 10 ; j < 2048 ; j + + ) {
for ( i = 0 ; i < 1 ; i + + ) {
uprv_memset ( test [ i ] , ' a ' , ( j - 1 ) * sizeof ( char ) ) ;
test [ i ] [ j ] = 0 ;
}
}
2001-11-13 23:41:11 +00:00
for ( i = 0 ; i < 4 ; i + + ) {
2002-07-29 21:04:18 +00:00
free ( test [ i ] ) ;
2001-11-13 23:41:11 +00:00
}
2001-09-21 21:22:44 +00:00
}
2003-01-20 07:42:58 +00:00
# endif
2001-09-21 21:22:44 +00:00
2002-03-28 18:26:25 +00:00
/**
 * Tailoring test for supplementary characters written as surrogate pairs.
 *
 * The rule string places a chain of surrogate-pair strings (some followed by
 * an ASCII letter) after "z", ending with "c" and then "b". The 14 strings in
 * test[] are passed, together with the rule, to genericRulesStarter().
 */
static void TestSurrogates ( void ) {
2001-08-10 22:02:31 +00:00
/* Strings handed to genericRulesStarter(); 14 entries, matching the count below. */
static const char * test [ ] = {
" z " , " \\ ud900 \\ udc25 " , " \\ ud805 \\ udc50 " ,
" \\ ud800 \\ udc00y " , " \\ ud800 \\ udc00r " ,
" \\ ud800 \\ udc00f " , " \\ ud800 \\ udc00 " ,
" \\ ud800 \\ udc00c " , " \\ ud800 \\ udc00b " ,
" \\ ud800 \\ udc00fa " , " \\ ud800 \\ udc00fb " ,
2001-09-28 16:34:05 +00:00
" \\ ud800 \\ udc00a " ,
2001-08-10 22:02:31 +00:00
" c " , " b "
} ;
2001-09-28 16:34:05 +00:00
/* Tailoring rule, assembled from adjacent string literals. */
static const char * rule =
2001-08-10 22:02:31 +00:00
" &z < \\ ud900 \\ udc25 < \\ ud805 \\ udc50 "
" < \\ ud800 \\ udc00y < \\ ud800 \\ udc00r "
" < \\ ud800 \\ udc00f << \\ ud800 \\ udc00 "
" < \\ ud800 \\ udc00fa << \\ ud800 \\ udc00fb "
" < \\ ud800 \\ udc00a < c < b " ;
genericRulesStarter ( rule , test , 14 ) ;
}
2001-10-06 02:08:12 +00:00
/*
 * This is a test for the prefix implementation, used by JIS X 4061 collation
 * rules.
 *
 * Table driven: each entry holds a rule string that uses the prefix operator
 * ("|"), the strings to check, and how many of those strings to use. Every
 * entry is passed to genericRulesStarter().
 */
2002-03-28 18:26:25 +00:00
static void TestPrefix ( void ) {
2001-10-08 02:26:25 +00:00
uint32_t i ;
2006-10-01 07:12:18 +00:00
static const struct {
2001-10-08 02:26:25 +00:00
const char * rules ; /* tailoring rule string */
const char * data [ 50 ] ; /* strings passed to genericRulesStarter() */
const uint32_t len ; /* number of entries of data[] to pass */
2004-11-11 23:34:58 +00:00
} tests [ ] = {
{ " &z <<< z|a " ,
2001-10-08 02:26:25 +00:00
{ " zz " , " za " } , 2 } ,
2003-02-20 01:13:36 +00:00
2004-11-11 23:34:58 +00:00
/* Same rule as above, but with whitespace after the prefix operator. */
{ " &z <<< z| a " ,
2003-05-27 16:50:03 +00:00
{ " zz " , " za " } , 2 } ,
2001-10-08 02:26:25 +00:00
/* NOTE(review): this entry lists five strings but len is 4, so the last
 * one is presumably not passed on -- confirm whether that is intentional. */
{ " [strength I] "
" &a= \\ ud900 \\ udc25 "
2004-11-11 23:34:58 +00:00
" &z<<< \\ ud900 \\ udc25|a " ,
2001-10-08 02:26:25 +00:00
{ " aa " , " az " , " \\ ud900 \\ udc25z " , " \\ ud900 \\ udc25a " , " zz " } , 4 } ,
2001-10-06 02:08:12 +00:00
} ;
2001-10-08 02:26:25 +00:00
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len ) ;
}
2001-10-06 02:08:12 +00:00
}
/*
 * This test uses data supplied by Masashiko Maedera to test the
 * JIS X 4061 collation order implementation.
 *
 * Two string tables (test1 and test2) are each run through
 * genericLocaleStarterWithOptions() for locale "ja" twice: once with
 * quaternary strength only, and once with quaternary strength plus
 * shifted alternate handling. A third table (test3) is commented out.
 */
2002-03-28 18:26:25 +00:00
static void TestNewJapanese ( void ) {
2001-10-06 02:08:12 +00:00
2006-10-01 07:12:18 +00:00
/* First data set: Katakana/Hiragana strings, many of them containing U+30FC. */
static const char * const test1 [ ] = {
2001-10-05 02:03:17 +00:00
" \\ u30b7 \\ u30e3 \\ u30fc \\ u30ec " ,
2001-10-02 01:37:00 +00:00
" \\ u30b7 \\ u30e3 \\ u30a4 " ,
" \\ u30b7 \\ u30e4 \\ u30a3 " ,
" \\ u30b7 \\ u30e3 \\ u30ec " ,
" \\ u3061 \\ u3087 \\ u3053 " ,
" \\ u3061 \\ u3088 \\ u3053 " ,
" \\ u30c1 \\ u30e7 \\ u30b3 \\ u30ec \\ u30fc \\ u30c8 " ,
" \\ u3066 \\ u30fc \\ u305f " ,
2003-01-20 07:42:58 +00:00
" \\ u30c6 \\ u30fc \\ u30bf " ,
2001-10-02 01:37:00 +00:00
" \\ u30c6 \\ u30a7 \\ u30bf " ,
" \\ u3066 \\ u3048 \\ u305f " ,
2004-11-11 23:34:58 +00:00
" \\ u3067 \\ u30fc \\ u305f " ,
2001-10-02 01:37:00 +00:00
" \\ u30c7 \\ u30fc \\ u30bf " ,
" \\ u30c7 \\ u30a7 \\ u30bf " ,
" \\ u3067 \\ u3048 \\ u305f " ,
" \\ u3066 \\ u30fc \\ u305f \\ u30fc " ,
" \\ u30c6 \\ u30fc \\ u30bf \\ u30a1 " ,
" \\ u30c6 \\ u30a7 \\ u30bf \\ u30fc " ,
" \\ u3066 \\ u3047 \\ u305f \\ u3041 " ,
" \\ u3066 \\ u3048 \\ u305f \\ u30fc " ,
" \\ u3067 \\ u30fc \\ u305f \\ u30fc " ,
" \\ u30c7 \\ u30fc \\ u30bf \\ u30a1 " ,
" \\ u3067 \\ u30a7 \\ u305f \\ u30a1 " ,
" \\ u30c7 \\ u3047 \\ u30bf \\ u3041 " ,
" \\ u30c7 \\ u30a8 \\ u30bf \\ u30a2 " ,
" \\ u3072 \\ u3086 " ,
" \\ u3073 \\ u3085 \\ u3042 " ,
" \\ u3074 \\ u3085 \\ u3042 " ,
" \\ u3073 \\ u3085 \\ u3042 \\ u30fc " ,
" \\ u30d3 \\ u30e5 \\ u30a2 \\ u30fc " ,
" \\ u3074 \\ u3085 \\ u3042 \\ u30fc " ,
" \\ u30d4 \\ u30e5 \\ u30a2 \\ u30fc " ,
" \\ u30d2 \\ u30e5 \\ u30a6 " ,
" \\ u30d2 \\ u30e6 \\ u30a6 " ,
" \\ u30d4 \\ u30e5 \\ u30a6 \\ u30a2 " ,
2004-11-11 23:34:58 +00:00
" \\ u3073 \\ u3085 \\ u30fc \\ u3042 \\ u30fc " ,
2001-10-02 01:37:00 +00:00
" \\ u30d3 \\ u30e5 \\ u30fc \\ u30a2 \\ u30fc " ,
" \\ u30d3 \\ u30e5 \\ u30a6 \\ u30a2 \\ u30fc " ,
" \\ u3072 \\ u3085 \\ u3093 " ,
" \\ u3074 \\ u3085 \\ u3093 " ,
" \\ u3075 \\ u30fc \\ u308a " ,
" \\ u30d5 \\ u30fc \\ u30ea " ,
" \\ u3075 \\ u3045 \\ u308a " ,
" \\ u3075 \\ u30a5 \\ u308a " ,
" \\ u3075 \\ u30a5 \\ u30ea " ,
" \\ u30d5 \\ u30a6 \\ u30ea " ,
" \\ u3076 \\ u30fc \\ u308a " ,
" \\ u30d6 \\ u30fc \\ u30ea " ,
" \\ u3076 \\ u3045 \\ u308a " ,
" \\ u30d6 \\ u30a5 \\ u308a " ,
" \\ u3077 \\ u3046 \\ u308a " ,
" \\ u30d7 \\ u30a6 \\ u30ea " ,
" \\ u3075 \\ u30fc \\ u308a \\ u30fc " ,
" \\ u30d5 \\ u30a5 \\ u30ea \\ u30fc " ,
" \\ u3075 \\ u30a5 \\ u308a \\ u30a3 " ,
" \\ u30d5 \\ u3045 \\ u308a \\ u3043 " ,
" \\ u30d5 \\ u30a6 \\ u30ea \\ u30fc " ,
" \\ u3075 \\ u3046 \\ u308a \\ u3043 " ,
" \\ u30d6 \\ u30a6 \\ u30ea \\ u30a4 " ,
" \\ u3077 \\ u30fc \\ u308a \\ u30fc " ,
" \\ u3077 \\ u30a5 \\ u308a \\ u30a4 " ,
" \\ u3077 \\ u3046 \\ u308a \\ u30fc " ,
" \\ u30d7 \\ u30a6 \\ u30ea \\ u30a4 " ,
" \\ u30d5 \\ u30fd " ,
" \\ u3075 \\ u309e " ,
" \\ u3076 \\ u309d " ,
" \\ u3076 \\ u3075 " ,
" \\ u3076 \\ u30d5 " ,
" \\ u30d6 \\ u3075 " ,
" \\ u30d6 \\ u30d5 " ,
" \\ u3076 \\ u309e " ,
" \\ u3076 \\ u3077 " ,
2001-10-05 18:01:27 +00:00
" \\ u30d6 \\ u3077 " ,
2001-10-02 01:37:00 +00:00
" \\ u3077 \\ u309d " ,
" \\ u30d7 \\ u30fd " ,
2001-10-05 18:01:27 +00:00
" \\ u3077 \\ u3075 " ,
2001-10-11 21:19:10 +00:00
} ;
2001-10-08 19:32:09 +00:00
/* Second data set; the trailing comments are the original author's shorthand
 * for each string (presumably H = Hiragana, K = Katakana -- TODO confirm). */
static const char * test2 [ ] = {
" \\ u306f \\ u309d " , /* H\\u309d */
2005-09-17 06:26:58 +00:00
" \\ u30cf \\ u30fd " , /* K\\u30fd */
2001-10-08 19:32:09 +00:00
" \\ u306f \\ u306f " , /* HH */
" \\ u306f \\ u30cf " , /* HK */
" \\ u30cf \\ u30cf " , /* KK */
" \\ u306f \\ u309e " , /* H\\u309e */
" \\ u30cf \\ u30fe " , /* K\\u30fe */
" \\ u306f \\ u3070 " , /* HH\\u309b */
" \\ u30cf \\ u30d0 " , /* KK\\u309b */
" \\ u306f \\ u3071 " , /* HH\\u309c */
" \\ u30cf \\ u3071 " , /* KH\\u309c */
" \\ u30cf \\ u30d1 " , /* KK\\u309c */
" \\ u3070 \\ u309d " , /* H\\u309b\\u309d */
" \\ u30d0 \\ u30fd " , /* K\\u309b\\u30fd */
" \\ u3070 \\ u306f " , /* H\\u309bH */
" \\ u30d0 \\ u30cf " , /* K\\u309bK */
" \\ u3070 \\ u309e " , /* H\\u309b\\u309e */
" \\ u30d0 \\ u30fe " , /* K\\u309b\\u30fe */
" \\ u3070 \\ u3070 " , /* H\\u309bH\\u309b */
" \\ u30d0 \\ u3070 " , /* K\\u309bH\\u309b */
" \\ u30d0 \\ u30d0 " , /* K\\u309bK\\u309b */
" \\ u3070 \\ u3071 " , /* H\\u309bH\\u309c */
" \\ u30d0 \\ u30d1 " , /* K\\u309bK\\u309c */
" \\ u3071 \\ u309d " , /* H\\u309c\\u309d */
" \\ u30d1 \\ u30fd " , /* K\\u309c\\u30fd */
" \\ u3071 \\ u306f " , /* H\\u309cH */
" \\ u30d1 \\ u30cf " , /* K\\u309cK */
" \\ u3071 \\ u3070 " , /* H\\u309cH\\u309b */
" \\ u3071 \\ u30d0 " , /* H\\u309cK\\u309b */
" \\ u30d1 \\ u30d0 " , /* K\\u309cK\\u309b */
" \\ u3071 \\ u3071 " , /* H\\u309cH\\u309c */
" \\ u30d1 \\ u30d1 " , /* K\\u309cK\\u309c */
} ;
2001-11-10 06:54:28 +00:00
/*
2001-10-09 15:24:32 +00:00
static const char * test3 [ ] = {
2001-10-11 21:19:10 +00:00
" \\ u221er \\ u221e " ,
" \\ u221eR# " ,
" \\ u221et \\ u221e " ,
" #r \\ u221e " ,
" #R# " ,
" #t% " ,
" #T% " ,
" 8t \\ u221e " ,
" 8T \\ u221e " ,
" 8t# " ,
" 8T# " ,
" 8t% " ,
" 8T% " ,
" 8t8 " ,
" 8T8 " ,
" \\ u03c9r \\ u221e " ,
" \\ u03a9R% " ,
" rr \\ u221e " ,
" rR \\ u221e " ,
" Rr \\ u221e " ,
" RR \\ u221e " ,
" RT% " ,
" rt8 " ,
" tr \\ u221e " ,
" tr8 " ,
" TR8 " ,
" tt8 " ,
" \\ u30b7 \\ u30e3 \\ u30fc \\ u30ec " ,
2001-10-09 15:24:32 +00:00
} ;
2001-11-10 06:54:28 +00:00
*/
2002-09-03 19:43:11 +00:00
/* Attribute/value pairs for the first pass: quaternary strength only. */
static const UColAttribute att [ ] = { UCOL_STRENGTH } ;
static const UColAttributeValue val [ ] = { UCOL_QUATERNARY } ;
2001-11-13 22:55:05 +00:00
2002-09-03 19:43:11 +00:00
/* Attribute/value pairs for the second pass: quaternary strength plus shifted
 * alternate handling. */
static const UColAttribute attShifted [ ] = { UCOL_STRENGTH , UCOL_ALTERNATE_HANDLING } ;
static const UColAttributeValue valShifted [ ] = { UCOL_QUATERNARY , UCOL_SHIFTED } ;
genericLocaleStarterWithOptions ( " ja " , test1 , sizeof ( test1 ) / sizeof ( test1 [ 0 ] ) , att , val , 1 ) ;
genericLocaleStarterWithOptions ( " ja " , test2 , sizeof ( test2 ) / sizeof ( test2 [ 0 ] ) , att , val , 1 ) ;
2001-10-31 23:59:35 +00:00
/*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
2002-09-03 19:43:11 +00:00
genericLocaleStarterWithOptions ( " ja " , test1 , sizeof ( test1 ) / sizeof ( test1 [ 0 ] ) , attShifted , valShifted , 2 ) ;
genericLocaleStarterWithOptions ( " ja " , test2 , sizeof ( test2 ) / sizeof ( test2 [ 0 ] ) , attShifted , valShifted , 2 ) ;
2001-10-11 21:19:10 +00:00
}
2002-03-28 18:26:25 +00:00
/**
 * Checks that two strings with an identical prefix compare as equal when
 * their differing tails are tailored to be equal.
 *
 * The rule makes two supplementary characters (written as surrogate pairs)
 * equal; the two test strings are "ab" followed by one of those characters
 * each. genericRulesStarterWithResult() is called with UCOL_EQUAL as the
 * expected result.
 */
static void TestStrCollIdenticalPrefix ( void ) {
2001-10-11 21:19:10 +00:00
const char * rule = " & \\ ud9b0 \\ udc70= \\ ud9b0 \\ udc71 " ;
const char * test [ ] = {
" ab \\ ud9b0 \\ udc70 " ,
" ab \\ ud9b0 \\ udc71 "
} ;
2006-01-28 08:25:52 +00:00
genericRulesStarterWithResult ( rule , test , sizeof ( test ) / sizeof ( test [ 0 ] ) , UCOL_EQUAL ) ;
2001-10-06 02:08:12 +00:00
}
2002-03-07 19:06:00 +00:00
/*
 * Contractions should have all their canonically equivalent strings
 * included.
 *
 * Table driven: each entry tailors a string to be equal to "b" and lists
 * several spellings of that string (precomposed, decomposed, mixed). Every
 * entry is passed to genericRulesStarterWithResult() with UCOL_EQUAL as the
 * expected result.
 */
2002-03-28 18:26:25 +00:00
static void TestContractionClosure ( void ) {
2006-09-28 08:41:37 +00:00
static const struct {
2001-10-18 22:59:20 +00:00
const char * rules ; /* tailoring rule string */
2006-09-28 08:41:37 +00:00
const char * data [ 10 ] ; /* strings passed to genericRulesStarterWithResult() */
2001-10-18 22:59:20 +00:00
const uint32_t len ; /* number of entries of data[] to pass */
2004-11-11 23:34:58 +00:00
} tests [ ] = {
2002-03-13 06:04:01 +00:00
{ " &b= \\ u00e4 \\ u00e4 " ,
2001-10-18 22:59:20 +00:00
{ " b " , " \\ u00e4 \\ u00e4 " , " a \\ u0308a \\ u0308 " , " \\ u00e4a \\ u0308 " , " a \\ u0308 \\ u00e4 " } , 5 } ,
2002-03-13 06:04:01 +00:00
{ " &b= \\ u00C5 " ,
2001-10-18 22:59:20 +00:00
{ " b " , " \\ u00C5 " , " A \\ u030A " , " \\ u212B " } , 4 } ,
2002-03-07 19:06:00 +00:00
} ;
uint32_t i ;
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
2006-01-28 08:25:52 +00:00
genericRulesStarterWithResult ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len , UCOL_EQUAL ) ;
2002-03-07 19:06:00 +00:00
}
}
/*
 * Table-driven test of rules that combine "[before 3]" with other
 * tailorings; every entry is passed to genericRulesStarter().
 *
 * NOTE(review): the original comment here read "This test also fails"; it is
 * not clear from this file whether that still applies.
 *
 * The second half of the function, between "#if 0" and "#endif", is excluded
 * from compilation. It holds an older form of the test that used the prefix
 * operator, plus a debugging aid that prints collation elements.
 */
2002-03-28 18:26:25 +00:00
static void TestBeforePrefixFailure ( void ) {
2006-09-28 08:41:37 +00:00
static const struct {
2002-03-07 19:06:00 +00:00
const char * rules ; /* tailoring rule string */
2006-09-28 08:41:37 +00:00
const char * data [ 10 ] ; /* strings passed to genericRulesStarter() */
2002-03-07 19:06:00 +00:00
const uint32_t len ; /* number of entries of data[] to pass */
2004-11-11 23:34:58 +00:00
} tests [ ] = {
2002-03-07 19:06:00 +00:00
{ " &g <<< a "
" &[before 3] \\ uff41 <<< x " ,
{ " x " , " \\ uff41 " } , 2 } ,
2001-10-18 22:59:20 +00:00
/* The next two entries use the same three rules, with the [before 3] rule
 * placed last and first respectively. */
{ " & \\ u30A7= \\ u30A7= \\ u3047= \\ uff6a "
" & \\ u30A8= \\ u30A8= \\ u3048= \\ uff74 "
2004-11-11 23:34:58 +00:00
" &[before 3] \\ u30a7<<< \\ u30a9 " ,
2001-10-18 22:59:20 +00:00
{ " \\ u30a9 " , " \\ u30a7 " } , 2 } ,
{ " &[before 3] \\ u30a7<<< \\ u30a9 "
" & \\ u30A7= \\ u30A7= \\ u3047= \\ uff6a "
" & \\ u30A8= \\ u30A8= \\ u3048= \\ uff74 " ,
{ " \\ u30a9 " , " \\ u30a7 " } , 2 } ,
} ;
uint32_t i ;
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len ) ;
}
#if 0
2004-11-11 23:34:58 +00:00
const char * rule1 =
2001-10-11 21:19:10 +00:00
" & \\ u30A7= \\ u30A7= \\ u3047= \\ uff6a "
" & \\ u30A8= \\ u30A8= \\ u3048= \\ uff74 "
" &[before 3] \\ u30a7<<< \\ u30c6| \\ u30fc " ;
2004-11-11 23:34:58 +00:00
const char * rule2 =
2001-10-11 21:19:10 +00:00
" &[before 3] \\ u30a7<<< \\ u30c6| \\ u30fc "
" & \\ u30A7= \\ u30A7= \\ u3047= \\ uff6a "
" & \\ u30A8= \\ u30A8= \\ u3048= \\ uff74 " ;
const char * test [ ] = {
2004-11-11 23:34:58 +00:00
" \\ u30c6 \\ u30fc \\ u30bf " ,
2001-10-11 21:19:10 +00:00
" \\ u30c6 \\ u30a7 \\ u30bf " ,
} ;
genericRulesStarter ( rule1 , test , sizeof ( test ) / sizeof ( test [ 0 ] ) ) ;
genericRulesStarter ( rule2 , test , sizeof ( test ) / sizeof ( test [ 0 ] ) ) ;
2001-10-18 22:59:20 +00:00
/* this piece of code should be in some sort of verbose mode */
2001-10-06 02:08:12 +00:00
/* it gets the collation elements for elements and prints them */
/* This is useful when trying to see whether the problem is */
2004-11-11 23:34:58 +00:00
{
2001-10-11 21:19:10 +00:00
UErrorCode status = U_ZERO_ERROR ;
uint32_t i = 0 ;
UCollationElements * it = NULL ;
uint32_t CE ;
UChar string [ 256 ] ;
uint32_t uStringLen ;
UCollator * coll = NULL ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
uStringLen = u_unescape ( rule1 , string , 256 ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
coll = ucol_openRules ( string , uStringLen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
/*coll = ucol_open("ja_JP_JIS", &status);*/
it = ucol_openElements ( coll , string , 0 , & status ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
/* Print every collation element of every test string. */
for ( i = 0 ; i < sizeof ( test ) / sizeof ( test [ 0 ] ) ; i + + ) {
log_verbose ( " %s \n " , test [ i ] ) ;
uStringLen = u_unescape ( test [ i ] , string , 256 ) ;
ucol_setText ( it , string , uStringLen , & status ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
while ( ( CE = ucol_next ( it , & status ) ) ! = UCOL_NULLORDER ) {
log_verbose ( " %08X \n " , CE ) ;
}
log_verbose ( " \n " ) ;
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
}
2001-10-02 01:37:00 +00:00
2001-10-11 21:19:10 +00:00
ucol_closeElements ( it ) ;
ucol_close ( coll ) ;
}
2001-10-18 22:59:20 +00:00
# endif
2001-10-08 19:32:09 +00:00
}
2002-03-07 19:06:00 +00:00
2002-03-28 18:26:25 +00:00
/**
 * Builds a collator from a rule that uses the prefix operator ("|") twice
 * and closes it again.
 *
 * The rule is unescaped with u_unescape(), passed to ucol_openRules(), and
 * the resulting collator is closed immediately. No comparisons are made.
 *
 * NOTE(review): status is never inspected after ucol_openRules(), so as
 * written this function cannot report a failure -- it appears to check only
 * that building the collator does not crash. Confirm that this is the intent.
 */
static void TestPrefixCompose ( void ) {
2004-11-11 23:34:58 +00:00
const char * rule1 =
2001-10-13 16:22:08 +00:00
" & \\ u30a7<<< \\ u30ab| \\ u30fc= \\ u30ac| \\ u30fc " ;
2001-11-10 06:54:28 +00:00
/*
2001-10-13 16:22:08 +00:00
const char * test [ ] = {
2004-11-11 23:34:58 +00:00
" \\ u30c6 \\ u30fc \\ u30bf " ,
2001-10-13 16:22:08 +00:00
" \\ u30c6 \\ u30a7 \\ u30bf " ,
} ;
2001-11-10 06:54:28 +00:00
*/
2004-11-11 23:34:58 +00:00
{
2001-10-13 16:22:08 +00:00
UErrorCode status = U_ZERO_ERROR ;
2001-11-10 06:54:28 +00:00
/*uint32_t i = 0;*/
/*UCollationElements *it = NULL;*/
2001-10-17 02:19:48 +00:00
/* uint32_t CE;*/
2001-10-13 16:22:08 +00:00
UChar string [ 256 ] ;
uint32_t uStringLen ;
UCollator * coll = NULL ;
uStringLen = u_unescape ( rule1 , string , 256 ) ;
coll = ucol_openRules ( string , uStringLen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
ucol_close ( coll ) ;
}
}
2001-10-08 19:32:09 +00:00
2002-03-15 23:51:22 +00:00
/*
2004-11-11 23:34:58 +00:00
[ last variable ] last variable value
[ last primary ignorable ] largest CE for primary ignorable
[ last secondary ignorable ] largest CE for secondary ignorable
[ last tertiary ignorable ] largest CE for tertiary ignorable
[ top ] guaranteed to be above all implicit CEs , for now and in the future ( in 1.8 )
2002-03-15 23:51:22 +00:00
*/
2002-03-28 18:26:25 +00:00
/**
 * Table-driven test of the bracketed rule options ("[first ...]",
 * "[last ...]", "[before n]", "[top]").
 *
 * Each entry holds a rule string, the strings to check and how many of those
 * strings to use; every entry is passed to genericRulesStarter(). The
 * expected strings depend on the UCA data version -- see the comments on the
 * macros below.
 */
static void TestRuleOptions ( void ) {
2004-11-11 23:34:58 +00:00
/* values here are hardcoded and are correct for the current UCA
* when the UCA changes , one might be forced to change these
2010-10-25 23:06:37 +00:00
* values .
2002-07-11 22:44:26 +00:00
*/
2010-10-25 23:06:37 +00:00
/*
* These strings contain the last character before [ variable top ]
* and the first and second characters ( by primary weights ) after it .
* See FractionalUCA . txt . For example :
[ last variable [ 0 C FE , 05 , 05 ] ] # U + 10 A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
[ variable top = 0 C FE ]
[ first regular [ 0 D 0 A , 05 , 05 ] ] # U + 0060 GRAVE ACCENT
and
00 B4 ; [ 0 D 0 C , 05 , 05 ]
*
* Note : Starting with UCA 6.0 , the [ variable top ] collation element
* is not the weight of any character or string ,
* which means that LAST_VARIABLE_CHAR_STRING sorts before [ last variable ] .
*/
# define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
# define FIRST_REGULAR_CHAR_STRING "\\u0060"
# define SECOND_REGULAR_CHAR_STRING "\\u00B4"
/*
* This string has to match the character that has the [ last regular ] weight
* which changes with each UCA version .
* See the bottom of FractionalUCA . txt which says something like
[ last regular [ 7 A FE , 05 , 05 ] ] # U + 1342 E EGYPTIAN HIEROGLYPH AA032
*
* Note : Starting with UCA 6.0 , the [ last regular ] collation element
* is not the weight of any character or string ,
* which means that LAST_REGULAR_CHAR_STRING sorts before [ last regular ] .
*/
# define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2006-09-28 08:41:37 +00:00
static const struct {
2002-03-15 23:51:22 +00:00
const char * rules ; /* tailoring rule string */
2006-09-28 08:41:37 +00:00
const char * data [ 10 ] ; /* strings passed to genericRulesStarter() */
2002-03-15 23:51:22 +00:00
const uint32_t len ; /* number of entries of data[] to pass */
2004-11-11 23:34:58 +00:00
} tests [ ] = {
2002-07-11 22:44:26 +00:00
/* - all befores here amount to zero */
2004-11-11 23:34:58 +00:00
{ " &[before 3][first tertiary ignorable]<<<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0000 " , " a " } , 2
} , /* you cannot go before first tertiary ignorable */
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[before 3][last tertiary ignorable]<<<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0000 " , " a " } , 2
} , /* you cannot go before last tertiary ignorable */
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[before 3][first secondary ignorable]<<<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0000 " , " a " } , 2
} , /* you cannot go before first secondary ignorable */
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[before 3][last secondary ignorable]<<<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0000 " , " a " } , 2
} , /* you cannot go before last secondary ignorable */
2002-07-11 22:44:26 +00:00
/* 'normal' befores */
2004-11-11 23:34:58 +00:00
{ " &[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a " ,
2003-07-24 23:23:19 +00:00
{ " c " , " b " , " \\ u0332 " , " a " } , 4
} ,
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
/* we don't have a code point that corresponds to
2002-07-11 22:44:26 +00:00
* the last primary ignorable
*/
2004-11-11 23:34:58 +00:00
{ " &[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a " ,
2003-07-24 23:23:19 +00:00
{ " \\ u0332 " , " \\ u20e3 " , " c " , " b " , " a " } , 5
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[before 3][first variable]<<<c<<<b &[first variable]<a " ,
2003-07-24 23:23:19 +00:00
{ " c " , " b " , " \\ u0009 " , " a " , " \\ u000a " } , 5
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
2004-11-11 23:34:58 +00:00
{ " &[last variable]<a &[before 3][last variable]<<<c<<<b " ,
2010-10-19 21:48:04 +00:00
{ LAST_VARIABLE_CHAR_STRING , " c " , " b " , /* [last variable] */ " a " , FIRST_REGULAR_CHAR_STRING } , 5
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
{ " &[first regular]<a "
" &[before 1][first regular]<b " ,
2010-10-19 21:48:04 +00:00
{ " b " , FIRST_REGULAR_CHAR_STRING , " a " , SECOND_REGULAR_CHAR_STRING } , 4
2003-07-24 23:23:19 +00:00
} ,
2002-07-11 22:44:26 +00:00
{ " &[before 1][last regular]<b "
" &[last regular]<a " ,
2010-10-19 21:48:04 +00:00
{ LAST_REGULAR_CHAR_STRING , " b " , /* [last regular] */ " a " , " \\ u4e00 " } , 4
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
{ " &[before 1][first implicit]<b "
" &[first implicit]<a " ,
2003-07-24 23:23:19 +00:00
{ " b " , " \\ u4e00 " , " a " , " \\ u4e01 " } , 4
} ,
2002-07-11 22:44:26 +00:00
{ " &[before 1][last implicit]<b "
" &[last implicit]<a " ,
2004-01-27 22:17:51 +00:00
{ " b " , " \\ U0010FFFD " , " a " } , 3
2004-11-11 23:34:58 +00:00
} ,
2002-07-11 22:44:26 +00:00
2002-03-15 23:51:22 +00:00
/* NOTE(review): this entry lists eight strings but len is 7, so the final
 * "u" (tailored after [top]) is presumably not passed on -- confirm whether
 * that is intentional. */
{ " &[last variable]<z "
2002-03-16 05:02:35 +00:00
" &[last primary ignorable]<x "
" &[last secondary ignorable]<<y "
" &[last tertiary ignorable]<<<w "
" &[top]<u " ,
2010-10-19 21:48:04 +00:00
{ " \\ ufffb " , " w " , " y " , " \\ u20e3 " , " x " , LAST_VARIABLE_CHAR_STRING , " z " , " u " } , 7
2003-07-24 23:23:19 +00:00
}
2002-07-11 22:44:26 +00:00
2002-03-15 23:51:22 +00:00
} ;
uint32_t i ;
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len ) ;
}
}
2002-10-17 23:12:43 +00:00
2002-10-30 06:09:25 +00:00
/**
 * Builds a collator from a rule consisting only of an "[optimize ...]"
 * option and runs genericRulesStarter() on the strings "a" and "b".
 * As the comment below says, this only tries out whether the optimize
 * option can be processed; it does not verify any optimization.
 */
static void TestOptimize ( void ) {
2004-11-11 23:34:58 +00:00
/* this is not really a test - just trying out
* whether copying of UCA contents will fail
* Cannot really test , since the functionality
2002-10-30 06:09:25 +00:00
* remains the same .
*/
2006-09-28 08:41:37 +00:00
static const struct {
2002-10-17 23:12:43 +00:00
const char * rules ; /* tailoring rule string */
2006-09-28 08:41:37 +00:00
const char * data [ 10 ] ; /* strings passed to genericRulesStarter() */
2002-10-17 23:12:43 +00:00
const uint32_t len ; /* number of entries of data[] to pass */
2004-11-11 23:34:58 +00:00
} tests [ ] = {
2002-10-17 23:12:43 +00:00
/* - all befores here amount to zero */
2004-11-11 23:34:58 +00:00
{ " [optimize [ \\ uAC00- \\ uD7FF]] " ,
{ " a " , " b " } , 2 }
2002-10-17 23:12:43 +00:00
} ;
uint32_t i ;
for ( i = 0 ; i < ( sizeof ( tests ) / sizeof ( tests [ 0 ] ) ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . data , tests [ i ] . len ) ;
}
}
2003-02-20 01:13:36 +00:00
/*
2004-11-11 23:34:58 +00:00
cycheng @ ca . ibm . c . . . we got inconsistent results when using the UTF - 16 BE iterator and the UTF - 8 iterator .
weiv ucol_strcollIter ?
cycheng @ ca . ibm . c . . . e . g . s1 = 0xfffc0062 , and s2 = d8000021
weiv these are the input strings ?
cycheng @ ca . ibm . c . . . yes , using the utf - 16 iterator and UCA with normalization on , we have s1 > s2
weiv will check - could be a problem with utf - 8 iterator
cycheng @ ca . ibm . c . . . but if we use the utf - 8 iterator , i . e . s1 = efbfbc62 and s2 = eda08021 , we have s1 < s2
weiv hmmm
cycheng @ ca . ibm . c . . . note that we have a standalone high surrogate
weiv that doesn ' t sound right
cycheng @ ca . ibm . c . . . we got the same inconsistent results on AIX and Win2000
weiv so you have two strings , you convert them to utf - 8 and to utf - 16 BE
cycheng @ ca . ibm . c . . . yes
weiv and then do the comparison
cycheng @ ca . ibm . c . . . in one case , the input strings are in utf8 , and in the other case the input strings are in utf - 16 be
weiv utf - 16 strings look like a little endian ones in the example you sent me
weiv It could be a bug - let me try to test it out
cycheng @ ca . ibm . c . . . ok
cycheng @ ca . ibm . c . . . we can wait till the conf . call
cycheng @ ca . ibm . c . . . next weke
weiv that would be great
weiv hmmm
weiv I might be wrong
weiv let me play with it some more
cycheng @ ca . ibm . c . . . ok
cycheng @ ca . ibm . c . . . also please check s3 = 0x0e3a0062 and s4 = 0x0e400021 . both are in utf - 16 be
cycheng @ ca . ibm . c . . . seems with icu 2.2 we have s3 > s4 , but not in icu 2.4 that ' s built for db2
cycheng @ ca . ibm . c . . . also s1 & s2 that I sent you earlier are also in utf - 16 be
weiv ok
cycheng @ ca . ibm . c . . . i ask sherman to send you more inconsistent data
weiv thanks
cycheng @ ca . ibm . c . . . the 4 strings we sent are just samples
2003-02-20 01:13:36 +00:00
*/
2003-05-15 21:28:34 +00:00
#if 0
2003-02-20 01:13:36 +00:00
/*
 * Reproduces the report quoted in the comment above: the same two strings are
 * collated once through UTF-16BE iterators and once through UTF-8 iterators,
 * with normalization switched on, and an error is logged when the two
 * ucol_strcollIter() results disagree.
 * This function is currently compiled out by the surrounding #if 0.
 */
static void Alexis(void) {
    /* Raw byte images; row 0 is d8 00 00 21, row 1 is ff fc 00 62. */
    const char bytesUtf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };
    /* The same two rows, as the UTF-8 byte sequences used in the report. */
    const char bytesUtf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open(" ", &status);
    UCharIterator firstU16, secondU16;
    UCharIterator firstU8, secondU8;
    UCollationResult viaUtf16, viaUtf8;

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    /* Both iterator flavours are handed the full 4-byte rows. */
    uiter_setUTF16BE(&firstU16, bytesUtf16be[0], 4);
    uiter_setUTF16BE(&secondU16, bytesUtf16be[1], 4);
    uiter_setUTF8(&firstU8, bytesUtf8[0], 4);
    uiter_setUTF8(&secondU8, bytesUtf8[1], 4);

    viaUtf16 = ucol_strcollIter(coll, &firstU16, &secondU16, &status);
    viaUtf8 = ucol_strcollIter(coll, &firstU8, &secondU8, &status);

    if (viaUtf16 != viaUtf8) {
        log_err(" different results \n ");
    }

    ucol_close(coll);
}
2003-05-15 21:28:34 +00:00
# endif
2003-02-20 01:13:36 +00:00
# define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
static void Alexis2 ( void ) {
UErrorCode status = U_ZERO_ERROR ;
UChar U16Source [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] , U16Target [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] ;
char U16BESource [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] , U16BETarget [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] ;
2004-11-11 23:34:58 +00:00
char U8Source [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] , U8Target [ CMSCOLL_ALEXIS2_BUFFER_SIZE ] ;
2003-02-20 01:13:36 +00:00
int32_t U16LenS = 0 , U16LenT = 0 , U16BELenS = 0 , U16BELenT = 0 , U8LenS = 0 , U8LenT = 0 ;
2003-06-04 19:02:41 +00:00
UConverter * conv = NULL ;
2003-02-20 01:13:36 +00:00
UCharIterator U16BEItS , U16BEItT ;
UCharIterator U8ItS , U8ItT ;
UCollationResult resU16 , resU16BE , resU8 ;
2006-09-28 08:41:37 +00:00
static const char * const pairs [ ] [ 2 ] = {
2003-04-30 23:26:55 +00:00
{ " \\ ud800 \\ u0021 " , " \\ uFFFC \\ u0062 " } ,
2003-02-20 01:13:36 +00:00
{ " \\ u0435 \\ u0308 \\ u0334 " , " \\ u0415 \\ u0334 \\ u0340 " } ,
{ " \\ u0E40 \\ u0021 " , " \\ u00A1 \\ u0021 " } ,
{ " \\ u0E40 \\ u0021 " , " \\ uFE57 \\ u0062 " } ,
2003-04-30 23:26:55 +00:00
{ " \\ u5F20 " , " \\ u5F20 \\ u4E00 \\ u8E3F " } ,
{ " \\ u0000 \\ u0020 " , " \\ u0000 \\ u0020 \\ u0000 " } ,
{ " \\ u0020 " , " \\ u0020 \\ u0000 " }
/*
5F 20 ( my result here )
5F 204E008 E3F
5F 20 ( your result here )
*/
2003-02-20 01:13:36 +00:00
} ;
int32_t i = 0 ;
2003-06-04 19:02:41 +00:00
UCollator * coll = ucol_open ( " " , & status ) ;
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
2003-02-20 01:13:36 +00:00
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
2003-06-04 19:02:41 +00:00
conv = ucnv_open ( " UTF16BE " , & status ) ;
2003-02-20 01:13:36 +00:00
for ( i = 0 ; i < sizeof ( pairs ) / sizeof ( pairs [ 0 ] ) ; i + + ) {
U16LenS = u_unescape ( pairs [ i ] [ 0 ] , U16Source , CMSCOLL_ALEXIS2_BUFFER_SIZE ) ;
U16LenT = u_unescape ( pairs [ i ] [ 1 ] , U16Target , CMSCOLL_ALEXIS2_BUFFER_SIZE ) ;
resU16 = ucol_strcoll ( coll , U16Source , U16LenS , U16Target , U16LenT ) ;
2003-04-30 23:26:55 +00:00
log_verbose ( " Result of strcoll is %i \n " , resU16 ) ;
2003-02-20 01:13:36 +00:00
U16BELenS = ucnv_fromUChars ( conv , U16BESource , CMSCOLL_ALEXIS2_BUFFER_SIZE , U16Source , U16LenS , & status ) ;
U16BELenT = ucnv_fromUChars ( conv , U16BETarget , CMSCOLL_ALEXIS2_BUFFER_SIZE , U16Target , U16LenT , & status ) ;
2003-04-30 23:26:55 +00:00
/* use the original sizes, as the result from converter is in bytes */
uiter_setUTF16BE ( & U16BEItS , U16BESource , U16LenS ) ;
uiter_setUTF16BE ( & U16BEItT , U16BETarget , U16LenT ) ;
2003-02-20 01:13:36 +00:00
resU16BE = ucol_strcollIter ( coll , & U16BEItS , & U16BEItT , & status ) ;
2003-04-30 23:26:55 +00:00
log_verbose ( " Result of U16BE is %i \n " , resU16BE ) ;
2003-02-20 01:13:36 +00:00
if ( resU16 ! = resU16BE ) {
log_verbose ( " Different results between UTF16 and UTF16BE for %s & %s \n " , pairs [ i ] [ 0 ] , pairs [ i ] [ 1 ] ) ;
}
u_strToUTF8 ( U8Source , CMSCOLL_ALEXIS2_BUFFER_SIZE , & U8LenS , U16Source , U16LenS , & status ) ;
u_strToUTF8 ( U8Target , CMSCOLL_ALEXIS2_BUFFER_SIZE , & U8LenT , U16Target , U16LenT , & status ) ;
uiter_setUTF8 ( & U8ItS , U8Source , U8LenS ) ;
uiter_setUTF8 ( & U8ItT , U8Target , U8LenT ) ;
resU8 = ucol_strcollIter ( coll , & U8ItS , & U8ItT , & status ) ;
if ( resU16 ! = resU8 ) {
log_verbose ( " Different results between UTF16 and UTF8 for %s & %s \n " , pairs [ i ] [ 0 ] , pairs [ i ] [ 1 ] ) ;
}
}
ucol_close ( coll ) ;
2003-02-25 22:22:41 +00:00
ucnv_close ( conv ) ;
2003-02-20 01:13:36 +00:00
}
2003-04-30 23:26:55 +00:00
static void TestHebrewUCA ( void ) {
UErrorCode status = U_ZERO_ERROR ;
2006-09-28 08:41:37 +00:00
static const char * first [ ] = {
2003-04-30 23:26:55 +00:00
" d790d6b8d79cd795d6bcd7a9 " ,
" d790d79cd79ed7a7d799d799d7a1 " ,
" d790d6b4d79ed795d6bcd7a9 " ,
} ;
char utf8String [ 3 ] [ 256 ] ;
UChar utf16String [ 3 ] [ 256 ] ;
int32_t i = 0 , j = 0 ;
int32_t sizeUTF8 [ 3 ] ;
int32_t sizeUTF16 [ 3 ] ;
UCollator * coll = ucol_open ( " " , & status ) ;
2008-02-19 20:10:55 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Could not open UCA collation %s \n " , u_errorName ( status ) ) ;
2008-02-19 20:10:55 +00:00
return ;
}
2003-04-30 23:26:55 +00:00
/*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
for ( i = 0 ; i < sizeof ( first ) / sizeof ( first [ 0 ] ) ; i + + ) {
sizeUTF8 [ i ] = u_parseUTF8 ( first [ i ] , - 1 , utf8String [ i ] , 256 , & status ) ;
u_strFromUTF8 ( utf16String [ i ] , 256 , & sizeUTF16 [ i ] , utf8String [ i ] , sizeUTF8 [ i ] , & status ) ;
log_verbose ( " %i: " ) ;
for ( j = 0 ; j < sizeUTF16 [ i ] ; j + + ) {
/*log_verbose("\\u%04X", utf16String[i][j]);*/
log_verbose ( " %04X " , utf16String [ i ] [ j ] ) ;
}
log_verbose ( " \n " ) ;
}
for ( i = 0 ; i < sizeof ( first ) / sizeof ( first [ 0 ] ) - 1 ; i + + ) {
for ( j = i + 1 ; j < sizeof ( first ) / sizeof ( first [ 0 ] ) ; j + + ) {
doTest ( coll , utf16String [ i ] , utf16String [ j ] , UCOL_LESS ) ;
}
}
ucol_close ( coll ) ;
}
static void TestPartialSortKeyTermination ( void ) {
2006-09-28 08:41:37 +00:00
static const char * cases [ ] = {
2003-04-30 23:26:55 +00:00
" \\ u1234 \\ u1234 \\ udc00 " ,
2004-11-11 23:34:58 +00:00
" \\ udc00 \\ ud800 \\ ud800 "
2003-04-30 23:26:55 +00:00
} ;
int32_t i = sizeof ( UCollator ) ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " " , & status ) ;
UCharIterator iter ;
UChar currCase [ 256 ] ;
int32_t length = 0 ;
int32_t pKeyLen = 0 ;
uint8_t key [ 256 ] ;
for ( i = 0 ; i < sizeof ( cases ) / sizeof ( cases [ 0 ] ) ; i + + ) {
uint32_t state [ 2 ] = { 0 , 0 } ;
length = u_unescape ( cases [ i ] , currCase , 256 ) ;
uiter_setString ( & iter , currCase , length ) ;
pKeyLen = ucol_nextSortKeyPart ( coll , & iter , state , key , 256 , & status ) ;
log_verbose ( " Done \n " ) ;
}
ucol_close ( coll ) ;
}
static void TestSettings ( void ) {
2006-09-28 08:41:37 +00:00
static const char * cases [ ] = {
2003-04-30 23:26:55 +00:00
" apple " ,
" Apple "
} ;
2006-09-28 08:41:37 +00:00
static const char * locales [ ] = {
2003-04-30 23:26:55 +00:00
" " ,
" en "
} ;
UErrorCode status = U_ZERO_ERROR ;
int32_t i = 0 , j = 0 ;
UChar source [ 256 ] , target [ 256 ] ;
int32_t sLen = 0 , tLen = 0 ;
UCollator * collateObject = NULL ;
for ( i = 0 ; i < sizeof ( locales ) / sizeof ( locales [ 0 ] ) ; i + + ) {
collateObject = ucol_open ( locales [ i ] , & status ) ;
ucol_setStrength ( collateObject , UCOL_PRIMARY ) ;
ucol_setAttribute ( collateObject , UCOL_CASE_LEVEL , UCOL_OFF , & status ) ;
for ( j = 1 ; j < sizeof ( cases ) / sizeof ( cases [ 0 ] ) ; j + + ) {
sLen = u_unescape ( cases [ j - 1 ] , source , 256 ) ;
source [ sLen ] = 0 ;
tLen = u_unescape ( cases [ j ] , target , 256 ) ;
source [ tLen ] = 0 ;
doTest ( collateObject , source , target , UCOL_EQUAL ) ;
}
ucol_close ( collateObject ) ;
}
}
2003-05-01 00:57:27 +00:00
/*
 * Runs the ucol_equals() checks for one pair of collators.
 *
 * @param locName  locale name of the pair; not used by the checks themselves.
 * @param source   collator that stays open; the caller is expected to close it.
 * @param target   second collator, expected to be equal to source. It is
 *                 closed in here, so the caller must not use or close it
 *                 afterwards.
 * @return the number of checks that failed (0 when everything passed).
 */
static int32_t TestEqualsForCollator(const char *locName, UCollator *source, UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t errorNo = 0;
    UColAttributeValue french = UCOL_OFF;
    int32_t cloneSize = 0;

    (void)locName;

    if (!ucol_equals(source, target)) {
        log_err(" Same collators, different address not equal \n ");
        errorNo++;
    }
    ucol_close(target);

    /* The clone checks only run when the requested and the actual locale names match. */
    if (uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status),
                    ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
        /* real thing */
        target = ucol_safeClone(source, NULL, &cloneSize, &status);
        if (U_FAILURE(status)) {
            log_err(" Error creating clone \n ");
            errorNo++;
            return errorNo;
        }
        if (!ucol_equals(source, target)) {
            log_err(" Collator different from it's clone \n ");
            errorNo++;
        }

        /* Flip the French-collation attribute on the clone so the two must differ. */
        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
        if (french == UCOL_ON) {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
        } else {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
        }
        if (U_FAILURE(status)) {
            log_err(" Error setting attributes \n ");
            errorNo++;
            /* The clone is ours; release it on this early exit as well. */
            ucol_close(target);
            return errorNo;
        }
        if (ucol_equals(source, target)) {
            log_err(" Collators same even when options changed \n ");
            errorNo++;
        }
        ucol_close(target);

        /*
         * A further check that rebuilt the target from ucol_getRules() /
         * ucol_openRules() used to follow here. The original authors disabled
         * it, noting that safeClone used exactly the same technique.
         */
    }
    return errorNo;
}
static void TestEquals ( void ) {
2008-02-19 08:08:10 +00:00
/* ucol_equals is not currently a public API. There is a chance that it will become
* something like this , but currently it is only used by RuleBasedCollator : : operator = =
*/
/* test whether the two collators instantiated from the same locale are equal */
UErrorCode status = U_ZERO_ERROR ;
UParseError parseError ;
int32_t noOfLoc = uloc_countAvailable ( ) ;
const char * locName = NULL ;
UCollator * source = NULL , * target = NULL ;
int32_t i = 0 ;
2003-05-01 00:57:27 +00:00
2008-02-19 08:08:10 +00:00
const char * rules [ ] = {
" &l < lj <<< Lj <<< LJ " ,
" &n < nj <<< Nj <<< NJ " ,
" &ae <<< \\ u00e4 " ,
" &AE <<< \\ u00c4 "
} ;
/*
const char * badRules [ ] = {
2003-05-01 17:44:17 +00:00
" &l <<< Lj " ,
2008-02-19 08:08:10 +00:00
" &n < nj <<< nJ <<< NJ " ,
" &a <<< \\ u00e4 " ,
" &AE <<< \\ u00c4 <<< x "
} ;
*/
2003-05-01 17:44:17 +00:00
2008-02-19 08:08:10 +00:00
UChar sourceRules [ 1024 ] , targetRules [ 1024 ] ;
int32_t sourceRulesSize = 0 , targetRulesSize = 0 ;
int32_t rulesSize = sizeof ( rules ) / sizeof ( rules [ 0 ] ) ;
2003-05-01 17:44:17 +00:00
2008-02-19 08:08:10 +00:00
for ( i = 0 ; i < rulesSize ; i + + ) {
sourceRulesSize + = u_unescape ( rules [ i ] , sourceRules + sourceRulesSize , 1024 - sourceRulesSize ) ;
targetRulesSize + = u_unescape ( rules [ rulesSize - i - 1 ] , targetRules + targetRulesSize , 1024 - targetRulesSize ) ;
}
2003-05-01 17:44:17 +00:00
2008-02-19 08:08:10 +00:00
source = ucol_openRules ( sourceRules , sourceRulesSize , UCOL_DEFAULT , UCOL_DEFAULT , & parseError , & status ) ;
if ( status = = U_FILE_ACCESS_ERROR ) {
log_data_err ( " Is your data around? \n " ) ;
return ;
} else if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator \n " ) ;
return ;
}
target = ucol_openRules ( targetRules , targetRulesSize , UCOL_DEFAULT , UCOL_DEFAULT , & parseError , & status ) ;
if ( ! ucol_equals ( source , target ) ) {
log_err ( " Equivalent collators not equal! \n " ) ;
}
ucol_close ( source ) ;
ucol_close ( target ) ;
2003-05-01 00:57:27 +00:00
2008-02-19 08:08:10 +00:00
source = ucol_open ( " root " , & status ) ;
target = ucol_open ( " root " , & status ) ;
log_verbose ( " Testing root \n " ) ;
if ( ! ucol_equals ( source , source ) ) {
log_err ( " Same collator not equal \n " ) ;
}
if ( TestEqualsForCollator ( locName , source , target ) ) {
log_err ( " Errors for root \n " , locName ) ;
}
ucol_close ( source ) ;
for ( i = 0 ; i < noOfLoc ; i + + ) {
status = U_ZERO_ERROR ;
locName = uloc_getAvailable ( i ) ;
/*if(hasCollationElements(locName)) {*/
log_verbose ( " Testing equality for locale %s \n " , locName ) ;
source = ucol_open ( locName , & status ) ;
target = ucol_open ( locName , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Error opening collator for locale %s %s \n " , locName , u_errorName ( status ) ) ;
continue ;
}
if ( TestEqualsForCollator ( locName , source , target ) ) {
log_err ( " Errors for locale %s \n " , locName ) ;
}
ucol_close ( source ) ;
/*}*/
}
2003-05-29 21:15:26 +00:00
}
2003-05-01 00:57:27 +00:00
2003-05-29 21:15:26 +00:00
/*
 * With alternate handling set to shifted and primary strength, "a", "a " and
 * " a" are all expected to compare as equal, in either argument order.
 */
static void TestJ2726(void) {
    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open(" en ", &status);
    if (U_FAILURE(status)) {
        /* Without a collator doTest() has nothing valid to work on. */
        log_err_status(status, "Could not open collator -> %s\n", u_errorName(status));
        return;
    }
    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
    doTest(coll, a, aSpace, UCOL_EQUAL);
    doTest(coll, aSpace, a, UCOL_EQUAL);
    doTest(coll, a, spaceA, UCOL_EQUAL);
    doTest(coll, spaceA, a, UCOL_EQUAL);
    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
    ucol_close(coll);
}
2003-07-22 16:49:56 +00:00
static void NullRule ( void ) {
2008-02-19 08:08:10 +00:00
UChar r [ 3 ] = { 0 } ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_openRules ( r , 1 , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
if ( U_SUCCESS ( status ) ) {
log_err ( " This should have been an error! \n " ) ;
ucol_close ( coll ) ;
} else {
status = U_ZERO_ERROR ;
}
coll = ucol_openRules ( r , 0 , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Empty rules should have produced a valid collator -> %s \n " , u_errorName ( status ) ) ;
2008-02-19 08:08:10 +00:00
} else {
ucol_close ( coll ) ;
}
2003-07-22 16:49:56 +00:00
}
2003-08-18 22:12:04 +00:00
/**
* Test for CollationElementIterator previous and next for the whole set of
* unicode characters with normalization on .
*/
static void TestNumericCollation ( void )
{
UErrorCode status = U_ZERO_ERROR ;
2004-11-11 23:34:58 +00:00
const static char * basicTestStrings [ ] = {
" hello1 " ,
" hello2 " ,
" hello2002 " ,
" hello2003 " ,
" hello123456 " ,
" hello1234567 " ,
" hello10000000 " ,
" hello100000000 " ,
" hello1000000000 " ,
" hello10000000000 " ,
} ;
const static char * preZeroTestStrings [ ] = {
" avery10000 " ,
" avery010000 " ,
" avery0010000 " ,
" avery00010000 " ,
" avery000010000 " ,
" avery0000010000 " ,
" avery00000010000 " ,
" avery000000010000 " ,
} ;
const static char * thirtyTwoBitNumericStrings [ ] = {
" avery42949672960 " ,
" avery42949672961 " ,
" avery42949672962 " ,
" avery429496729610 "
2009-01-29 07:44:27 +00:00
} ;
const static char * longNumericStrings [ ] = {
/* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
In fact , a single collation element can represent a maximum of 254 digits as a number . Digit strings longer than that
are treated as multiple collation elements . */
" num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z " , /*253digits, num + 9.23E252 + z */
" num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 " , /*254digits, num + 1.00E253 */
" num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 " , /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
" num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234 " , /*254digits, num + 1.23E253 */
" num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 " , /*255digits, num + 1.23E253 + 5 */
" num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456 " , /*256digits, num + 1.23E253 + 56 */
" num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 " , /*257digits, num + 1.23E253 + 567 */
" num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a " , /*254digits, num + 1.23E253 + a, out of numeric order but expected */
" num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234 " , /*254digits, num + 9.23E253, out of numeric order but expected */
" num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a " , /*254digits, num + 9.23E253 + a, out of numeric order but expected */
2004-11-11 23:34:58 +00:00
} ;
2003-08-18 22:12:04 +00:00
const static char * supplementaryDigits [ ] = {
" \\ uD835 \\ uDFCE " , /* 0 */
" \\ uD835 \\ uDFCF " , /* 1 */
" \\ uD835 \\ uDFD0 " , /* 2 */
" \\ uD835 \\ uDFD1 " , /* 3 */
" \\ uD835 \\ uDFCF \\ uD835 \\ uDFCE " , /* 10 */
" \\ uD835 \\ uDFCF \\ uD835 \\ uDFCF " , /* 11 */
" \\ uD835 \\ uDFCF \\ uD835 \\ uDFD0 " , /* 12 */
" \\ uD835 \\ uDFD0 \\ uD835 \\ uDFCE " , /* 20 */
" \\ uD835 \\ uDFD0 \\ uD835 \\ uDFCF " , /* 21 */
" \\ uD835 \\ uDFD0 \\ uD835 \\ uDFD0 " /* 22 */
} ;
const static char * foreignDigits [ ] = {
" \\ u0661 " ,
" \\ u0662 " ,
" \\ u0663 " ,
" \\ u0661 \\ u0660 " ,
" \\ u0661 \\ u0662 " ,
" \\ u0661 \\ u0663 " ,
" \\ u0662 \\ u0660 " ,
" \\ u0662 \\ u0662 " ,
" \\ u0662 \\ u0663 " ,
" \\ u0663 \\ u0660 " ,
" \\ u0663 \\ u0662 " ,
" \\ u0663 \\ u0663 "
} ;
2003-09-17 04:02:08 +00:00
const static char * evenZeroes [ ] = {
2004-02-12 08:32:34 +00:00
" 2000 " ,
2003-09-17 04:02:08 +00:00
" 2001 " ,
" 2002 " ,
" 2003 "
} ;
2003-08-18 22:12:04 +00:00
UColAttribute att = UCOL_NUMERIC_COLLATION ;
UColAttributeValue val = UCOL_ON ;
2004-11-11 23:34:58 +00:00
/* Open our collator. */
2003-08-18 22:12:04 +00:00
UCollator * coll = ucol_open ( " root " , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " ERROR: in using ucol_open() -> %s \n " ,
2003-08-18 22:12:04 +00:00
myErrorName ( status ) ) ;
return ;
}
genericLocaleStarterWithOptions ( " root " , basicTestStrings , sizeof ( basicTestStrings ) / sizeof ( basicTestStrings [ 0 ] ) , & att , & val , 1 ) ;
genericLocaleStarterWithOptions ( " root " , thirtyTwoBitNumericStrings , sizeof ( thirtyTwoBitNumericStrings ) / sizeof ( thirtyTwoBitNumericStrings [ 0 ] ) , & att , & val , 1 ) ;
2009-01-29 07:44:27 +00:00
genericLocaleStarterWithOptions ( " root " , longNumericStrings , sizeof ( longNumericStrings ) / sizeof ( longNumericStrings [ 0 ] ) , & att , & val , 1 ) ;
2004-01-09 07:55:22 +00:00
genericLocaleStarterWithOptions ( " en_US " , foreignDigits , sizeof ( foreignDigits ) / sizeof ( foreignDigits [ 0 ] ) , & att , & val , 1 ) ;
2004-11-11 23:34:58 +00:00
genericLocaleStarterWithOptions ( " root " , supplementaryDigits , sizeof ( supplementaryDigits ) / sizeof ( supplementaryDigits [ 0 ] ) , & att , & val , 1 ) ;
genericLocaleStarterWithOptions ( " root " , evenZeroes , sizeof ( evenZeroes ) / sizeof ( evenZeroes [ 0 ] ) , & att , & val , 1 ) ;
2003-08-18 22:12:04 +00:00
2004-11-11 23:34:58 +00:00
/* Setting up our collator to do digits. */
ucol_setAttribute ( coll , UCOL_NUMERIC_COLLATION , UCOL_ON , & status ) ;
2003-08-18 22:12:04 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute \n %s \n " ,
myErrorName ( status ) ) ;
return ;
}
2004-11-11 23:34:58 +00:00
/*
Testing that prepended zeroes still yield the correct collation behavior .
2003-08-18 22:12:04 +00:00
We expect that every element in our strings array will be equal .
*/
2004-02-12 08:32:34 +00:00
genericOrderingTestWithResult ( coll , preZeroTestStrings , sizeof ( preZeroTestStrings ) / sizeof ( preZeroTestStrings [ 0 ] ) , UCOL_EQUAL ) ;
2004-11-11 23:34:58 +00:00
2003-08-18 22:12:04 +00:00
ucol_close ( coll ) ;
}
2004-11-11 23:34:58 +00:00
static void TestTibetanConformance ( void )
{
const char * test [ ] = {
" \\ u0FB2 \\ u0591 \\ u0F71 \\ u0061 " ,
2003-11-12 20:45:53 +00:00
" \\ u0FB2 \\ u0F71 \\ u0061 "
} ;
2004-11-11 23:34:58 +00:00
2003-11-12 20:45:53 +00:00
UErrorCode status = U_ZERO_ERROR ;
UCollator * coll = ucol_open ( " " , & status ) ;
UChar source [ 100 ] ;
UChar target [ 100 ] ;
int result ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
if ( U_SUCCESS ( status ) ) {
u_unescape ( test [ 0 ] , source , 100 ) ;
u_unescape ( test [ 1 ] , target , 100 ) ;
2003-11-24 19:40:46 +00:00
doTest ( coll , source , target , UCOL_EQUAL ) ;
2003-11-12 20:45:53 +00:00
result = ucol_strcoll ( coll , source , - 1 , target , - 1 ) ;
2003-12-08 19:01:55 +00:00
log_verbose ( " result %d \n " , result ) ;
2003-11-12 20:45:53 +00:00
if ( UCOL_EQUAL ! = result ) {
2004-11-11 23:34:58 +00:00
log_err ( " Tibetan comparison error \n " ) ;
2003-11-12 20:45:53 +00:00
}
}
ucol_close ( coll ) ;
genericLocaleStarterWithResult ( " " , test , 2 , UCOL_EQUAL ) ;
}
2003-08-18 22:12:04 +00:00
2004-01-16 07:14:08 +00:00
/*
 * Hands a two-string table to genericLocaleStarter() for the " zh__PINYIN "
 * locale.
 */
static void TestPinyinProblem(void) {
    static const char *pinyinPair[] = {
        " \\ u4E56 \\ u4E56 \\ u7761 ",
        " \\ u4E56 \\ u5B69 \\ u5B50 "
    };

    genericLocaleStarter(" zh__PINYIN ", pinyinPair, sizeof(pinyinPair) / sizeof(pinyinPair[0]));
}
2004-01-16 16:40:30 +00:00
/* Upper bound (inclusive) of the raw values used by the implicit-weight tests. */
#define TST_UCOL_MAX_INPUT 0x220001

/*
 * Bit masks for picking bytes out of a 32-bit value.
 * They are defined WITHOUT a trailing ';' so that they expand to plain
 * constants and can be used inside larger expressions, not only as the last
 * token of a statement.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
/*
 * Logs, at verbose level, the value i next to the result of
 * uprv_uca_getImplicitFromRaw(i). Values outside 0..TST_UCOL_MAX_INPUT are
 * silently ignored.
 */
static void showImplicit(UChar32 i) {
    if (i < 0 || i > TST_UCOL_MAX_INPUT) {
        return;
    }
    log_verbose(" %08X \t %08X \n ", i, uprv_uca_getImplicitFromRaw(i));
}
/*
 * Walks every raw value from 0 to TST_UCOL_MAX_INPUT (inclusive) and checks:
 *   - uprv_uca_getRawFromImplicit(uprv_uca_getImplicitFromRaw(i)) gives back i;
 *   - every value strictly between the previous implicit value and the current
 *     one is answered with -1 by uprv_uca_getRawFromImplicit();
 *   - uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) gives back i.
 * Failures are reported with log_err(); a handful of values are also printed
 * through showImplicit() for manual spot-checking.
 * The root collator is opened first and the test returns early (with an error
 * logged) if that fails.
 */
static void TestImplicitGeneration ( void ) {
UErrorCode status = U_ZERO_ERROR ;
UChar32 last = 0 ;
UChar32 current ;
UChar32 i = 0 , j = 0 ;
UChar32 roundtrip = 0 ;
UChar32 lastBottom = 0 ;
UChar32 currentBottom = 0 ;
UChar32 lastTop = 0 ;
UChar32 currentTop = 0 ;
2004-01-16 23:44:58 +00:00
/* NOTE(review): coll is not passed to any call below; presumably it is opened so the collation data is loaded - confirm. */
UCollator * coll = ucol_open ( " root " , & status ) ;
2004-01-16 07:14:08 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Couldn't open UCA -> %s \n " , u_errorName ( status ) ) ;
2004-01-16 07:14:08 +00:00
return ;
}
2004-11-11 23:34:58 +00:00
2004-01-16 07:14:08 +00:00
/* The result of this single call is discarded. */
uprv_uca_getRawFromImplicit ( 0xE20303E7 ) ;
2004-01-16 16:40:30 +00:00
for ( i = 0 ; i < = TST_UCOL_MAX_INPUT ; + + i ) {
2004-01-16 07:14:08 +00:00
current = uprv_uca_getImplicitFromRaw ( i ) & fourBytes ;
2004-11-11 23:34:58 +00:00
2004-01-16 14:32:45 +00:00
/* check that it round-trips AND that all intervening ones are illegal*/
2004-01-16 07:14:08 +00:00
roundtrip = uprv_uca_getRawFromImplicit ( current ) ;
if ( roundtrip ! = i ) {
2004-11-11 23:34:58 +00:00
log_err ( " No roundtrip %08X \n " , i ) ;
2004-01-16 07:14:08 +00:00
}
/* The gap scan is skipped while last is still 0, i.e. at least on the first iteration. */
if ( last ! = 0 ) {
for ( j = last + 1 ; j < current ; + + j ) {
roundtrip = uprv_uca_getRawFromImplicit ( j ) ;
2004-01-16 14:32:45 +00:00
/* raise an error if it *doesn't* find an error*/
2004-01-16 07:14:08 +00:00
if ( roundtrip ! = - 1 ) {
log_err ( " Fails to recognize illegal %08X \n " , j ) ;
}
}
}
2004-01-16 14:32:45 +00:00
/* now do other consistency checks*/
2004-01-16 07:14:08 +00:00
/* lastBottom and currentBottom are computed here but not read again in this function. */
lastBottom = last & bottomByte ;
currentBottom = current & bottomByte ;
lastTop = last & topByte ;
currentTop = current & topByte ;
2004-01-16 14:32:45 +00:00
/* print out some values for spot-checking*/
2004-01-16 07:14:08 +00:00
/* Printing happens whenever the top byte changes, and at i == 0x10000 and i == 0x110000. */
if ( lastTop ! = currentTop | | i = = 0x10000 | | i = = 0x110000 ) {
showImplicit ( i - 3 ) ;
showImplicit ( i - 2 ) ;
showImplicit ( i - 1 ) ;
showImplicit ( i ) ;
showImplicit ( i + 1 ) ;
showImplicit ( i + 2 ) ;
}
last = current ;
2004-01-16 23:44:58 +00:00
if ( uprv_uca_getCodePointFromRaw ( uprv_uca_getRawFromCodePoint ( i ) ) ! = i ) {
log_err ( " No raw <-> code point roundtrip for 0x%08X \n " , i ) ;
}
2004-01-16 07:14:08 +00:00
}
2004-01-16 16:40:30 +00:00
/* Finally print the last three values of the tested range. */
showImplicit ( TST_UCOL_MAX_INPUT - 2 ) ;
showImplicit ( TST_UCOL_MAX_INPUT - 1 ) ;
2004-11-11 23:34:58 +00:00
showImplicit ( TST_UCOL_MAX_INPUT ) ;
2004-01-16 23:44:58 +00:00
ucol_close ( coll ) ;
2004-01-16 07:14:08 +00:00
}
2004-04-28 05:31:19 +00:00
/**
 * Walks the given enumeration to its end, logging every string at verbose
 * level, and verifies that each string of the expected array shows up.
 * Also asserts that the number of strings walked matches uenum_count().
 *
 * @param msg label printed in front of the verbose listing
 * @param iter enumeration to walk
 * @param expected array of strings we expect to see, or NULL
 * @param expectedCount number of elements of expected, or 0
 * @return the value reported by uenum_count(), or -1 if counting failed
 */
static int32_t checkUEnumeration(const char *msg,
                                 UEnumeration *iter,
                                 const char **expected,
                                 int32_t expectedCount) {
    UErrorCode ec = U_ZERO_ERROR;
    int32_t found = 0;      /* strings delivered by the enumeration so far */
    int32_t total;
    int32_t k;
    int32_t seenMask = 0;   /* bit k is set once expected[k] has been matched */

    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */

    total = uenum_count(iter, &ec);
    if (!assertSuccess(" count ", &ec)) {
        return -1;
    }

    log_verbose(" %s = [ ", msg);
    for (;;) {
        const char *name = uenum_next(iter, NULL, &ec);
        if (!assertSuccess(" snext ", &ec)) {
            break;
        }
        if (name == NULL) {
            break;
        }
        if (found > 0) {
            log_verbose(" , ");
        }
        log_verbose(" %s ", name);

        /* check expected list: mark the first still-unmatched entry equal to name */
        for (k = 0; k < expectedCount; ++k) {
            const int32_t flag = (int32_t)1 << k;
            if ((seenMask & flag) != 0) {
                continue;
            }
            if (uprv_strcmp(name, expected[k]) == 0) {
                seenMask |= flag;
                break;
            }
        }
        ++found;
    }
    log_verbose(" ] (%d) \n ", found);

    assertTrue(" count verified ", found == total);

    /* did we see all expected strings? */
    for (k = 0; k < expectedCount; ++k) {
        if ((seenMask & ((int32_t)1 << k)) == 0) {
            log_err(" FAIL: \" %s \" not seen \n ", expected[k]);
        } else {
            log_verbose(" Ok: \" %s \" seen \n ", expected[k]);
        }
    }
    return total;
}
/**
* Test new API added for separate collation tree .
*/
static void TestSeparateTrees ( void ) {
UErrorCode ec = U_ZERO_ERROR ;
UEnumeration * e = NULL ;
int32_t n = - 1 ;
UBool isAvailable ;
char loc [ 256 ] ;
static const char * AVAIL [ ] = { " en " , " de " } ;
static const char * KW [ ] = { " collation " } ;
static const char * KWVAL [ ] = { " phonebook " , " stroke " } ;
2004-07-18 02:02:06 +00:00
# if !UCONFIG_NO_SERVICE
2004-04-28 05:31:19 +00:00
e = ucol_openAvailableLocales ( & ec ) ;
2009-06-09 21:28:13 +00:00
if ( e ! = NULL ) {
assertSuccess ( " ucol_openAvailableLocales " , & ec ) ;
assertTrue ( " ucol_openAvailableLocales!=0 " , e ! = 0 ) ;
n = checkUEnumeration ( " ucol_openAvailableLocales " , e , AVAIL , LEN ( AVAIL ) ) ;
/* Don't need to check n because we check list */
uenum_close ( e ) ;
} else {
log_data_err ( " Error calling ucol_openAvailableLocales() -> %s (Are you missing data?) \n " , u_errorName ( ec ) ) ;
}
2004-07-18 02:02:06 +00:00
# endif
2004-04-28 05:31:19 +00:00
e = ucol_getKeywords ( & ec ) ;
2009-06-09 21:28:13 +00:00
if ( e ! = NULL ) {
assertSuccess ( " ucol_getKeywords " , & ec ) ;
assertTrue ( " ucol_getKeywords!=0 " , e ! = 0 ) ;
n = checkUEnumeration ( " ucol_getKeywords " , e , KW , LEN ( KW ) ) ;
/* Don't need to check n because we check list */
uenum_close ( e ) ;
} else {
log_data_err ( " Error calling ucol_getKeywords() -> %s (Are you missing data?) \n " , u_errorName ( ec ) ) ;
}
2004-04-28 05:31:19 +00:00
e = ucol_getKeywordValues ( KW [ 0 ] , & ec ) ;
2009-06-09 21:28:13 +00:00
if ( e ! = NULL ) {
assertSuccess ( " ucol_getKeywordValues " , & ec ) ;
assertTrue ( " ucol_getKeywordValues!=0 " , e ! = 0 ) ;
n = checkUEnumeration ( " ucol_getKeywordValues " , e , KWVAL , LEN ( KWVAL ) ) ;
/* Don't need to check n because we check list */
uenum_close ( e ) ;
} else {
log_data_err ( " Error calling ucol_getKeywordValues() -> %s (Are you missing data?) \n " , u_errorName ( ec ) ) ;
}
2004-04-28 05:31:19 +00:00
2004-05-25 05:43:23 +00:00
/* Try setting a warning before calling ucol_getKeywordValues */
ec = U_USING_FALLBACK_WARNING ;
e = ucol_getKeywordValues ( KW [ 0 ] , & ec ) ;
2009-06-09 21:28:13 +00:00
if ( assertSuccess ( " ucol_getKeywordValues [with warning code set] " , & ec ) ) {
assertTrue ( " ucol_getKeywordValues!=0 [with warning code set] " , e ! = 0 ) ;
n = checkUEnumeration ( " ucol_getKeywordValues [with warning code set] " , e , KWVAL , LEN ( KWVAL ) ) ;
/* Don't need to check n because we check list */
uenum_close ( e ) ;
}
2004-05-25 05:43:23 +00:00
2004-04-28 05:31:19 +00:00
/*
U_DRAFT int32_t U_EXPORT2
ucol_getFunctionalEquivalent ( char * result , int32_t resultCapacity ,
const char * locale , UBool * isAvailable ,
UErrorCode * status ) ;
}
*/
2010-10-19 03:31:50 +00:00
n = ucol_getFunctionalEquivalent ( loc , sizeof ( loc ) , " collation " , " de " ,
2004-04-28 05:31:19 +00:00
& isAvailable , & ec ) ;
2009-06-09 21:28:13 +00:00
if ( assertSuccess ( " getFunctionalEquivalent " , & ec ) ) {
2010-10-19 03:31:50 +00:00
assertEquals ( " getFunctionalEquivalent(de) " , " de " , loc ) ;
assertTrue ( " getFunctionalEquivalent(de).isAvailable==TRUE " ,
2009-06-09 21:28:13 +00:00
isAvailable = = TRUE ) ;
}
2004-11-11 23:34:58 +00:00
2010-10-19 03:31:50 +00:00
n = ucol_getFunctionalEquivalent ( loc , sizeof ( loc ) , " collation " , " de_DE " ,
2004-04-28 05:31:19 +00:00
& isAvailable , & ec ) ;
2009-06-09 21:28:13 +00:00
if ( assertSuccess ( " getFunctionalEquivalent " , & ec ) ) {
2010-10-19 03:31:50 +00:00
assertEquals ( " getFunctionalEquivalent(de_DE) " , " de " , loc ) ;
assertTrue ( " getFunctionalEquivalent(de_DE).isAvailable==TRUE " ,
2009-06-09 21:28:13 +00:00
isAvailable = = TRUE ) ;
}
2004-04-28 05:31:19 +00:00
}
2004-05-14 07:10:56 +00:00
/* supersedes TestJ784 */
2004-05-08 07:59:36 +00:00
/*
 * Feeds two lists of strings to genericRulesStarter (using the
 * [before 2] rule string below) and to genericLocaleStarter for the "zh"
 * locale.
 */
static void TestBeforePinyin(void) {
    /* One [before 2] reset per base letter, plus a final reset on U. */
    static const char rules[] =
        " &[before 2]A<< \\ u0101<<< \\ u0100<< \\ u00E1<<< \\ u00C1<< \\ u01CE<<< \\ u01CD<< \\ u00E0<<< \\ u00C0 "
        " &[before 2]e<< \\ u0113<<< \\ u0112<< \\ u00E9<<< \\ u00C9<< \\ u011B<<< \\ u011A<< \\ u00E8<<< \\ u00C8 "
        " &[before 2]i<< \\ u012B<<< \\ u012A<< \\ u00ED<<< \\ u00CD<< \\ u01D0<<< \\ u01CF<< \\ u00EC<<< \\ u00CC "
        " &[before 2]o<< \\ u014D<<< \\ u014C<< \\ u00F3<<< \\ u00D3<< \\ u01D2<<< \\ u01D1<< \\ u00F2<<< \\ u00D2 "
        " &[before 2]u<< \\ u016B<<< \\ u016A<< \\ u00FA<<< \\ u00DA<< \\ u01D4<<< \\ u01D3<< \\ u00F9<<< \\ u00D9 "
        " &U<< \\ u01D6<<< \\ u01D5<< \\ u01D8<<< \\ u01D7<< \\ u01DA<<< \\ u01D9<< \\ u01DC<<< \\ u01DB<< \\ u00FC ";

    /* First string list. */
    static const char *test[] = {
        " l \\ u0101 ",
        " la ",
        " l \\ u0101n ",
        " lan ",
        " l \\ u0113 ",
        " le ",
        " l \\ u0113n ",
        " len "
    };

    /* Second string list: case and accent variants of "xa" and "xax". */
    static const char *test2[] = {
        " x \\ u0101 ",  " x \\ u0100 ",  " X \\ u0101 ",  " X \\ u0100 ",
        " x \\ u00E1 ",  " x \\ u00C1 ",  " X \\ u00E1 ",  " X \\ u00C1 ",
        " x \\ u01CE ",  " x \\ u01CD ",  " X \\ u01CE ",  " X \\ u01CD ",
        " x \\ u00E0 ",  " x \\ u00C0 ",  " X \\ u00E0 ",  " X \\ u00C0 ",
        " xa ",          " xA ",          " Xa ",          " XA ",
        " x \\ u0101x ", " x \\ u0100x ",
        " x \\ u00E1x ", " x \\ u00C1x ",
        " x \\ u01CEx ", " x \\ u01CDx ",
        " x \\ u00E0x ", " x \\ u00C0x ",
        " xax ",         " xAx "
    };

    /* Each list goes through the explicit rules first, then the locale. */
    genericRulesStarter(rules, test, sizeof test / sizeof test[0]);
    genericLocaleStarter(" zh ", test, sizeof test / sizeof test[0]);

    genericRulesStarter(rules, test2, sizeof test2 / sizeof test2[0]);
    genericLocaleStarter(" zh ", test2, sizeof test2 / sizeof test2[0]);
}
static void TestBeforeTightening ( void ) {
2006-10-01 07:12:18 +00:00
static const struct {
2004-05-08 07:59:36 +00:00
const char * rules ;
UErrorCode expectedStatus ;
} tests [ ] = {
{ " &[before 1]a<x " , U_ZERO_ERROR } ,
{ " &[before 1]a<<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 1]a<<<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 1]a=x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 2]a<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 2]a<<x " , U_ZERO_ERROR } ,
{ " &[before 2]a<<<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 2]a=x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 3]a<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 3]a<<x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before 3]a<<<x " , U_ZERO_ERROR } ,
{ " &[before 3]a=x " , U_INVALID_FORMAT_ERROR } ,
{ " &[before I]a = x " , U_INVALID_FORMAT_ERROR }
} ;
int32_t i = 0 ;
UErrorCode status = U_ZERO_ERROR ;
UChar rlz [ RULE_BUFFER_LEN ] = { 0 } ;
uint32_t rlen = 0 ;
UCollator * coll = NULL ;
for ( i = 0 ; i < sizeof ( tests ) / sizeof ( tests [ 0 ] ) ; i + + ) {
rlen = u_unescape ( tests [ i ] . rules , rlz , RULE_BUFFER_LEN ) ;
coll = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
if ( status ! = tests [ i ] . expectedStatus ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Opening a collator with rules %s returned error code %s, expected %s \n " ,
2004-05-08 07:59:36 +00:00
tests [ i ] . rules , u_errorName ( status ) , u_errorName ( tests [ i ] . expectedStatus ) ) ;
}
ucol_close ( coll ) ;
status = U_ZERO_ERROR ;
}
}
#if 0
& m < a
& [ before 1 ] a < x < < < X < < q < < < Q < z
assert : m < < < M < x < < < X < < q < < < Q < z < a < n
& m < a
& [ before 2 ] a < < x < < < X < < q < < < Q < z
assert : m < < < M < x < < < X < < q < < < Q < < a < z < n
& m < a
& [ before 3 ] a < < < x < < < X < < q < < < Q < z
assert : m < < < M < x < < < X < < < a < < q < < < Q < z < n
& m < < a
& [ before 1 ] a < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < z < m < < < M < < a < n
& m < < a
& [ before 2 ] a < < x < < < X < < q < < < Q < z
assert : m < < < M < < x < < < X < < q < < < Q < < a < z < n
& m < < a
& [ before 3 ] a < < < x < < < X < < q < < < Q < z
assert : m < < < M < < x < < < X < < < a < < q < < < Q < z < n
& m < < < a
& [ before 1 ] a < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < z < n < m < < < a < < < M
& m < < < a
& [ before 2 ] a < < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < < m < < < a < < < M < z < n
& m < < < a
& [ before 3 ] a < < < x < < < X < < q < < < Q < z
assert : m < < < x < < < X < < < a < < < M < < q < < < Q < z < n
& [ before 1 ] s < x < < < X < < q < < < Q < z
assert : r < < < R < x < < < X < < q < < < Q < z < s < n
& [ before 2 ] s < < x < < < X < < q < < < Q < z
assert : r < < < R < x < < < X < < q < < < Q < < s < z < n
& [ before 3 ] s < < < x < < < X < < q < < < Q < z
assert : r < < < R < x < < < X < < < s < < q < < < Q < z < n
& [ before 1 ] \ u24DC < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < z < n < m < < < \ u24DC < < < M
& [ before 2 ] \ u24DC < < x < < < X < < q < < < Q < z
assert : x < < < X < < q < < < Q < < m < < < \ u24DC < < < M < z < n
& [ before 3 ] \ u24DC < < < x < < < X < < q < < < Q < z
assert : m < < < x < < < X < < < \ u24DC < < < M < < q < < < Q < z < n
# endif
2004-06-03 22:08:39 +00:00
#if 0
/* requires features not yet supported */
2004-05-08 07:59:36 +00:00
static void TestMoreBefore ( void ) {
2006-09-28 08:41:37 +00:00
static const struct {
2004-05-08 07:59:36 +00:00
const char * rules ;
2006-09-28 08:41:37 +00:00
const char * order [ 16 ] ;
2004-05-08 07:59:36 +00:00
int32_t size ;
} tests [ ] = {
{ " &m < a &[before 1] a < x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " q " , " Q " , " z " , " a " , " n " } , 9 } ,
{ " &m < a &[before 2] a << x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " q " , " Q " , " a " , " z " , " n " } , 9 } ,
{ " &m < a &[before 3] a <<< x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " a " , " q " , " Q " , " z " , " n " } , 9 } ,
{ " &m << a &[before 1] a < x <<< X << q <<< Q < z " ,
{ " x " , " X " , " q " , " Q " , " z " , " m " , " M " , " a " , " n " } , 9 } ,
{ " &m << a &[before 2] a << x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " q " , " Q " , " a " , " z " , " n " } , 9 } ,
{ " &m << a &[before 3] a <<< x <<< X << q <<< Q < z " ,
{ " m " , " M " , " x " , " X " , " a " , " q " , " Q " , " z " , " n " } , 9 } ,
2004-11-11 23:34:58 +00:00
{ " &m <<< a &[before 1] a < x <<< X << q <<< Q < z " ,
2004-05-08 07:59:36 +00:00
{ " x " , " X " , " q " , " Q " , " z " , " n " , " m " , " a " , " M " } , 9 } ,
2004-11-11 23:34:58 +00:00
{ " &m <<< a &[before 2] a << x <<< X << q <<< Q < z " ,
2004-05-08 07:59:36 +00:00
{ " x " , " X " , " q " , " Q " , " m " , " a " , " M " , " z " , " n " } , 9 } ,
{ " &m <<< a &[before 3] a <<< x <<< X << q <<< Q < z " ,
{ " m " , " x " , " X " , " a " , " M " , " q " , " Q " , " z " , " n " } , 9 } ,
{ " &[before 1] s < x <<< X << q <<< Q < z " ,
{ " r " , " R " , " x " , " X " , " q " , " Q " , " z " , " s " , " n " } , 9 } ,
{ " &[before 2] s << x <<< X << q <<< Q < z " ,
{ " r " , " R " , " x " , " X " , " q " , " Q " , " s " , " z " , " n " } , 9 } ,
2004-11-11 23:34:58 +00:00
{ " &[before 3] s <<< x <<< X << q <<< Q < z " ,
2004-05-08 07:59:36 +00:00
{ " r " , " R " , " x " , " X " , " s " , " q " , " Q " , " z " , " n " } , 9 } ,
2004-05-19 00:31:51 +00:00
{ " &[before 1] \\ u24DC < x <<< X << q <<< Q < z " ,
{ " x " , " X " , " q " , " Q " , " z " , " n " , " m " , " \\ u24DC " , " M " } , 9 } ,
{ " &[before 2] \\ u24DC << x <<< X << q <<< Q < z " ,
{ " x " , " X " , " q " , " Q " , " m " , " \\ u24DC " , " M " , " z " , " n " } , 9 } ,
{ " &[before 3] \\ u24DC <<< x <<< X << q <<< Q < z " ,
{ " m " , " x " , " X " , " \\ u24DC " , " M " , " q " , " Q " , " z " , " n " } , 9 }
2004-05-08 07:59:36 +00:00
} ;
int32_t i = 0 ;
for ( i = 0 ; i < sizeof ( tests ) / sizeof ( tests [ 0 ] ) ; i + + ) {
genericRulesStarter ( tests [ i ] . rules , tests [ i ] . order , tests [ i ] . size ) ;
}
}
2004-06-03 22:08:39 +00:00
# endif
2004-05-08 07:59:36 +00:00
2004-06-03 22:08:39 +00:00
static void TestTailorNULL ( void ) {
2004-05-17 22:06:14 +00:00
const static char * rule = " &a <<< ' \\ u0000' " ;
2004-05-14 07:10:56 +00:00
UErrorCode status = U_ZERO_ERROR ;
UChar rlz [ RULE_BUFFER_LEN ] = { 0 } ;
uint32_t rlen = 0 ;
UChar a = 1 , null = 0 ;
UCollationResult res = UCOL_EQUAL ;
UCollator * coll = NULL ;
rlen = u_unescape ( rule , rlz , RULE_BUFFER_LEN ) ;
coll = ucol_openRules ( rlz , rlen , UCOL_DEFAULT , UCOL_DEFAULT , NULL , & status ) ;
2004-12-16 02:54:23 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Could not open default collator! -> %s \n " , u_errorName ( status ) ) ;
2004-12-16 02:54:23 +00:00
} else {
res = ucol_strcoll ( coll , & a , 1 , & null , 1 ) ;
if ( res ! = UCOL_LESS ) {
log_err ( " NULL was not tailored properly! \n " ) ;
}
2004-05-17 22:06:14 +00:00
}
2004-12-16 02:54:23 +00:00
2004-05-14 07:10:56 +00:00
ucol_close ( coll ) ;
}
2004-05-08 07:59:36 +00:00
2005-09-17 06:26:58 +00:00
static void
TestUpperFirstQuaternary ( void )
{
2005-09-26 06:22:39 +00:00
const char * tests [ ] = { " B " , " b " , " Bb " , " bB " } ;
2005-09-17 06:26:58 +00:00
UColAttribute att [ ] = { UCOL_STRENGTH , UCOL_CASE_FIRST } ;
UColAttributeValue attVals [ ] = { UCOL_QUATERNARY , UCOL_UPPER_FIRST } ;
genericLocaleStarterWithOptions ( " root " , tests , sizeof ( tests ) / sizeof ( tests [ 0 ] ) , att , attVals , sizeof ( att ) / sizeof ( att [ 0 ] ) ) ;
}
2006-01-28 08:25:52 +00:00
/*
 * Three case-level checks:
 *  1. with primary strength plus case level on "root", the first pair of
 *     strings is expected to compare UCOL_EQUAL;
 *  2. the pair "a"/"A" is run with the same options;
 *  3. with a rule making A and a equal to [first tertiary ignorable] and
 *     only case level on, "a"/"A" is expected to compare UCOL_EQUAL.
 */
static void
TestJ4960(void)
{
    /* Options shared by steps 1 and 2. */
    UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
    UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
    /* Options for step 3. */
    UColAttribute att2[] = { UCOL_CASE_LEVEL };
    UColAttributeValue attVals2[] = { UCOL_ON };

    const char *tests[] = { " \\ u00e2T ", " aT " };
    const char *tests2[] = { " a ", " A " };
    const char *rule = " &[first tertiary ignorable]=A=a ";

    /* Test whether we correctly ignore primary ignorables on case level when */
    /* we have only primary & case level */
    genericLocaleStarterWithOptionsAndResult(" root ",
                                             tests, sizeof tests / sizeof tests[0],
                                             att, attVals, sizeof att / sizeof att[0],
                                             UCOL_EQUAL);

    /* Test whether ICU4J will make case level for sortkeys that have primary strength */
    /* and case level */
    genericLocaleStarterWithOptions(" root ",
                                    tests2, sizeof tests2 / sizeof tests2[0],
                                    att, attVals, sizeof att / sizeof att[0]);

    /* Test whether completely ignorable letters have case level info (they shouldn't) */
    genericRulesStarterWithOptionsAndResult(rule,
                                            tests2, sizeof tests2 / sizeof tests2[0],
                                            att2, attVals2, sizeof att2 / sizeof att2[0],
                                            UCOL_EQUAL);
}
2006-07-06 06:30:06 +00:00
static void
TestJ5223 ( void )
{
2006-07-25 02:52:22 +00:00
static const char * test = " this is a test string " ;
2006-07-06 06:30:06 +00:00
UChar ustr [ 256 ] ;
int32_t ustr_length = u_unescape ( test , ustr , 256 ) ;
unsigned char sortkey [ 256 ] ;
int32_t sortkey_length ;
UErrorCode status = U_ZERO_ERROR ;
static UCollator * coll = NULL ;
coll = ucol_open ( " root " , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Couldn't open UCA -> %s \n " , u_errorName ( status ) ) ;
2006-07-06 06:30:06 +00:00
return ;
}
ucol_setStrength ( coll , UCOL_PRIMARY ) ;
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_PRIMARY , & status ) ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Failed setting atributes \n " ) ;
return ;
2009-04-23 00:23:57 +00:00
}
2006-07-06 06:30:06 +00:00
sortkey_length = ucol_getSortKey ( coll , ustr , ustr_length , NULL , 0 ) ;
if ( sortkey_length > 256 ) return ;
/* we mark the position where the null byte should be written in advance */
sortkey [ sortkey_length - 1 ] = 0xAA ;
/* we set the buffer size one byte higher than needed */
sortkey_length = ucol_getSortKey ( coll , ustr , ustr_length , sortkey ,
sortkey_length + 1 ) ;
/* no error occurs (for me) */
if ( sortkey [ sortkey_length - 1 ] = = 0xAA ) {
log_err ( " Hit bug at first try \n " ) ;
}
/* we mark the position where the null byte should be written again */
sortkey [ sortkey_length - 1 ] = 0xAA ;
/* this time we set the buffer size to the exact amount needed */
sortkey_length = ucol_getSortKey ( coll , ustr , ustr_length , sortkey ,
sortkey_length ) ;
/* now the trailing null byte is not written */
if ( sortkey [ sortkey_length - 1 ] = = 0xAA ) {
log_err ( " Hit bug at second try \n " ) ;
}
ucol_close ( coll ) ;
}
2006-08-22 17:51:36 +00:00
/* Regression test for Thai partial sort key problem */
/* Hands two Thai strings to genericLocaleStarter for the "th" locale. */
static void
TestJ5232(void)
{
    /* The two strings differ only in their second-to-last code point. */
    static const char *test[] = {
        " \\ u0e40 \\ u0e01 \\ u0e47 \\ u0e1a \\ u0e40 \\ u0e25 \\ u0e47 \\ u0e21 ",
        " \\ u0e40 \\ u0e01 \\ u0e47 \\ u0e1a \\ u0e40 \\ u0e25 \\ u0e48 \\ u0e21 "
    };

    genericLocaleStarter(" th ", test, sizeof test / sizeof test[0]);
}
2006-09-07 20:12:11 +00:00
/*
 * Hands the pair "a", "y" to genericRulesStarter together with a rule that
 * tailors 'a' after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *test[] = { " a ", " y " };
    const char *rules = " &Ny << Y &[first secondary ignorable] <<< a ";

    genericRulesStarter(rules, test, sizeof test / sizeof test[0]);
}
2006-08-22 17:51:36 +00:00
2007-11-30 04:29:20 +00:00
static void
TestVI5913 ( void )
{
UErrorCode status = U_ZERO_ERROR ;
int32_t i , j ;
UCollator * coll = NULL ;
uint8_t resColl [ 100 ] , expColl [ 100 ] ;
int32_t rLen , tLen , ruleLen , sLen , kLen ;
UChar rule [ 256 ] = { 0x26 , 0x62 , 0x3c , 0x1FF3 , 0 } ; /* &a<0x1FF3-omega with Ypogegrammeni*/
UChar rule2 [ 256 ] = { 0x26 , 0x7a , 0x3c , 0x0161 , 0 } ; /* &z<s with caron*/
UChar rule3 [ 256 ] = { 0x26 , 0x7a , 0x3c , 0x0061 , 0x00ea , 0 } ; /* &z<a+e with circumflex.*/
2007-12-07 10:49:34 +00:00
static const UChar tData [ ] [ 20 ] = {
2007-11-30 04:29:20 +00:00
{ 0x1EAC , 0 } ,
{ 0x0041 , 0x0323 , 0x0302 , 0 } ,
{ 0x1EA0 , 0x0302 , 0 } ,
{ 0x00C2 , 0x0323 , 0 } ,
{ 0x1ED8 , 0 } , /* O with dot and circumflex */
{ 0x1ECC , 0x0302 , 0 } ,
{ 0x1EB7 , 0 } ,
{ 0x1EA1 , 0x0306 , 0 } ,
} ;
2007-12-07 10:49:34 +00:00
static const UChar tailorData [ ] [ 20 ] = {
2007-11-30 04:29:20 +00:00
{ 0x1FA2 , 0 } , /* Omega with 3 combining marks */
{ 0x03C9 , 0x0313 , 0x0300 , 0x0345 , 0 } ,
{ 0x1FF3 , 0x0313 , 0x0300 , 0 } ,
{ 0x1F60 , 0x0300 , 0x0345 , 0 } ,
{ 0x1F62 , 0x0345 , 0 } ,
{ 0x1FA0 , 0x0300 , 0 } ,
} ;
2007-12-07 10:49:34 +00:00
static const UChar tailorData2 [ ] [ 20 ] = {
2007-11-30 04:29:20 +00:00
{ 0x1E63 , 0x030C , 0 } , /* s with dot below + caron */
{ 0x0073 , 0x0323 , 0x030C , 0 } ,
{ 0x0073 , 0x030C , 0x0323 , 0 } ,
} ;
2007-12-07 10:49:34 +00:00
static const UChar tailorData3 [ ] [ 20 ] = {
2007-11-30 04:29:20 +00:00
{ 0x007a , 0 } , /* z */
{ 0x0061 , 0x0065 , 0 } , /* a + e */
{ 0x0061 , 0x00ea , 0 } , /* a + e with circumflex */
{ 0x0061 , 0x1EC7 , 0 } , /* a+ e with dot below and circumflex */
{ 0x0061 , 0x1EB9 , 0x0302 , 0 } , /* a + e with dot below + combining circumflex */
{ 0x0061 , 0x00EA , 0x0323 , 0 } , /* a + e with circumflex + combining dot below */
{ 0x00EA , 0x0323 , 0 } , /* e with circumflex + combining dot below */
{ 0x00EA , 0 } , /* e with circumflex */
} ;
/* Test Vietnamese sort. */
coll = ucol_open ( " vi " , & status ) ;
2008-02-16 12:30:55 +00:00
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Couldn't open collator -> %s \n " , u_errorName ( status ) ) ;
2008-02-16 12:30:55 +00:00
return ;
}
2007-11-30 04:29:20 +00:00
log_verbose ( " \n \n VI collation: " ) ;
if ( ! ucol_equal ( coll , tData [ 0 ] , u_strlen ( tData [ 0 ] ) , tData [ 2 ] , u_strlen ( tData [ 2 ] ) ) ) {
log_err ( " \\ u1EAC not equals to \\ u1EA0+ \\ u0302 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 0 ] , u_strlen ( tData [ 0 ] ) , tData [ 3 ] , u_strlen ( tData [ 3 ] ) ) ) {
log_err ( " \\ u1EAC not equals to \\ u00c2+ \\ u0323 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 5 ] , u_strlen ( tData [ 5 ] ) , tData [ 4 ] , u_strlen ( tData [ 4 ] ) ) ) {
log_err ( " \\ u1ED8 not equals to \\ u1ECC+ \\ u0302 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 7 ] , u_strlen ( tData [ 7 ] ) , tData [ 6 ] , u_strlen ( tData [ 6 ] ) ) ) {
log_err ( " \\ u1EB7 not equals to \\ u1EA1+ \\ u0306 \n " ) ;
}
for ( j = 0 ; j < 8 ; j + + ) {
tLen = u_strlen ( tData [ j ] ) ;
log_verbose ( " \n Data :%s \t len: %d key: " , tData [ j ] , tLen ) ;
rLen = ucol_getSortKey ( coll , tData [ j ] , tLen , resColl , 100 ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_verbose ( " %02X " , resColl [ i ] ) ;
}
}
ucol_close ( coll ) ;
2007-12-07 10:49:34 +00:00
/* Test Romanian sort. */
2007-11-30 04:29:20 +00:00
coll = ucol_open ( " ro " , & status ) ;
log_verbose ( " \n \n RO collation: " ) ;
if ( ! ucol_equal ( coll , tData [ 0 ] , u_strlen ( tData [ 0 ] ) , tData [ 1 ] , u_strlen ( tData [ 1 ] ) ) ) {
log_err ( " \\ u1EAC not equals to \\ u1EA0+ \\ u0302 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 4 ] , u_strlen ( tData [ 4 ] ) , tData [ 5 ] , u_strlen ( tData [ 5 ] ) ) ) {
log_err ( " \\ u1EAC not equals to \\ u00c2+ \\ u0323 \n " ) ;
}
if ( ! ucol_equal ( coll , tData [ 6 ] , u_strlen ( tData [ 6 ] ) , tData [ 7 ] , u_strlen ( tData [ 7 ] ) ) ) {
log_err ( " \\ u1EB7 not equals to \\ u1EA1+ \\ u0306 \n " ) ;
}
for ( j = 4 ; j < 8 ; j + + ) {
tLen = u_strlen ( tData [ j ] ) ;
log_verbose ( " \n Data :%s \t len: %d key: " , tData [ j ] , tLen ) ;
rLen = ucol_getSortKey ( coll , tData [ j ] , tLen , resColl , 100 ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_verbose ( " %02X " , resColl [ i ] ) ;
}
}
ucol_close ( coll ) ;
/* Test the precomposed Greek character with 3 combining marks. */
log_verbose ( " \n \n Tailoring test: Greek character with 3 combining marks " ) ;
ruleLen = u_strlen ( rule ) ;
coll = ucol_openRules ( rule , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
2007-12-07 10:49:34 +00:00
if ( U_FAILURE ( status ) ) {
log_err ( " ucol_openRules failed with %s \n " , u_errorName ( status ) ) ;
return ;
}
2007-11-30 04:29:20 +00:00
sLen = u_strlen ( tailorData [ 0 ] ) ;
for ( j = 1 ; j < 6 ; j + + ) {
tLen = u_strlen ( tailorData [ j ] ) ;
if ( ! ucol_equal ( coll , tailorData [ 0 ] , sLen , tailorData [ j ] , tLen ) ) {
log_err ( " \n \\ u1FA2 not equals to data[%d]:%s \n " , j , tailorData [ j ] ) ;
}
}
/* Test getSortKey. */
tLen = u_strlen ( tailorData [ 0 ] ) ;
kLen = ucol_getSortKey ( coll , tailorData [ 0 ] , tLen , expColl , 100 ) ;
for ( j = 0 ; j < 6 ; j + + ) {
tLen = u_strlen ( tailorData [ j ] ) ;
rLen = ucol_getSortKey ( coll , tailorData [ j ] , tLen , resColl , 100 ) ;
if ( kLen ! = rLen | | uprv_memcmp ( expColl , resColl , rLen * sizeof ( uint8_t ) ) ! = 0 ) {
log_err ( " \n Data[%d] :%s \t len: %d key: " , j , tailorData [ j ] , tLen ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
}
}
ucol_close ( coll ) ;
log_verbose ( " \n \n Tailoring test for s with caron: " ) ;
ruleLen = u_strlen ( rule2 ) ;
coll = ucol_openRules ( rule2 , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
tLen = u_strlen ( tailorData2 [ 0 ] ) ;
kLen = ucol_getSortKey ( coll , tailorData2 [ 0 ] , tLen , expColl , 100 ) ;
for ( j = 1 ; j < 3 ; j + + ) {
tLen = u_strlen ( tailorData2 [ j ] ) ;
rLen = ucol_getSortKey ( coll , tailorData2 [ j ] , tLen , resColl , 100 ) ;
if ( kLen ! = rLen | | uprv_memcmp ( expColl , resColl , rLen * sizeof ( uint8_t ) ) ! = 0 ) {
log_err ( " \n After tailoring Data[%d] :%s \t len: %d key: " , j , tailorData [ j ] , tLen ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
}
}
ucol_close ( coll ) ;
log_verbose ( " \n \n Tailoring test for &z< ae with circumflex: " ) ;
2007-12-07 10:49:34 +00:00
ruleLen = u_strlen ( rule3 ) ;
coll = ucol_openRules ( rule3 , ruleLen , UCOL_OFF , UCOL_TERTIARY , NULL , & status ) ;
tLen = u_strlen ( tailorData3 [ 3 ] ) ;
kLen = ucol_getSortKey ( coll , tailorData3 [ 3 ] , tLen , expColl , 100 ) ;
for ( j = 4 ; j < 6 ; j + + ) {
tLen = u_strlen ( tailorData3 [ j ] ) ;
rLen = ucol_getSortKey ( coll , tailorData3 [ j ] , tLen , resColl , 100 ) ;
2007-11-30 04:29:20 +00:00
2007-12-07 10:49:34 +00:00
if ( kLen ! = rLen | | uprv_memcmp ( expColl , resColl , rLen * sizeof ( uint8_t ) ) ! = 0 ) {
log_err ( " \n After tailoring Data[%d] :%s \t len: %d key: " , j , tailorData [ j ] , tLen ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_err ( " %02X " , resColl [ i ] ) ;
}
}
log_verbose ( " \n Test Data[%d] :%s \t len: %d key: " , j , tailorData [ j ] , tLen ) ;
for ( i = 0 ; i < rLen ; i + + ) {
log_verbose ( " %02X " , resColl [ i ] ) ;
}
}
ucol_close ( coll ) ;
2007-11-30 04:29:20 +00:00
}
2006-08-22 17:51:36 +00:00
2008-04-04 22:47:43 +00:00
/*
 * Builds two tailorings that reset on the [last/first primary ignorable] and
 * [last/first secondary ignorable] positions, then compares the leading bytes
 * of the sort keys of "a" and "b" against hard-coded reference byte
 * sequences.  A key that compares lower than its reference is reported.
 */
static void
TestTailor6179(void)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t i;
    UCollator *coll = NULL;
    uint8_t resColl[100];
    int32_t rLen, tLen, ruleLen;
    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
    UChar rule1[256] = {0x26, 0x5B, 0x6C, 0x61, 0x73, 0x74, 0x20, 0x70, 0x72, 0x69, 0x6D, 0x61, 0x72, 0x79,
            0x20, 0x69, 0x67, 0x6E, 0x6F, 0x72, 0x61, 0x62, 0x6C, 0x65, 0x5D, 0x3C, 0x3C, 0x20, 0x61, 0x20,
            0x26, 0x5B, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x70, 0x72, 0x69, 0x6D, 0x61, 0x72, 0x79, 0x20,
            0x69, 0x67, 0x6E, 0x6F, 0x72, 0x61, 0x62, 0x6C, 0x65, 0x5D, 0x3C, 0x3C, 0x62, 0x20, 0};
    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
    UChar rule2[256] = {0x26, 0x5B, 0x6C, 0x61, 0x73, 0x74, 0x20, 0x73, 0x65, 0x63, 0x6F, 0x6E, 0x64, 0x61,
            0x72, 0x79, 0x20, 0x69, 0x67, 0x6E, 0x6F, 0x72, 0x61, 0x62, 0x6C, 0x65, 0x5D, 0x3C, 0x3C, 0x3C,
            0x61, 0x20, 0x26, 0x5B, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x73, 0x65, 0x63, 0x6F, 0x6E,
            0x64, 0x61, 0x72, 0x79, 0x20, 0x69, 0x67, 0x6E, 0x6F, 0x72, 0x61, 0x62, 0x6C, 0x65, 0x5D, 0x3C,
            0x3C, 0x3C, 0x20, 0x62, 0};

    UChar tData1[][20] = {
        {0x61, 0},
        {0x62, 0},
        {0xFDD0, 0x009E, 0}
    };
    UChar tData2[][20] = {
        {0x61, 0},
        {0x62, 0},
        {0xFDD0, 0x009E, 0}
    };

    /*
     * These values from FractionalUCA.txt will change,
     * and need to be updated here.
     */
    uint8_t firstPrimaryIgnCE[6] = {1, 87, 1, 5, 1, 0};
    uint8_t lastPrimaryIgnCE[6] = {1, 0xE3, 0xC9, 1, 5, 0};
    uint8_t firstSecondaryIgnCE[6] = {1, 1, 0x3f, 0x03, 0};
    uint8_t lastSecondaryIgnCE[6] = {1, 1, 0x3f, 0x03, 0};

    /* Test [Last Primary ignorable] */

    log_verbose(" \n \n Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b ");
    ruleLen = u_strlen(rule1);
    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL, &status);
    if (U_FAILURE(status)) {
        log_err_status(status, " Tailoring test: &[last primary ignorable] failed! -> %s \n ", u_errorName(status));
        return;
    }
    tLen = u_strlen(tData1[0]);
    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
    if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
        log_err(" \n Data[%d] :%s \t len: %d key: ", 0, tData1[0], rLen);
        for (i = 0; i < rLen; i++) {
            log_err(" %02X ", resColl[i]);
        }
    }
    tLen = u_strlen(tData1[1]);
    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
    if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
        log_err(" \n Data[%d] :%s \t len: %d key: ", 1, tData1[1], rLen);
        for (i = 0; i < rLen; i++) {
            log_err(" %02X ", resColl[i]);
        }
    }
    ucol_close(coll);

    /* Test [Last Secondary ignorable] */
    log_verbose(" \n \n Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b ");
    /*
     * The length must be taken from rule2 itself: rule1 is shorter, and using
     * its length would cut the trailing "<<< b" off the second rule.
     */
    ruleLen = u_strlen(rule2);
    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL, &status);
    if (U_FAILURE(status)) {
        log_err_status(status, " Tailoring test: &[last secondary ignorable] failed! -> %s \n ", u_errorName(status));
        return;
    }
    tLen = u_strlen(tData2[0]);
    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
    log_verbose(" \n Data[%d] :%s \t len: %d key: ", 0, tData2[0], rLen);
    for (i = 0; i < rLen; i++) {
        log_verbose(" %02X ", resColl[i]);
    }
    if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
        log_err(" \n Data[%d] :%s \t len: %d key: ", 0, tData2[0], rLen);
        for (i = 0; i < rLen; i++) {
            log_err(" %02X ", resColl[i]);
        }
    }
    tLen = u_strlen(tData2[1]);
    rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
    log_verbose(" \n Data[%d] :%s \t len: %d key: ", 1, tData2[1], rLen);
    for (i = 0; i < rLen; i++) {
        log_verbose(" %02X ", resColl[i]);
    }
    if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
        log_err(" \n Data[%d] :%s \t len: %d key: ", 1, tData2[1], rLen);
        for (i = 0; i < rLen; i++) {
            log_err(" %02X ", resColl[i]);
        }
    }
    ucol_close(coll);
}
/*
 * Helper for TestUCAPrecontext: computes the sort key of each of the first
 * `count` rows of `data` with `coll` and compares it (via strcmp on the key
 * bytes) with the key of the previous row.
 *
 *  - For row `smallerIndex` an error is logged when the key is greater than
 *    the previous one.
 *  - For every other row after the first an error is logged when the key is
 *    smaller than the previous one.
 *
 * Pass a negative `smallerIndex` when no row is expected to sort lower.
 * Every key is also dumped through log_verbose.
 */
static void
checkPrecontextKeyOrder(const UCollator *coll, UChar data[][20], int32_t count, int32_t smallerIndex)
{
    uint8_t resColl[100], prevColl[100];
    int32_t i, j, rLen, tLen, copyLen;

    for (j = 0; j < count; j++) {
        tLen = u_strlen(data[j]);
        rLen = ucol_getSortKey(coll, data[j], tLen, resColl, 100);
        if (j == smallerIndex) {
            if (strcmp((char *)resColl, (char *)prevColl) > 0) {
                log_err(" \n Expecting smaller key than previous test case: Data[%d] :%s. ",
                        j, data[j]);
            }
        } else if ((j > 0) && (strcmp((char *)resColl, (char *)prevColl) < 0)) {
            log_err(" \n Expecting greater key than previous test case: Data[%d] :%s. ",
                    j, data[j]);
        }
        log_verbose(" \n Data[%d] :%s \t len: %d key: ", j, data[j], rLen);
        for (i = 0; i < rLen; i++) {
            log_verbose(" %02X ", resColl[i]);
        }
        /* Remember this key for the next row; never copy past the buffer. */
        copyLen = rLen;
        if (copyLen > (int32_t)sizeof(prevColl)) {
            copyLen = (int32_t)sizeof(prevColl);
        }
        uprv_memcpy(prevColl, resColl, sizeof(uint8_t) * copyLen);
    }
}

/*
 * Runs the same eleven middle-dot test strings through four collators
 * ("en", "ja", and two rule-based tailorings) and checks that their sort
 * keys come out in the listed order.  With the last tailoring, row 3 is
 * instead expected not to sort above row 2.
 */
static void
TestUCAPrecontext(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = NULL;
    int32_t ruleLen;
    UChar rule1[256] = {0x26, 0xb7, 0x3c, 0x61, 0};  /* & middle-dot < a */
    UChar rule2[256] = {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
    /* & l middle-dot << a  a is an expansion. */

    UChar tData1[][20] = {
        {0xb7, 0},              /* standalone middle dot(0xb7) */
        {0x387, 0},             /* standalone middle dot(0x387) */
        {0x61, 0},              /* a */
        {0x6C, 0},              /* l */
        {0x4C, 0x0332, 0},      /* l with [first primary ignorable] */
        {0x6C, 0xb7, 0},        /* l with middle dot(0xb7) */
        {0x6C, 0x387, 0},       /* l with middle dot(0x387) */
        {0x4C, 0xb7, 0},        /* L with middle dot(0xb7) */
        {0x4C, 0x387, 0},       /* L with middle dot(0x387) */
        {0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
        {0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
    };

    log_verbose(" \n \n EN collation: ");
    coll = ucol_open(" en ", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, " Tailoring test: &z <<a|- failed! -> %s \n ", u_errorName(status));
        return;
    }
    checkPrecontextKeyOrder(coll, tData1, 11, -1);
    ucol_close(coll);

    log_verbose(" \n \n JA collation: ");
    coll = ucol_open(" ja ", &status);
    if (U_FAILURE(status)) {
        log_err(" Tailoring test: &z <<a|- failed! ");
        return;
    }
    checkPrecontextKeyOrder(coll, tData1, 11, -1);
    ucol_close(coll);

    log_verbose(" \n \n Tailoring test: & middle dot < a ");
    ruleLen = u_strlen(rule1);
    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL, &status);
    if (U_FAILURE(status)) {
        log_err(" Tailoring test: & middle dot < a failed! ");
        return;
    }
    checkPrecontextKeyOrder(coll, tData1, 11, -1);
    ucol_close(coll);

    log_verbose(" \n \n Tailoring test: & l middle-dot << a ");
    ruleLen = u_strlen(rule2);
    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL, &status);
    if (U_FAILURE(status)) {
        log_err(" Tailoring test: & l middle-dot << a failed! ");
        return;
    }
    /* With this tailoring row 3 must not sort above row 2. */
    checkPrecontextKeyOrder(coll, tData1, 11, 3);
    ucol_close(coll);
}
2008-06-11 21:37:03 +00:00
static void
TestOutOfBuffer5468 ( void )
{
static const char * test = " \\ u4e00 " ;
UChar ustr [ 256 ] ;
int32_t ustr_length = u_unescape ( test , ustr , 256 ) ;
unsigned char shortKeyBuf [ 1 ] ;
int32_t sortkey_length ;
UErrorCode status = U_ZERO_ERROR ;
static UCollator * coll = NULL ;
2009-04-23 00:23:57 +00:00
2008-06-11 21:37:03 +00:00
coll = ucol_open ( " root " , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Couldn't open UCA -> %s \n " , u_errorName ( status ) ) ;
2008-06-11 21:37:03 +00:00
return ;
}
ucol_setStrength ( coll , UCOL_PRIMARY ) ;
ucol_setAttribute ( coll , UCOL_STRENGTH , UCOL_PRIMARY , & status ) ;
ucol_setAttribute ( coll , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err ( " Failed setting atributes \n " ) ;
return ;
2009-04-23 00:23:57 +00:00
}
2008-06-11 21:37:03 +00:00
sortkey_length = ucol_getSortKey ( coll , ustr , ustr_length , shortKeyBuf , sizeof ( shortKeyBuf ) ) ;
if ( sortkey_length ! = 4 ) {
log_err ( " expecting length of sortKey is 4 got:%d " , sortkey_length ) ;
}
log_verbose ( " length of sortKey is %d " , sortkey_length ) ;
ucol_close ( coll ) ;
}
2008-04-04 22:47:43 +00:00
2007-11-15 23:09:40 +00:00
# define TSKC_DATA_SIZE 5
# define TSKC_BUF_SIZE 50
static void
TestSortKeyConsistency ( void )
{
UErrorCode icuRC = U_ZERO_ERROR ;
UCollator * ucol ;
UChar data [ ] = { 0xFFFD , 0x0006 , 0x0006 , 0x0006 , 0xFFFD } ;
uint8_t bufFull [ TSKC_DATA_SIZE ] [ TSKC_BUF_SIZE ] ;
uint8_t bufPart [ TSKC_DATA_SIZE ] [ TSKC_BUF_SIZE ] ;
2007-12-26 18:50:17 +00:00
int32_t i , j , i2 ;
2007-11-15 23:09:40 +00:00
ucol = ucol_openFromShortString ( " LEN_S4 " , FALSE , NULL , & icuRC ) ;
if ( U_FAILURE ( icuRC ) )
2007-12-26 18:50:17 +00:00
{
2009-06-09 21:28:13 +00:00
log_err_status ( icuRC , " ucol_openFromShortString failed -> %s \n " , u_errorName ( icuRC ) ) ;
2007-11-15 23:09:40 +00:00
return ;
2007-12-26 18:50:17 +00:00
}
2007-11-15 23:09:40 +00:00
for ( i = 0 ; i < TSKC_DATA_SIZE ; i + + )
{
UCharIterator uiter ;
uint32_t state [ 2 ] = { 0 , 0 } ;
int32_t dataLen = i + 1 ;
2007-12-26 18:50:17 +00:00
for ( j = 0 ; j < TSKC_BUF_SIZE ; j + + )
bufFull [ i ] [ j ] = bufPart [ i ] [ j ] = 0 ;
2007-11-15 23:09:40 +00:00
2007-11-16 01:25:00 +00:00
/* Full sort key */
2007-11-15 23:09:40 +00:00
ucol_getSortKey ( ucol , data , dataLen , bufFull [ i ] , TSKC_BUF_SIZE ) ;
2007-11-16 01:25:00 +00:00
/* Partial sort key */
2007-11-15 23:09:40 +00:00
uiter_setString ( & uiter , data , dataLen ) ;
ucol_nextSortKeyPart ( ucol , & uiter , state , bufPart [ i ] , TSKC_BUF_SIZE , & icuRC ) ;
if ( U_FAILURE ( icuRC ) )
2007-12-26 18:50:17 +00:00
{
log_err ( " ucol_nextSortKeyPart failed \n " ) ;
ucol_close ( ucol ) ;
return ;
}
2007-11-15 23:09:40 +00:00
2007-12-26 18:50:17 +00:00
for ( i2 = 0 ; i2 < i ; i2 + + )
{
UBool fullMatch = TRUE ;
UBool partMatch = TRUE ;
for ( j = 0 ; j < TSKC_BUF_SIZE ; j + + )
{
fullMatch = fullMatch & & ( bufFull [ i ] [ j ] ! = bufFull [ i2 ] [ j ] ) ;
partMatch = partMatch & & ( bufPart [ i ] [ j ] ! = bufPart [ i2 ] [ j ] ) ;
}
if ( fullMatch ! = partMatch ) {
log_err ( fullMatch ? " full key was consistent, but partial key changed \n "
: " partial key was consistent, but full key changed \n " ) ;
ucol_close ( ucol ) ;
return ;
}
}
2007-11-15 23:09:40 +00:00
}
2007-11-16 01:25:00 +00:00
/*=============================================*/
2007-11-15 23:09:40 +00:00
ucol_close ( ucol ) ;
}
2007-12-21 00:08:12 +00:00
/* ticket: 6101 */
static void TestCroatianSortKey ( void ) {
2007-12-26 18:50:17 +00:00
const char * collString = " LHR_AN_CX_EX_FX_HX_NX_S3 " ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * ucol ;
UCharIterator iter ;
static const UChar text [ ] = { 0x0044 , 0xD81A } ;
size_t length = sizeof ( text ) / sizeof ( * text ) ;
uint8_t textSortKey [ 32 ] ;
size_t lenSortKey = 32 ;
size_t actualSortKeyLen ;
uint32_t uStateInfo [ 2 ] = { 0 , 0 } ;
ucol = ucol_openFromShortString ( collString , FALSE , NULL , & status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " ucol_openFromShortString error in Craotian test. -> %s \n " , u_errorName ( status ) ) ;
2007-12-26 18:50:17 +00:00
return ;
}
uiter_setString ( & iter , text , length ) ;
actualSortKeyLen = ucol_nextSortKeyPart (
ucol , & iter , ( uint32_t * ) uStateInfo ,
textSortKey , lenSortKey , & status
) ;
if ( actualSortKeyLen = = lenSortKey ) {
log_err ( " ucol_nextSortKeyPart did not give correct result in Croatian test. \n " ) ;
}
ucol_close ( ucol ) ;
2007-12-21 00:08:12 +00:00
}
2008-04-17 05:19:19 +00:00
/* ticket: 6140 */
2008-04-17 16:55:43 +00:00
/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
2009-04-23 00:23:57 +00:00
* they are both Hiragana and Katakana
2008-04-17 16:55:43 +00:00
*/
# define SORTKEYLEN 50
2008-04-17 05:19:19 +00:00
static void TestHiragana ( void ) {
UErrorCode status = U_ZERO_ERROR ;
UCollator * ucol ;
2008-04-17 16:55:43 +00:00
UCollationResult strcollresult ;
2008-04-17 05:19:19 +00:00
UChar data1 [ ] = { 0x3058 , 0x30B8 } ; /* Hiragana and Katakana letter Zi */
UChar data2 [ ] = { 0x3057 , 0x3099 , 0x30B7 , 0x3099 } ;
int32_t data1Len = sizeof ( data1 ) / sizeof ( * data1 ) ;
int32_t data2Len = sizeof ( data2 ) / sizeof ( * data2 ) ;
2008-04-17 16:55:43 +00:00
int32_t i , j ;
uint8_t sortKey1 [ SORTKEYLEN ] ;
uint8_t sortKey2 [ SORTKEYLEN ] ;
2008-04-17 05:19:19 +00:00
UCharIterator uiter1 ;
UCharIterator uiter2 ;
uint32_t state1 [ 2 ] = { 0 , 0 } ;
uint32_t state2 [ 2 ] = { 0 , 0 } ;
int32_t keySize1 ;
int32_t keySize2 ;
ucol = ucol_openFromShortString ( " LJA_AN_CX_EX_FX_HO_NX_S4 " , FALSE , NULL ,
& status ) ;
if ( U_FAILURE ( status ) ) {
2009-06-09 21:28:13 +00:00
log_err_status ( status , " Error status: %s; Unable to open collator from short string. \n " , u_errorName ( status ) ) ;
2008-04-17 05:19:19 +00:00
return ;
}
2007-11-15 23:09:40 +00:00
2008-04-17 05:19:19 +00:00
/* Start of full sort keys */
2008-04-17 17:58:36 +00:00
/* Full sort key1 */
2008-04-17 16:55:43 +00:00
keySize1 = ucol_getSortKey ( ucol , data1 , data1Len , sortKey1 , SORTKEYLEN ) ;
2008-04-17 17:58:36 +00:00
/* Full sort key2 */
2008-04-17 16:55:43 +00:00
keySize2 = ucol_getSortKey ( ucol , data2 , data2Len , sortKey2 , SORTKEYLEN ) ;
2008-04-17 05:19:19 +00:00
if ( keySize1 = = keySize2 ) {
for ( i = 0 ; i < keySize1 ; i + + ) {
if ( sortKey1 [ i ] ! = sortKey2 [ i ] ) {
log_err ( " Full sort keys are different. Should be equal. " ) ;
}
}
} else {
2008-04-17 16:55:43 +00:00
log_err ( " Full sort keys sizes doesn't match: %d %d " , keySize1 , keySize2 ) ;
2008-04-17 05:19:19 +00:00
}
/* End of full sort keys */
/* Start of partial sort keys */
2008-04-17 17:58:36 +00:00
/* Partial sort key1 */
2008-04-17 05:19:19 +00:00
uiter_setString ( & uiter1 , data1 , data1Len ) ;
2008-04-17 16:55:43 +00:00
keySize1 = ucol_nextSortKeyPart ( ucol , & uiter1 , state1 , sortKey1 , SORTKEYLEN , & status ) ;
2008-04-17 17:58:36 +00:00
/* Partial sort key2 */
2008-04-17 05:19:19 +00:00
uiter_setString ( & uiter2 , data2 , data2Len ) ;
2008-04-17 16:55:43 +00:00
keySize2 = ucol_nextSortKeyPart ( ucol , & uiter2 , state2 , sortKey2 , SORTKEYLEN , & status ) ;
if ( U_SUCCESS ( status ) & & keySize1 = = keySize2 ) {
2008-04-17 05:19:19 +00:00
for ( j = 0 ; j < keySize1 ; j + + ) {
if ( sortKey1 [ j ] ! = sortKey2 [ j ] ) {
log_err ( " Partial sort keys are different. Should be equal " ) ;
}
}
} else {
2008-04-17 16:55:43 +00:00
log_err ( " Error Status: %s or Partial sort keys sizes doesn't match: %d %d " , u_errorName ( status ) , keySize1 , keySize2 ) ;
2008-04-17 05:19:19 +00:00
}
/* End of partial sort keys */
/* Start of strcoll */
2008-04-17 17:58:36 +00:00
/* Use ucol_strcoll() to determine ordering */
2008-04-17 05:19:19 +00:00
strcollresult = ucol_strcoll ( ucol , data1 , data1Len , data2 , data2Len ) ;
if ( strcollresult ! = UCOL_EQUAL ) {
log_err ( " Result from ucol_strcoll() should be UCOL_EQUAL. " ) ;
}
2009-04-23 00:23:57 +00:00
2008-04-17 05:19:19 +00:00
ucol_close ( ucol ) ;
}
2008-04-17 16:55:43 +00:00
2010-06-14 21:27:44 +00:00
/*
 * One entry of a collation test table: a pair of strings plus the result that
 * comparing them is expected to give.
 */
typedef struct {
    /* Left-hand string of the comparison. */
    const UChar source[MAX_TOKEN_LEN];
    /* Right-hand string of the comparison. */
    const UChar target[MAX_TOKEN_LEN];
    /* Expected outcome (the tables use UCOL_LESS, UCOL_EQUAL and UCOL_GREATER). */
    UCollationResult result;
} OneTestCase;
/*
* Utility function to test one collation test case .
* @ param testcases Array of test cases .
* @ param n_testcases Size of the array testcases .
* @ param str_rules Array of rules . These rules should be specifying the same rule in different formats .
* @ param n_rules Size of the array str_rules .
*/
static void doTestOneTestCase ( const OneTestCase testcases [ ] ,
int n_testcases ,
const char * str_rules [ ] ,
int n_rules )
{
int rule_no , testcase_no ;
UChar rule [ 500 ] ;
int32_t length = 0 ;
UErrorCode status = U_ZERO_ERROR ;
UParseError parse_error ;
UCollator * myCollation ;
for ( rule_no = 0 ; rule_no < n_rules ; + + rule_no ) {
length = u_unescape ( str_rules [ rule_no ] , rule , 500 ) ;
if ( length = = 0 ) {
log_err ( " ERROR: The rule cannot be unescaped: %s \n " ) ;
return ;
}
myCollation = ucol_openRules ( rule , length , UCOL_ON , UCOL_TERTIARY , & parse_error , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
log_verbose ( " Testing the <<* syntax \n " ) ;
ucol_setAttribute ( myCollation , UCOL_NORMALIZATION_MODE , UCOL_ON , & status ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
for ( testcase_no = 0 ; testcase_no < n_testcases ; + + testcase_no ) {
doTest ( myCollation ,
testcases [ testcase_no ] . source ,
testcases [ testcase_no ] . target ,
testcases [ testcase_no ] . result
) ;
}
ucol_close ( myCollation ) ;
}
}
/*
 * Expected orderings shared by the same-strength list/range tests.
 * Each row comment spells out the code points used in that row.
 */
static const OneTestCase rangeTestcases[] = {
    { {0x0061},                 {0x0062},                 UCOL_LESS  }, /* "a" < "b" */
    { {0x0062},                 {0x0063},                 UCOL_LESS  }, /* "b" < "c" */
    { {0x0061},                 {0x0063},                 UCOL_LESS  }, /* "a" < "c" */

    { {0x0062},                 {0x006b},                 UCOL_LESS  }, /* "b" << "k" */
    { {0x006b},                 {0x006c},                 UCOL_LESS  }, /* "k" << "l" */
    { {0x0062},                 {0x006c},                 UCOL_LESS  }, /* "b" << "l" */
    { {0x0061},                 {0x006c},                 UCOL_LESS  }, /* "a" < "l" */
    { {0x0061},                 {0x006d},                 UCOL_LESS  }, /* "a" < "m" */

    { {0x0079},                 {0x006d},                 UCOL_LESS  }, /* "y" < "m" */
    { {0x0079},                 {0x0067},                 UCOL_LESS  }, /* "y" < "g" */
    { {0x0061},                 {0x0068},                 UCOL_LESS  }, /* "a" < "h" */
    { {0x0061},                 {0x0065},                 UCOL_LESS  }, /* "a" < "e" */

    { {0x0061},                 {0x0031},                 UCOL_EQUAL }, /* "a" = "1" */
    { {0x0061},                 {0x0032},                 UCOL_EQUAL }, /* "a" = "2" */
    { {0x0061},                 {0x0033},                 UCOL_EQUAL }, /* "a" = "3" */
    { {0x0061},                 {0x0066},                 UCOL_LESS  }, /* "a" < "f" */

    { {0x006c, 0x0061},         {0x006b, 0x0062},         UCOL_LESS  }, /* "la" < "kb" */
    { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
    { {0x0062},                 {0x007a},                 UCOL_LESS  }, /* "b" < "z" */
    { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS  }, /* "azb" < "2ym" */
};

/* Number of rows in rangeTestcases. */
static int nRangeTestcases = LEN(rangeTestcases);
/*
 * Expected orderings around the BMP / supplementary boundary.  Supplementary
 * code points are written as surrogate pairs.
 */
static const OneTestCase rangeTestcasesSupplemental[] = {
    { {0xfffe},         {0xffff},         UCOL_LESS }, /* U+FFFE  < U+FFFF  */
    { {0xffff},         {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF  < U+10000 */
    { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
    { {0xfffe},         {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE  < U+10001 */
    { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10001 < U+10002 */
    { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10001 < U+10002 (same row again) */
    { {0xfffe},         {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE  < U+10002 */
};

/* Number of rows in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
/*
 * Expected orderings for the "qwerty" rule sets.  Every row expects the left
 * string to sort before the right one.
 */
static const OneTestCase rangeTestcasesQwerty[] = {
    { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" before "w" */
    { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" before "e" */

    { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" before "u" */
    { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" before "u" */

    { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" before "i" */
    { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" before "p" */

    { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" before "e" */
    { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" before "u" */

    /* "quest" before "were" */
    { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
      {0x0077, 0x0065, 0x0072, 0x0065},
      UCOL_LESS },
    /* "quack" before "quest" */
    { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
      {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
      UCOL_LESS },
};

/* Number of rows in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
2010-02-09 19:59:06 +00:00
/*
 * Runs the shared rangeTestcases table against the same tailoring written
 * once relation by relation and once with the starred list syntax.
 */
static void TestSameStrengthList(void)
{
    const char* ruleVariants[] = {
        /* One relation per character */
        " &a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3 ",

        /* Starred lists */
        " &a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123 ",
    };
    const int variantCount = LEN(ruleVariants);

    doTestOneTestCase(rangeTestcases, nRangeTestcases, ruleVariants, variantCount);
}
2010-02-09 19:59:06 +00:00
2010-06-14 21:27:44 +00:00
/*
 * Runs the shared rangeTestcases table against starred-list rules in which
 * some characters are written as escapes, alternating between unquoted and
 * quoted spellings of the same rule.
 */
static void TestSameStrengthListQuoted(void)
{
    const char* ruleVariants[] = {
        /* Escapes in the reset and in the fourth list: unquoted, then quoted */
        " & \\ u0061<*bcd &b<<*klm &k<<<*xyz &y<*f \\ u0067 \\ u0068e &a=*123 ",
        " &' \\ u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f' \\ u0067 \\ u0068'e &a=*123 ",

        /* Escapes spread over several lists: unquoted, then quoted */
        " & \\ u0061<*b \\ u0063d &b<<*klm &k<<<*xyz & \\ u0079<*fgh \\ u0065 &a=* \\ u0031 \\ u0032 \\ u0033 ",
        " &' \\ u0061'<*b' \\ u0063'd &b<<*klm &k<<<*xyz &' \\ u0079'<*fgh' \\ u0065' &a=*' \\ u0031 \\ u0032 \\ u0033' ",

        /* Escapes at both ends of the first list: unquoted, then quoted */
        " & \\ u0061<* \\ u0062c \\ u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=* \\ u0031 \\ u0032 \\ u0033 ",
        " &' \\ u0061'<*' \\ u0062'c' \\ u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*' \\ u0031 \\ u0032 \\ u0033' ",
    };
    const int variantCount = LEN(ruleVariants);

    doTestOneTestCase(rangeTestcases, nRangeTestcases, ruleVariants, variantCount);
}
/*
 * Runs rangeTestcasesSupplemental against four spellings of one tailoring:
 * plain relations and starred lists, each with \U escapes and with
 * surrogate-pair escapes.
 */
static void TestSameStrengthListSupplemental(void)
{
    const char* ruleVariants[] = {
        /* Plain relations */
        " & \\ ufffe< \\ uffff< \\ U00010000< \\ U00010001< \\ U00010002 ",
        " & \\ ufffe< \\ uffff< \\ ud800 \\ udc00< \\ ud800 \\ udc01< \\ ud800 \\ udc02 ",

        /* Starred lists */
        " & \\ ufffe<* \\ uffff \\ U00010000 \\ U00010001 \\ U00010002 ",
        " & \\ ufffe<* \\ uffff \\ ud800 \\ udc00 \\ ud800 \\ udc01 \\ ud800 \\ udc02 ",
    };
    const int variantCount = LEN(ruleVariants);

    doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, ruleVariants, variantCount);
}
/*
 * Runs rangeTestcasesQwerty against the "qwerty" tailoring written as plain
 * relations, as starred lists, and with escaped / partly quoted characters.
 */
static void TestSameStrengthListQwerty(void)
{
    const char* ruleVariants[] = {
        /* Normal */
        " &q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d ",
        /* Lists */
        " &q<*wer &w<<*tyu &t<<<*iop &o=*asd ",

        /* Escaped characters: relations, relations with some quotes, lists */
        " & \\ u0071< \\ u0077< \\ u0065< \\ u0072 & \\ u0077<< \\ u0074<< \\ u0079<< \\ u0075 & \\ u0074<<< \\ u0069<<< \\ u006f<<< \\ u0070 & \\ u006f= \\ u0061= \\ u0073= \\ u0064 ",
        " &' \\ u0071'< \\ u0077< \\ u0065< \\ u0072 & \\ u0077<<' \\ u0074'<< \\ u0079<< \\ u0075 & \\ u0074<<< \\ u0069<<<' \\ u006f'<<< \\ u0070 & \\ u006f= \\ u0061=' \\ u0073'= \\ u0064 ",
        " & \\ u0071<* \\ u0077 \\ u0065 \\ u0072 & \\ u0077<<* \\ u0074 \\ u0079 \\ u0075 & \\ u0074<<<* \\ u0069 \\ u006f \\ u0070 & \\ u006f=* \\ u0061 \\ u0073 \\ u0064 ",

        /* Quoted characters inside a list work as long as no two quoted
         * characters are adjacent. */
        " & \\ u0071<*' \\ u0077' \\ u0065 \\ u0072 & \\ u0077<<* \\ u0074' \\ u0079' \\ u0075 & \\ u0074<<<* \\ u0069 \\ u006f' \\ u0070' &' \\ u006f'=* \\ u0061 \\ u0073 \\ u0064 ",

        /* Adjacent quoted characters do not work, because '' is treated as a
         * quote character.  Rule left disabled for that reason: */
        /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
    };
    const int variantCount = LEN(ruleVariants);

    doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, ruleVariants, variantCount);
}
static void TestSameStrengthListQuotedQwerty ( void )
{
const char * strRules [ ] = {
" &q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d " , /* Normal */
" &q<*wer &w<<*tyu &t<<<*iop &o=*asd " , /* Lists */
2010-10-14 20:25:09 +00:00
" &q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd' " , /* Lists with quotes */
/* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
/* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
2010-10-14 18:44:44 +00:00
} ;
2010-06-14 21:27:44 +00:00
doTestOneTestCase ( rangeTestcasesQwerty , nRangeTestcasesQwerty , strRules , LEN ( strRules ) ) ;
}
/*
 * Runs the shared rangeTestcases table against a tailoring written with
 * hyphenated ranges inside starred lists.
 */
static void TestSameStrengthListRanges(void)
{
    const char* ruleVariants[] = {
        " &a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3 ",
    };
    const int variantCount = LEN(ruleVariants);

    doTestOneTestCase(rangeTestcases, nRangeTestcases, ruleVariants, variantCount);
}
/*
 * Runs rangeTestcasesSupplemental against a starred-list rule whose range
 * runs from a BMP escape to a \U supplementary escape.
 */
static void TestSameStrengthListSupplementalRanges(void)
{
    const char* ruleVariants[] = {
        " & \\ ufffe<* \\ uffff- \\ U00010002 ",
    };
    const int variantCount = LEN(ruleVariants);

    doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, ruleVariants, variantCount);
}
/*
 * Tailors a handful of quoted punctuation characters, some of which are also
 * rule syntax characters, using plain relations, a starred list and a range,
 * then checks the resulting order of neighbouring pairs.
 */
static void TestSpecialCharacters(void)
{
    static const OneTestCase expectedOrder[] = {
        { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
        { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
        { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
        { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
    };
    const char* ruleVariants[] = {
        /* Normal */
        " &';'<'+'<','<'-'<'&'<'*' ",

        /* List */
        " &';'<*'+,-&*' ",

        /* Range */
        " &';'<*'+'-'-&*' ",
    };

    doTestOneTestCase(expectedOrder, LEN(expectedOrder), ruleVariants, LEN(ruleVariants));
}
2010-10-14 18:44:44 +00:00
/*
 * Tailors the code points 0xE2D8..0xE2DC between 0x5EA7 and 0x4E8D with plain
 * relations (quoted and unquoted) and checks each neighbouring pair.
 */
static void TestPrivateUseCharacters(void)
{
    static const OneTestCase expectedOrder[] = {
        { {0x5ea7}, {0xe2d8}, UCOL_LESS },
        { {0xe2d8}, {0xe2d9}, UCOL_LESS },
        { {0xe2d9}, {0xe2da}, UCOL_LESS },
        { {0xe2da}, {0xe2db}, UCOL_LESS },
        { {0xe2db}, {0xe2dc}, UCOL_LESS },
        { {0xe2dc}, {0x4e8d}, UCOL_LESS },
    };
    const char* ruleVariants[] = {
        /* Normal, every character quoted */
        " &' \\ u5ea7'<' \\ uE2D8'<' \\ uE2D9'<' \\ uE2DA'<' \\ uE2DB'<' \\ uE2DC'<' \\ u4e8d' ",
        /* Normal, unquoted */
        " & \\ u5ea7< \\ uE2D8< \\ uE2D9< \\ uE2DA< \\ uE2DB< \\ uE2DC< \\ u4e8d ",
    };

    doTestOneTestCase(expectedOrder, LEN(expectedOrder), ruleVariants, LEN(ruleVariants));
}
/*
 * Same ordering check as the plain-relation variant, but with the code points
 * 0xE2D8..0xE2DC and 0x4E8D supplied through a starred list.
 */
static void TestPrivateUseCharactersInList(void)
{
    static const OneTestCase expectedOrder[] = {
        { {0x5ea7}, {0xe2d8}, UCOL_LESS },
        { {0xe2d8}, {0xe2d9}, UCOL_LESS },
        { {0xe2d9}, {0xe2da}, UCOL_LESS },
        { {0xe2da}, {0xe2db}, UCOL_LESS },
        { {0xe2db}, {0xe2dc}, UCOL_LESS },
        { {0xe2dc}, {0x4e8d}, UCOL_LESS },
    };
    const char* ruleVariants[] = {
        /* List, whole list quoted */
        " &' \\ u5ea7'<*' \\ uE2D8 \\ uE2D9 \\ uE2DA \\ uE2DB \\ uE2DC \\ u4e8d' ",
        /* Disabled variant with several separately quoted pieces: */
        /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
        /* List, unquoted */
        " & \\ u5ea7<* \\ uE2D8 \\ uE2D9 \\ uE2DA \\ uE2DB \\ uE2DC \\ u4e8d ",
    };

    doTestOneTestCase(expectedOrder, LEN(expectedOrder), ruleVariants, LEN(ruleVariants));
}
/*
 * Same ordering check as the list variant, but with 0xE2D8..0xE2DC supplied
 * as a hyphenated range inside the starred list.
 */
static void TestPrivateUseCharactersInRange(void)
{
    static const OneTestCase expectedOrder[] = {
        { {0x5ea7}, {0xe2d8}, UCOL_LESS },
        { {0xe2d8}, {0xe2d9}, UCOL_LESS },
        { {0xe2d9}, {0xe2da}, UCOL_LESS },
        { {0xe2da}, {0xe2db}, UCOL_LESS },
        { {0xe2db}, {0xe2dc}, UCOL_LESS },
        { {0xe2dc}, {0x4e8d}, UCOL_LESS },
    };
    const char* ruleVariants[] = {
        /* Range, quoted */
        " &' \\ u5ea7'<*' \\ uE2D8'-' \\ uE2DC \\ u4e8d' ",
        /* Range, unquoted */
        " & \\ u5ea7<* \\ uE2D8- \\ uE2DC \\ u4e8d ",
        /* Disabled variant mixing quoted and unquoted ranges: */
        /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
    };

    doTestOneTestCase(expectedOrder, LEN(expectedOrder), ruleVariants, LEN(ruleVariants));
}
2010-06-14 21:27:44 +00:00
static void TestInvalidListsAndRanges ( void )
{
const char * invalidRules [ ] = {
/* Range not in starred expression */
" & \\ ufffe< \\ uffff- \\ U00010002 " ,
/* Range without start */
" &a<*-c " ,
/* Range without end */
" &a<*b- " ,
/* More than one hyphen */
" &a<*b-g-l " ,
/* Range in the wrong order */
" &a<*k-b " ,
} ;
UChar rule [ 500 ] ;
UErrorCode status = U_ZERO_ERROR ;
UParseError parse_error ;
int n_rules = LEN ( invalidRules ) ;
int rule_no ;
int length ;
UCollator * myCollation ;
for ( rule_no = 0 ; rule_no < n_rules ; + + rule_no ) {
length = u_unescape ( invalidRules [ rule_no ] , rule , 500 ) ;
if ( length = = 0 ) {
log_err ( " ERROR: The rule cannot be unescaped: %s \n " ) ;
2010-02-09 19:59:06 +00:00
return ;
}
2010-06-14 21:27:44 +00:00
myCollation = ucol_openRules ( rule , length , UCOL_ON , UCOL_TERTIARY , & parse_error , & status ) ;
if ( ! U_FAILURE ( status ) ) {
log_err ( " ERROR: Could not cause a failure as expected: \n " ) ;
2010-02-09 19:59:06 +00:00
}
2010-06-14 21:27:44 +00:00
status = U_ZERO_ERROR ;
}
2010-02-09 19:59:06 +00:00
}
2010-10-27 18:02:52 +00:00
/*
* This test ensures that characters placed before a character in a different script have the same lead byte
* in their collation key before and after script reordering .
*/
static void TestBeforeRuleWithScriptReordering ( void )
{
UParseError error ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
char srules [ 500 ] = " &[before 1] \\ u03b1 < \\ u0e01 " ;
UChar rules [ 500 ] ;
uint32_t rulesLength = 0 ;
2010-11-03 02:41:22 +00:00
int32_t reorderCodes [ 1 ] = { USCRIPT_GREEK } ;
2010-10-30 00:42:12 +00:00
UCollationResult collResult ;
uint8_t baseKey [ 256 ] ;
uint32_t baseKeyLength ;
uint8_t beforeKey [ 256 ] ;
uint32_t beforeKeyLength ;
2010-10-27 18:02:52 +00:00
UChar base [ ] = { 0x03b1 } ; /* base */
int32_t baseLen = sizeof ( base ) / sizeof ( * base ) ;
UChar before [ ] = { 0x0e01 } ; /* ko kai */
int32_t beforeLen = sizeof ( before ) / sizeof ( * before ) ;
2010-10-30 00:42:12 +00:00
/*UChar *data[] = { before, base };
genericRulesStarter ( srules , data , 2 ) ; */
2010-11-02 02:21:57 +00:00
log_verbose ( " Testing the &[before 1] rule with [reorder grek] \n " ) ;
2010-10-30 00:42:12 +00:00
2010-10-28 20:28:40 +00:00
2010-10-30 00:42:12 +00:00
/* build collator */
2010-11-01 22:23:49 +00:00
log_verbose ( " Testing the &[before 1] rule with [scriptReorder grek] \n " ) ;
2010-10-27 18:02:52 +00:00
rulesLength = u_unescape ( srules , rules , LEN ( rules ) ) ;
myCollation = ucol_openRules ( rules , rulesLength , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
2010-10-30 00:42:12 +00:00
/* check collation results - before rule applied but not script reordering */
2010-10-28 20:28:40 +00:00
collResult = ucol_strcoll ( myCollation , base , baseLen , before , beforeLen ) ;
2010-10-30 00:42:12 +00:00
if ( collResult ! = UCOL_GREATER ) {
log_err ( " Collation result not correct before script reordering = %d \n " , collResult ) ;
}
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
/* check the lead byte of the collation keys before script reordering */
2010-10-28 20:28:40 +00:00
baseKeyLength = ucol_getSortKey ( myCollation , base , baseLen , baseKey , 256 ) ;
beforeKeyLength = ucol_getSortKey ( myCollation , before , beforeLen , beforeKey , 256 ) ;
2010-10-27 18:02:52 +00:00
if ( baseKey [ 0 ] ! = beforeKey [ 0 ] ) {
log_err ( " Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x \n " , baseKey [ 0 ] , beforeKey [ 0 ] ) ;
}
2010-10-30 00:42:12 +00:00
/* reorder the scripts */
2010-11-03 02:41:22 +00:00
ucol_setReorderCodes ( myCollation , reorderCodes , 1 , & status ) ;
2010-11-02 02:21:57 +00:00
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: while setting script order: %s \n " , myErrorName ( status ) ) ;
return ;
}
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
/* check collation results - before rule applied and after script reordering */
2010-10-27 18:02:52 +00:00
collResult = ucol_strcoll ( myCollation , base , baseLen , before , beforeLen ) ;
2010-10-30 00:42:12 +00:00
if ( collResult ! = UCOL_GREATER ) {
log_err ( " Collation result not correct after script reordering = %d \n " , collResult ) ;
}
/* check the lead byte of the collation keys after script reordering */
2010-10-27 18:02:52 +00:00
ucol_getSortKey ( myCollation , base , baseLen , baseKey , 256 ) ;
ucol_getSortKey ( myCollation , before , beforeLen , beforeKey , 256 ) ;
if ( baseKey [ 0 ] ! = beforeKey [ 0 ] ) {
2010-10-30 00:42:12 +00:00
log_err ( " Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x \n " , baseKey [ 0 ] , beforeKey [ 0 ] ) ;
2010-10-27 18:02:52 +00:00
}
ucol_close ( myCollation ) ;
}
2010-11-04 20:12:39 +00:00
/*
2010-11-05 18:43:45 +00:00
* Test that in a primary - compressed sort key all bytes except the first one are unchanged under script reordering .
2010-11-04 20:12:39 +00:00
*/
static void TestNonLeadBytesDuringCollationReordering ( void )
{
UParseError error ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
int32_t reorderCodes [ 1 ] = { USCRIPT_GREEK } ;
UCollationResult collResult ;
uint8_t baseKey [ 256 ] ;
uint32_t baseKeyLength ;
uint8_t reorderKey [ 256 ] ;
uint32_t reorderKeyLength ;
UChar testString [ ] = { 0x03b1 , 0x03b2 , 0x03b3 } ;
int i ;
log_verbose ( " Testing non-lead bytes in a sort key with and without reordering \n " ) ;
/* build collator tertiary */
myCollation = ucol_open ( " " , & status ) ;
ucol_setStrength ( myCollation , UCOL_TERTIARY ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
baseKeyLength = ucol_getSortKey ( myCollation , testString , LEN ( testString ) , baseKey , 256 ) ;
ucol_setReorderCodes ( myCollation , reorderCodes , LEN ( reorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
reorderKeyLength = ucol_getSortKey ( myCollation , testString , LEN ( testString ) , reorderKey , 256 ) ;
if ( baseKeyLength ! = reorderKeyLength ) {
log_err ( " Key lengths not the same during reordering. \n " , collResult ) ;
return ;
}
for ( i = 1 ; i < baseKeyLength ; i + + ) {
if ( baseKey [ i ] ! = reorderKey [ i ] ) {
log_err ( " Collation key bytes not the same at position %d. \n " , i ) ;
return ;
}
}
ucol_close ( myCollation ) ;
/* build collator quaternary */
myCollation = ucol_open ( " " , & status ) ;
ucol_setStrength ( myCollation , UCOL_QUATERNARY ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
baseKeyLength = ucol_getSortKey ( myCollation , testString , LEN ( testString ) , baseKey , 256 ) ;
ucol_setReorderCodes ( myCollation , reorderCodes , LEN ( reorderCodes ) , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: setting reorder codes: %s \n " , myErrorName ( status ) ) ;
return ;
}
reorderKeyLength = ucol_getSortKey ( myCollation , testString , LEN ( testString ) , reorderKey , 256 ) ;
if ( baseKeyLength ! = reorderKeyLength ) {
log_err ( " Key lengths not the same during reordering. \n " , collResult ) ;
return ;
}
for ( i = 1 ; i < baseKeyLength ; i + + ) {
if ( baseKey [ i ] ! = reorderKey [ i ] ) {
log_err ( " Collation key bytes not the same at position %d. \n " , i ) ;
return ;
}
}
ucol_close ( myCollation ) ;
}
2010-10-30 00:42:12 +00:00
/*
* Utility function to test one collation reordering test case .
* @ param testcases Array of test cases .
* @ param n_testcases Size of the array testcases .
* @ param str_rules Array of rules . These rules should be specifying the same rule in different formats .
* @ param n_rules Size of the array str_rules .
*/
2010-11-04 20:12:39 +00:00
static void doTestOneReorderingAPITestCase ( const OneTestCase testCases [ ] , uint32_t testCasesLen , const int32_t reorderTokens [ ] , int32_t reorderTokensLen )
2010-10-30 00:42:12 +00:00
{
int testCaseNum ;
UErrorCode status = U_ZERO_ERROR ;
UCollator * myCollation ;
2010-11-04 20:12:39 +00:00
int i ;
2010-10-30 00:42:12 +00:00
for ( testCaseNum = 0 ; testCaseNum < testCasesLen ; + + testCaseNum ) {
myCollation = ucol_open ( " " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
2010-11-03 02:41:22 +00:00
ucol_setReorderCodes ( myCollation , reorderTokens , reorderTokensLen , & status ) ;
2010-11-02 02:21:57 +00:00
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: while setting script order: %s \n " , myErrorName ( status ) ) ;
return ;
}
2010-10-30 00:42:12 +00:00
for ( testCaseNum = 0 ; testCaseNum < testCasesLen ; + + testCaseNum ) {
doTest ( myCollation ,
testCases [ testCaseNum ] . source ,
testCases [ testCaseNum ] . target ,
testCases [ testCaseNum ] . result
) ;
}
ucol_close ( myCollation ) ;
}
}
2010-10-27 18:02:52 +00:00
static void TestGreekFirstReorder ( void )
{
2010-10-30 00:42:12 +00:00
const char * strRules [ ] = {
2010-11-02 02:21:57 +00:00
" [reorder Grek] "
2010-10-30 00:42:12 +00:00
} ;
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
const int32_t apiRules [ ] = {
USCRIPT_GREEK
} ;
const static OneTestCase privateUseCharacterStrings [ ] = {
{ { 0x0391 } , { 0x0391 } , UCOL_EQUAL } ,
{ { 0x0041 } , { 0x0391 } , UCOL_GREATER } ,
{ { 0x03B1 , 0x0041 } , { 0x03B1 , 0x0391 } , UCOL_GREATER } ,
{ { 0x0060 } , { 0x0391 } , UCOL_LESS } ,
{ { 0x0391 } , { 0xe2dc } , UCOL_LESS } ,
{ { 0x0391 } , { 0x0060 } , UCOL_GREATER } ,
} ;
/* Test rules creation */
doTestOneTestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , strRules , LEN ( strRules ) ) ;
/* Test collation reordering API */
doTestOneReorderingAPITestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , apiRules , LEN ( apiRules ) ) ;
2010-10-27 18:02:52 +00:00
}
static void TestGreekLastReorder ( void )
{
2010-10-30 00:42:12 +00:00
const char * strRules [ ] = {
2010-11-02 02:21:57 +00:00
" [reorder Zzzz Grek] "
2010-10-30 00:42:12 +00:00
} ;
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
const int32_t apiRules [ ] = {
USCRIPT_UNKNOWN , USCRIPT_GREEK
} ;
const static OneTestCase privateUseCharacterStrings [ ] = {
{ { 0x0391 } , { 0x0391 } , UCOL_EQUAL } ,
{ { 0x0041 } , { 0x0391 } , UCOL_LESS } ,
{ { 0x03B1 , 0x0041 } , { 0x03B1 , 0x0391 } , UCOL_LESS } ,
{ { 0x0060 } , { 0x0391 } , UCOL_LESS } ,
{ { 0x0391 } , { 0xe2dc } , UCOL_GREATER } ,
} ;
/* Test rules creation */
doTestOneTestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , strRules , LEN ( strRules ) ) ;
/* Test collation reordering API */
doTestOneReorderingAPITestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , apiRules , LEN ( apiRules ) ) ;
2010-10-27 18:02:52 +00:00
}
static void TestNonScriptReorder ( void )
{
2010-10-30 00:42:12 +00:00
const char * strRules [ ] = {
2010-11-02 02:21:57 +00:00
" [reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy] "
2010-10-30 00:42:12 +00:00
} ;
2010-10-27 18:02:52 +00:00
2010-10-30 00:42:12 +00:00
const int32_t apiRules [ ] = {
2010-11-04 20:12:39 +00:00
USCRIPT_GREEK , UCOL_REORDER_CODE_SYMBOL , UCOL_REORDER_CODE_DIGIT , USCRIPT_LATIN ,
UCOL_REORDER_CODE_PUNCTUATION , UCOL_REORDER_CODE_SPACE , USCRIPT_UNKNOWN ,
UCOL_REORDER_CODE_CURRENCY
2010-10-30 00:42:12 +00:00
} ;
const static OneTestCase privateUseCharacterStrings [ ] = {
{ { 0x0391 } , { 0x0041 } , UCOL_LESS } ,
{ { 0x0041 } , { 0x0391 } , UCOL_GREATER } ,
{ { 0x0060 } , { 0x0041 } , UCOL_LESS } ,
{ { 0x0060 } , { 0x0391 } , UCOL_GREATER } ,
{ { 0x0024 } , { 0x0041 } , UCOL_GREATER } ,
} ;
/* Test rules creation */
doTestOneTestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , strRules , LEN ( strRules ) ) ;
/* Test collation reordering API */
doTestOneReorderingAPITestCase ( privateUseCharacterStrings , LEN ( privateUseCharacterStrings ) , apiRules , LEN ( apiRules ) ) ;
}
/*
 * Checks a table of expected orderings with USCRIPT_HAN as the only reorder
 * code, once through a "[reorder ...]" rule and once through the reordering
 * API.  The table includes strings written as surrogate pairs.
 */
static void TestHaniReorder(void)
{
    static const OneTestCase expectedOrder[] = {
        { {0x4e00},         {0x0041},         UCOL_LESS    },
        { {0x4e00},         {0x0060},         UCOL_GREATER },
        { {0xD86D, 0xDF40}, {0x0041},         UCOL_LESS    },
        { {0xD86D, 0xDF40}, {0x0060},         UCOL_GREATER },
        { {0x4e00},         {0xD86D, 0xDF40}, UCOL_LESS    },
        { {0xfa27},         {0x0041},         UCOL_LESS    },
        { {0xD869, 0xDF00}, {0x0041},         UCOL_LESS    },
    };
    const char* ruleForm[] = {
        " [reorder Hani] "
    };
    const int32_t apiForm[] = {
        USCRIPT_HAN
    };

    /* Same table, first via rule parsing ... */
    doTestOneTestCase(expectedOrder, LEN(expectedOrder), ruleForm, LEN(ruleForm));

    /* ... then via the reordering API. */
    doTestOneReorderingAPITestCase(expectedOrder, LEN(expectedOrder), apiForm, LEN(apiForm));
}
2010-11-01 22:23:49 +00:00
/**
 * Compares two zero-terminated byte strings (the callers in this file pass
 * collation sort keys).
 *
 * @param a first zero-terminated byte string
 * @param b second zero-terminated byte string
 * @return 0 when both strings are identical up to and including the
 *         terminating zero; -1 when the first differing byte of a is
 *         smaller than the one in b; 1 otherwise
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    /* Walk both strings in lock step while the bytes agree. */
    while (*a == *b) {
        if (*a == 0) {
            /* Reached the terminator in both strings at once: equal. */
            return 0;
        }
        ++a;
        ++b;
    }

    if (*a < *b) {
        return -1;
    }
    return 1;
}
static void TestImport ( void )
{
UCollator * vicoll ;
UCollator * escoll ;
UCollator * viescoll ;
UCollator * importviescoll ;
UParseError error ;
UErrorCode status = U_ZERO_ERROR ;
UChar * virules ;
int32_t viruleslength ;
UChar * esrules ;
int32_t esruleslength ;
UChar * viesrules ;
int32_t viesruleslength ;
char srules [ 500 ] = " [import vi][import es] " ;
UChar rules [ 500 ] ;
uint32_t length = 0 ;
int32_t itemCount ;
int32_t i , k ;
UChar32 start ;
UChar32 end ;
UChar str [ 500 ] ;
int32_t strLength ;
uint8_t sk1 [ 500 ] ;
uint8_t sk2 [ 500 ] ;
UBool b ;
USet * tailoredSet ;
USet * importTailoredSet ;
vicoll = ucol_open ( " vi " , & status ) ;
virules = ( UChar * ) ucol_getRules ( vicoll , & viruleslength ) ;
escoll = ucol_open ( " es " , & status ) ;
esrules = ( UChar * ) ucol_getRules ( escoll , & esruleslength ) ;
viesrules = ( UChar * ) uprv_malloc ( ( viruleslength + esruleslength + 1 ) * sizeof ( UChar * ) ) ;
viesrules [ 0 ] = 0 ;
u_strcat ( viesrules , virules ) ;
u_strcat ( viesrules , esrules ) ;
viesruleslength = viruleslength + esruleslength ;
viescoll = ucol_openRules ( viesrules , viesruleslength , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
/* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
length = u_unescape ( srules , rules , 500 ) ;
importviescoll = ucol_openRules ( rules , length , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
tailoredSet = ucol_getTailoredSet ( viescoll , & status ) ;
importTailoredSet = ucol_getTailoredSet ( importviescoll , & status ) ;
if ( ! uset_equals ( tailoredSet , importTailoredSet ) ) {
log_err ( " Tailored sets not equal " ) ;
}
uset_close ( importTailoredSet ) ;
itemCount = uset_getItemCount ( tailoredSet ) ;
for ( i = 0 ; i < itemCount ; i + + ) {
strLength = uset_getItem ( tailoredSet , i , & start , & end , str , 500 , & status ) ;
if ( strLength < 2 ) {
for ( ; start < = end ; start + + ) {
k = 0 ;
U16_APPEND ( str , k , 500 , start , b ) ;
ucol_getSortKey ( viescoll , str , 1 , sk1 , 500 ) ;
ucol_getSortKey ( importviescoll , str , 1 , sk2 , 500 ) ;
if ( compare_uint8_t_arrays ( sk1 , sk2 ) ! = 0 ) {
log_err ( " Sort key for %s not equal \n " , str ) ;
break ;
}
}
} else {
ucol_getSortKey ( viescoll , str , strLength , sk1 , 500 ) ;
ucol_getSortKey ( importviescoll , str , strLength , sk2 , 500 ) ;
if ( compare_uint8_t_arrays ( sk1 , sk2 ) ! = 0 ) {
log_err ( " ZZSort key for %s not equal \n " , str ) ;
break ;
}
}
}
uset_close ( tailoredSet ) ;
2010-11-02 16:46:23 +00:00
uprv_free ( viesrules ) ;
ucol_close ( vicoll ) ;
ucol_close ( escoll ) ;
ucol_close ( viescoll ) ;
ucol_close ( importviescoll ) ;
2010-11-01 22:23:49 +00:00
}
static void TestImportWithType ( void )
{
UCollator * vicoll ;
UCollator * decoll ;
UCollator * videcoll ;
UCollator * importvidecoll ;
UParseError error ;
UErrorCode status = U_ZERO_ERROR ;
const UChar * virules ;
int32_t viruleslength ;
const UChar * derules ;
int32_t deruleslength ;
UChar * viderules ;
int32_t videruleslength ;
const char srules [ 500 ] = " [import vi][import de-u-co-phonebk] " ;
UChar rules [ 500 ] ;
uint32_t length = 0 ;
int32_t itemCount ;
int32_t i , k ;
UChar32 start ;
UChar32 end ;
UChar str [ 500 ] ;
int32_t strLength ;
uint8_t sk1 [ 500 ] ;
uint8_t sk2 [ 500 ] ;
USet * tailoredSet ;
USet * importTailoredSet ;
vicoll = ucol_open ( " vi " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
virules = ucol_getRules ( vicoll , & viruleslength ) ;
/* decoll = ucol_open("de@collation=phonebook", &status); */
decoll = ucol_open ( " de-u-co-phonebk " , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
derules = ucol_getRules ( decoll , & deruleslength ) ;
viderules = ( UChar * ) uprv_malloc ( ( viruleslength + deruleslength + 1 ) * sizeof ( UChar * ) ) ;
viderules [ 0 ] = 0 ;
u_strcat ( viderules , virules ) ;
u_strcat ( viderules , derules ) ;
videruleslength = viruleslength + deruleslength ;
videcoll = ucol_openRules ( viderules , videruleslength , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
/* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
length = u_unescape ( srules , rules , 500 ) ;
importvidecoll = ucol_openRules ( rules , length , UCOL_ON , UCOL_TERTIARY , & error , & status ) ;
if ( U_FAILURE ( status ) ) {
log_err_status ( status , " ERROR: in creation of rule based collator: %s \n " , myErrorName ( status ) ) ;
return ;
}
tailoredSet = ucol_getTailoredSet ( videcoll , & status ) ;
importTailoredSet = ucol_getTailoredSet ( importvidecoll , & status ) ;
if ( ! uset_equals ( tailoredSet , importTailoredSet ) ) {
log_err ( " Tailored sets not equal " ) ;
}
uset_close ( importTailoredSet ) ;
itemCount = uset_getItemCount ( tailoredSet ) ;
for ( i = 0 ; i < itemCount ; i + + ) {
strLength = uset_getItem ( tailoredSet , i , & start , & end , str , 500 , & status ) ;
if ( strLength < 2 ) {
for ( ; start < = end ; start + + ) {
k = 0 ;
U16_APPEND_UNSAFE ( str , k , start ) ;
ucol_getSortKey ( videcoll , str , 1 , sk1 , 500 ) ;
ucol_getSortKey ( importvidecoll , str , 1 , sk2 , 500 ) ;
if ( compare_uint8_t_arrays ( sk1 , sk2 ) ! = 0 ) {
log_err ( " Sort key for %s not equal \n " , str ) ;
break ;
}
}
} else {
ucol_getSortKey ( videcoll , str , strLength , sk1 , 500 ) ;
ucol_getSortKey ( importvidecoll , str , strLength , sk2 , 500 ) ;
if ( compare_uint8_t_arrays ( sk1 , sk2 ) ! = 0 ) {
log_err ( " Sort key for %s not equal \n " , str ) ;
break ;
}
}
}
uset_close ( tailoredSet ) ;
2010-11-02 16:46:23 +00:00
uprv_free ( viderules ) ;
ucol_close ( videcoll ) ;
ucol_close ( importvidecoll ) ;
ucol_close ( vicoll ) ;
ucol_close ( decoll ) ;
2010-11-01 22:23:49 +00:00
}
2010-06-14 21:27:44 +00:00
2002-10-30 05:44:54 +00:00
/*
 * Registers test function x with addTest(). The registered path is the
 * literal prefix below followed by the stringified name of x.
 * The expansion refers to a variable named root, so the macro can only be
 * used where such a variable is in scope.
 */
# define TEST(x) addTest(root, &x, "tscoll / cmsccoll / " # x)
2001-03-20 07:22:33 +00:00
/**
 * Registers the miscellaneous collation tests of this file.
 *
 * Each TEST() line adds one test function under root. The order of the
 * lines is the registration order. Tests that are deliberately not
 * registered are listed in comments at the position they used to occupy.
 *
 * @param root test tree that the tests are added to
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /* Disabled: TEST(TestLimitations); */
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /* Disabled: TEST(TestJ784); 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /* Disabled: TEST(TestJ831); we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /* Disabled: TEST(PrintMarkDavis); this test doesn't test - just prints sortkeys */
    /* Disabled: TEST(TestGetCaseBit); this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /* Disabled: TEST(TestMoreBefore); */
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);

    /* Same-strength list and range syntax. */
    TEST(TestSameStrengthList);
    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);

    /* Rule import. */
    TEST(TestImport);
    TEST(TestImportWithType);

    /* Collation reordering. */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
}
2002-09-20 17:54:45 +00:00
# endif /* #if !UCONFIG_NO_COLLATION */