2001-08-25 02:06:37 +00:00
/********************************************************************
2006-09-28 08:40:44 +00:00
* Copyright ( c ) 2001 - 2006 International Business Machines
2003-06-04 01:06:48 +00:00
* Corporation and others . All Rights Reserved .
2001-08-25 02:06:37 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* File USRCHDAT . H
* Modification History :
* Name date Description
* synwee July 31 2001 creation
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2006-09-28 08:40:44 +00:00
/*
Note: This file is included by other C and C++ files. This file should not be directly compiled.
*/
2001-08-25 02:06:37 +00:00
# ifndef USRCHDAT_C
# define USRCHDAT_C
2006-09-28 08:40:44 +00:00
# include "unicode/ucol.h"
2002-09-20 17:54:45 +00:00
# if !UCONFIG_NO_COLLATION
2006-09-28 08:40:44 +00:00
U_CDECL_BEGIN
2001-08-25 02:06:37 +00:00
/*
 * One string-search test case: the inputs plus the expected results.
 * The field notes below are drawn from how the tables in this file fill the
 * struct; the code that consumes them is not visible here -- TODO confirm.
 */
struct SearchData {
/* Text to search. NULL in the final entry of every table in this file. */
const char * text ;
/* Pattern to look for in text. */
const char * pattern ;
/* NULL, or a short name such as "de", "es", "fr", "en" -- presumably a
   locale used to select the collator; verify against the caller. */
const char * collator ;
/* Collation strength for the case (UCOL_PRIMARY through UCOL_IDENTICAL
   appear in the tables). */
UCollationStrength strength ;
/* NULL, "characterbreaker" or "wordbreaker" -- presumably selects a break
   iterator; verify against the caller. */
const char * breaker ;
2006-09-28 08:40:44 +00:00
/* Expected values, each list closed by -1; presumably match start
   positions. Elements past the listed ones are zero-initialized.
   int8_t cannot hold values above 127; the largest listed here is 56. */
int8_t offset [ 32 ] ;
/* Values paired by index with offset[]; presumably the length of each
   expected match -- confirm. */
uint8_t size [ 32 ] ;
2001-08-25 02:06:37 +00:00
} ;
2006-09-28 08:40:44 +00:00
U_CDECL_END
2001-08-25 02:06:37 +00:00
/* Lets the tables below name the struct as plain SearchData. */
typedef struct SearchData SearchData ;
/* Collation rule text relating o/O and p/P. Not referenced elsewhere in this
   file; presumably used by the including test to build a tailored collator
   -- confirm. */
static const char * TESTCOLLATORRULE = " & o,O ; p,P " ;
/* Collation rule text relating ae/oe/ue (both cases) to the escaped
   characters 00e4/00c4, 00f6/00d6, 00fc/00dc. Not referenced elsewhere in
   this file; presumably consumed by the including test -- confirm. */
static const char * EXTRACOLLATIONRULE = " & ae ; \\ u00e4 & AE ; \\ u00c4 & oe ; \\ u00f6 & OE ; \\ u00d6 & ue ; \\ u00fc & UE ; \\ u00dc " ;
/*
 * Baseline cases: plain substrings plus a few combining-mark patterns.
 * Every case leaves collator and breaker NULL; all use UCOL_TERTIARY except
 * the 00c9 / "e" case, which uses UCOL_PRIMARY.
 * The closing entry has NULL text and pattern -- presumably the end-of-table
 * marker; confirm against the loop that walks this table.
 */
static const SearchData BASIC [ ] = {
2001-09-07 21:57:32 +00:00
{ " xxxxxxxxxxxxxxxxxxxx " , " fisher " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
2001-08-25 02:06:37 +00:00
{ " silly spring string " , " string " , NULL , UCOL_TERTIARY , NULL , { 13 , - 1 } ,
{ 6 } } ,
{ " silly spring string string " , " string " , NULL , UCOL_TERTIARY , NULL ,
{ 13 , 20 , - 1 } , { 6 , 6 } } ,
{ " silly string spring string " , " string " , NULL , UCOL_TERTIARY , NULL ,
{ 6 , 20 , - 1 } , { 6 , 6 } } ,
{ " string spring string " , " string " , NULL , UCOL_TERTIARY , NULL , { 0 , 14 , - 1 } ,
{ 6 , 6 } } ,
{ " Scott Ganyo " , " c " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " Scott Ganyo " , " " , NULL , UCOL_TERTIARY , NULL , { 5 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ " a \\ u0300b " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
2002-07-25 22:27:23 +00:00
{ " \\ u00c9 " , " e " , NULL , UCOL_PRIMARY , NULL , { 0 , - 1 } , { 1 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
2002-03-19 21:50:15 +00:00
/*
 * Cases in which every entry names a breaker ("characterbreaker" or
 * "wordbreaker"). Several text/pattern pairs appear once with each breaker
 * and list different expected offsets for the two.
 * The closing all-NULL entry is presumably the end-of-table marker.
 */
static const SearchData BREAKITERATOREXACT [ ] = {
2001-08-25 02:06:37 +00:00
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , " characterbreaker " , { 0 , 5 , - 1 } ,
{ 3 , 3 } } ,
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , " wordbreaker " , { 5 , - 1 } , { 3 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY ,
" characterbreaker " , { 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , " wordbreaker " ,
{ 10 , - 1 } , { 3 } } ,
{ " Channel, another channel, more channels, and one last Channel " ,
" Channel " , " es " , UCOL_TERTIARY , " wordbreaker " , { 0 , 54 , - 1 } , { 7 , 7 } } ,
2002-03-20 16:39:37 +00:00
/* jitterbug 1745 */
2002-03-19 21:50:15 +00:00
{ " testing that \\ u00e9 does not match e " , " e " , NULL , UCOL_TERTIARY ,
" characterbreaker " , { 1 , 17 , 30 , - 1 } , { 1 , 1 , 1 } } ,
{ " testing that string ab \\ u00e9cd does not match e " , " e " , NULL ,
UCOL_TERTIARY , " characterbreaker " , { 1 , 28 , 41 , - 1 } , { 1 , 1 , 1 } } ,
2002-07-25 22:27:23 +00:00
{ " \\ u00c9 " , " e " , " fr " , UCOL_PRIMARY , " characterbreaker " , { 0 , - 1 } , { 1 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Cases that each name a collator ("en", "fr", "de" or "es"). All use
 * UCOL_PRIMARY except the last real case, which uses UCOL_IDENTICAL.
 * The digit-ruler comment is a column guide for reading offsets.
 * The closing all-NULL entry is presumably the end-of-table marker.
 */
static const SearchData STRENGTH [ ] = {
/*012345678901234567890123456789012345678901234567890123456789*/
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , " wordbreaker " , { 16 , - 1 } , { 3 } } ,
{ " blackbirds Pat p \\ u00E9ch \\ u00E9 p \\ u00EAche p \\ u00E9cher p \\ u00EAcher Tod T \\ u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe " ,
" peche " , " fr " , UCOL_PRIMARY , NULL , { 15 , 21 , 27 , 34 , - 1 } , { 5 , 5 , 5 , 5 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , NULL ,
{ 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " A channel, another CHANNEL, more Channels, and one last channel... " ,
" channel " , " es " , UCOL_PRIMARY , NULL , { 2 , 19 , 33 , 56 , - 1 } ,
{ 7 , 7 , 7 , 7 } } ,
2002-05-15 20:26:26 +00:00
{ " \\ u00c0 should match but not A " , " A \\ u0300 " , " en " , UCOL_IDENTICAL ,
NULL , { 0 , - 1 } , { 1 , 0 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Cases over text containing spaces and hyphens, at tertiary, primary,
 * quaternary and secondary strength. The existing comments mark them as
 * covering a leading ignorable character, tightest match, and totally
 * ignorable text. One case uses an empty-looking pattern and lists an
 * offset for each of 26 positions with size 0.
 * The closing all-NULL entry is presumably the end-of-table marker.
 */
static const SearchData VARIABLE [ ] = {
/*012345678901234567890123456789012345678901234567890123456789*/
{ " blackbirds black blackbirds blackbird black-bird " ,
" blackbird " , NULL , UCOL_TERTIARY , NULL , { 0 , 17 , 28 , 38 , - 1 } ,
{ 9 , 9 , 9 , 10 } } ,
/* to see that it doesn't go into an infinite loop if the start of text
is an ignorable character */
{ " on " , " go " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " abcdefghijklmnopqrstuvwxyz " , " " , NULL , UCOL_PRIMARY , NULL ,
{ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 ,
20 , 21 , 22 , 23 , 24 , 25 , - 1 } , { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } } ,
/* testing tightest match */
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_QUATERNARY ,
NULL , { 1 , - 1 } , { 3 } } ,
/*012345678901234567890123456789012345678901234567890123456789 */
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_SECONDARY ,
NULL , { 1 , 6 , 13 , 21 , 31 , - 1 } , { 3 , 4 , 4 , 5 , 5 } } ,
/* totally ignorable text */
{ " --------------- " , " abc " , NULL , UCOL_SECONDARY ,
NULL , { - 1 } , { 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Single case: the pattern holds the text's two combining marks in the
 * opposite order, and one match (offset 1, size 2) is expected.
 * NOTE(review): the name suggests this runs with normalization enabled --
 * confirm against the caller.
 */
static const SearchData NORMEXACT [ ] = {
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Same text and pattern as the NORMEXACT case, but no match is expected
 * (the offset list holds only -1).
 * NOTE(review): the name suggests this runs with normalization disabled --
 * confirm against the caller.
 */
static const SearchData NONNORMEXACT [ ] = {
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Single case expecting overlapping matches: "abab" in "abababab" at
 * offsets 0, 2 and 4, each of size 4.
 */
static const SearchData OVERLAP [ ] = {
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , NULL , { 0 , 2 , 4 , - 1 } ,
{ 4 , 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Same text and pattern as the OVERLAP case, but only the non-overlapping
 * matches at offsets 0 and 4 are expected.
 */
static const SearchData NONOVERLAP [ ] = {
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , NULL , { 0 , 4 , - 1 } , { 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Two cases over "fox fpx" searching for "fox": the tertiary one expects a
 * match at 0 only, the primary one expects matches at 0 and 4. The existing
 * comments label them "english" and "tailored".
 * NOTE(review): the tailored case presumably runs with a collator built from
 * TESTCOLLATORRULE -- confirm against the caller.
 */
static const SearchData COLLATOR [ ] = {
/* english */
{ " fox fpx " , " fox " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 3 } } ,
/* tailored */
{ " fox fpx " , " fox " , NULL , UCOL_PRIMARY , NULL , { 0 , 4 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Two cases over the same text with different patterns ("the", then "fox"),
 * both at UCOL_PRIMARY with NULL collator and breaker.
 */
static const SearchData PATTERN [ ] = {
{ " The quick brown fox jumps over the lazy foxes " , " the " , NULL ,
UCOL_PRIMARY , NULL , { 0 , 31 , - 1 } , { 3 , 3 } } ,
{ " The quick brown fox jumps over the lazy foxes " , " fox " , NULL ,
UCOL_PRIMARY , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Two cases with the same pattern ("fox") over different texts, both at
 * UCOL_TERTIARY with NULL collator and breaker.
 */
static const SearchData TEXT [ ] = {
{ " the foxy brown fox " , " fox " , NULL , UCOL_TERTIARY , NULL , { 4 , 15 , - 1 } ,
{ 3 , 3 } } ,
{ " the quick brown fox " , " fox " , NULL , UCOL_TERTIARY , NULL , { 16 , - 1 } ,
{ 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Cases that search for base letters and combining marks in text holding
 * the escaped characters 00C0, 01FA and 0F73. Every case uses NULL collator,
 * UCOL_TERTIARY and NULL breaker. Many expect no match (offset list is
 * only -1).
 * The closing all-NULL entry is presumably the end-of-table marker.
 */
static const SearchData COMPOSITEBOUNDARIES [ ] = {
{ " \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u00C0C " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " \\ u00C0A " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " B \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u00C0B " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } ,
{ 1 , 1 } } ,
{ " \\ u00C0 \\ u0300 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
/* A + 030A + 0301 */
{ " \\ u01FA " , " \\ u01FA " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " \\ u030A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u030AA " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " A \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FAA " , " \\ u0301A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F73 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F71 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F72 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F71 \\ u0F72 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u0F73 " , " A \\ u0F71 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73A " , " \\ u0F72A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Two cases searching for "bee"; the second text extends the first and
 * expects one additional match. Both use NULL collator, UCOL_TERTIARY and
 * NULL breaker.
 */
static const SearchData MATCH [ ] = {
{ " a busy bee is a very busy beeee " , " bee " , NULL , UCOL_TERTIARY , NULL ,
{ 7 , 26 , - 1 } , { 3 , 3 } } ,
/* 012345678901234567890123456789012345678901234567890 */
{ " a busy bee is a very busy beeee with no bee life " , " bee " , NULL ,
UCOL_TERTIARY , NULL , { 7 , 26 , 40 , - 1 } , { 3 , 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Cases whose text and pattern contain surrogate escapes (D800/DC00 and
 * D834/DDB9), including unpaired D800 and DC00 units in the first text.
 * Every case uses NULL collator, UCOL_TERTIARY and NULL breaker.
 * The closing all-NULL entry is presumably the end-of-table marker.
 */
static const SearchData SUPPLEMENTARY [ ] = {
/* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
{ " abc \\ uD800 \\ uDC00 \\ uD800 \\ uDC01 \\ uD801 \\ uDC00 \\ uD800 \\ uDC00abc abc \\ uD800 \\ uDC00 \\ uD800 \\ uD800 \\ uDC00 \\ uD800 \\ uDC00 \\ uDC00 " ,
" \\ uD800 \\ uDC00 " , NULL , UCOL_TERTIARY , NULL , { 4 , 13 , 22 , 26 , 29 , - 1 } ,
{ 2 , 2 , 2 , 2 , 2 } } ,
2003-07-31 19:31:58 +00:00
{ " and \\ uD834 \\ uDDB9this sentence " , " \\ uD834 \\ uDDB9 " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 2 } } ,
{ " and \\ uD834 \\ uDDB9 this sentence " , " \\ uD834 \\ uDDB9 " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 4 } } ,
{ " and- \\ uD834 \\ uDDB9-this sentence " , " - \\ uD834 \\ uDDB9- " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 4 } } ,
{ " and, \\ uD834 \\ uDDB9,this sentence " , " , \\ uD834 \\ uDDB9, " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 4 } } ,
{ " and? \\ uD834 \\ uDDB9?this sentence " , " ? \\ uD834 \\ uDDB9? " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 4 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
2001-09-11 21:40:05 +00:00
/* Collation rule text defining multi-character sequences (ab/c, AB, ABC,
   and X followed by escaped combining marks). Not referenced elsewhere in
   this file; presumably used with the CONTRACTION tables -- confirm. */
static const char * CONTRACTIONRULE =
" &z = ab/c < AB < X \\ u0300 < ABC < X \\ u0300 \\ u0315 " ;
2001-08-25 02:06:37 +00:00
/*
 * Cases built from the letters and combining marks that CONTRACTIONRULE
 * mentions (A, B, C, X, z, 0300, 0315) plus 0319 and 031A. Every case uses
 * NULL collator, UCOL_TERTIARY and NULL breaker.
 * NOTE(review): presumably run with a collator built from CONTRACTIONRULE
 * -- confirm against the caller.
 * The closing all-NULL entry is presumably the end-of-table marker.
 */
static const SearchData CONTRACTION [ ] = {
/* common discontiguous */
{ " A \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " A \\ u0300 \\ u0315 " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
/* contraction prefix */
{ " AB \\ u0315C " , " A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " AB \\ u0315C " , " AB " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " AB \\ u0315C " , " \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { 2 , - 1 } , { 1 } } ,
/* discontiguous problem here for backwards iteration.
accents not found because discontiguous stores all information */
{ " X \\ u0300 \\ u0319 \\ u0315 " , " \\ u0319 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
/* ends not with a contraction character */
{ " X \\ u0315 \\ u0300D " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
{ " X \\ u0315 \\ u0300D " , " X \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL ,
{ 0 , - 1 } , { 3 } } ,
{ " X \\ u0300 \\ u031A \\ u0315D " , " X \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
/* blocked discontiguous */
{ " X \\ u0300 \\ u031A \\ u0315D " , " \\ u031A \\ u0315D " , NULL , UCOL_TERTIARY , NULL ,
{ - 1 } , { 0 } } ,
2001-09-11 21:40:05 +00:00
{ " ab " , " z " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/* Collation rule text relating "a" and the escaped 0300. Not referenced
   elsewhere in this file; presumably used with the IGNORABLE table --
   confirm. */
static const char * IGNORABLERULE = " &a = \\ u0300 " ;
/*
 * Single UCOL_PRIMARY case: 0300 searched in text that alternates 0315 and
 * 0300; expects offsets 0 and 3 with sizes 2 and 3.
 * NOTE(review): presumably run with a collator built from IGNORABLERULE --
 * confirm against the caller.
 */
static const SearchData IGNORABLE [ ] = {
{ " \\ u0315 \\ u0300 \\ u0315 \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_PRIMARY , NULL ,
{ 0 , 3 , - 1 } , { 2 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Repeats the text/pattern pairs of BASIC except the 00c9 / "e" one, then
 * adds further combining-mark cases. Unlike BASIC, the two cases that search
 * for a lone 0300 in text containing 0300 0325 expect a match of size 2.
 * Every case uses NULL collator, UCOL_TERTIARY and NULL breaker.
 * NOTE(review): the name suggests these run in a canonical-match mode --
 * confirm against the caller.
 */
static const SearchData BASICCANONICAL [ ] = {
2001-09-07 21:57:32 +00:00
{ " xxxxxxxxxxxxxxxxxxxx " , " fisher " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
2001-08-25 02:06:37 +00:00
{ " silly spring string " , " string " , NULL , UCOL_TERTIARY , NULL , { 13 , - 1 } ,
{ 6 } } ,
{ " silly spring string string " , " string " , NULL , UCOL_TERTIARY , NULL ,
{ 13 , 20 , - 1 } , { 6 , 6 } } ,
{ " silly string spring string " , " string " , NULL , UCOL_TERTIARY , NULL ,
{ 6 , 20 , - 1 } , { 6 , 6 } } ,
{ " string spring string " , " string " , NULL , UCOL_TERTIARY , NULL , { 0 , 14 , - 1 } ,
{ 6 , 6 } } ,
{ " Scott Ganyo " , " c " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " Scott Ganyo " , " " , NULL , UCOL_TERTIARY , NULL , { 5 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ " a \\ u0300b " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " a \\ u0300 \\ u0325b " , " \\ u0300b " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 3 } } ,
2001-09-07 21:57:32 +00:00
{ " \\ u0325 \\ u0300A \\ u0325 \\ u0300 " , " \\ u0300A \\ u0300 " , NULL , UCOL_TERTIARY ,
NULL , { 0 , - 1 } , { 5 } } ,
{ " \\ u0325 \\ u0300A \\ u0325 \\ u0300 " , " \\ u0325A \\ u0325 " , NULL , UCOL_TERTIARY ,
NULL , { 0 , - 1 } , { 5 } } ,
2001-08-25 02:06:37 +00:00
{ " a \\ u0300 \\ u0325b \\ u0300 \\ u0325c \\ u0325b \\ u0300 \\ u0300b \\ u0325 " ,
" \\ u0300b \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , 12 , - 1 } , { 5 , 3 } } ,
2003-07-03 19:17:35 +00:00
{ " \\ u00c4 \\ u0323 " , " A \\ u0323 \\ u0308 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0308 \\ u0323 " , " \\ u0323 \\ u0308 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Six cases over text made of 0300 0325, optionally preceded by "a"; the
 * patterns are either mark alone or both marks in either order. Each case
 * expects a single match of size 2.
 */
static const SearchData NORMCANONICAL [ ] = {
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 2 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Lists the same cases, with the same expected results, as
 * BREAKITERATOREXACT: every entry names "characterbreaker" or "wordbreaker".
 * NOTE(review): presumably exercised in a canonical-match mode -- confirm
 * against the caller.
 */
static const SearchData BREAKITERATORCANONICAL [ ] = {
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , " characterbreaker " , { 0 , 5 , - 1 } ,
{ 3 , 3 } } ,
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , " wordbreaker " , { 5 , - 1 } , { 3 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY ,
" characterbreaker " , { 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , " wordbreaker " ,
{ 10 , - 1 } , { 3 } } ,
{ " Channel, another channel, more channels, and one last Channel " ,
" Channel " , " es " , UCOL_TERTIARY , " wordbreaker " , { 0 , 54 , - 1 } , { 7 , 7 } } ,
2002-03-20 16:39:37 +00:00
/* jitterbug 1745 */
2002-03-19 21:50:15 +00:00
{ " testing that \\ u00e9 does not match e " , " e " , NULL , UCOL_TERTIARY ,
" characterbreaker " , { 1 , 17 , 30 , - 1 } , { 1 , 1 , 1 } } ,
{ " testing that string ab \\ u00e9cd does not match e " , " e " , NULL ,
UCOL_TERTIARY , " characterbreaker " , { 1 , 28 , 41 , - 1 } , { 1 , 1 , 1 } } ,
2002-07-25 22:27:23 +00:00
{ " \\ u00c9 " , " e " , " fr " , UCOL_PRIMARY , " characterbreaker " , { 0 , - 1 } , { 1 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Lists the UCOL_PRIMARY cases of STRENGTH with the same expected results;
 * the UCOL_IDENTICAL case of that table is not repeated here.
 * NOTE(review): presumably exercised in a canonical-match mode -- confirm
 * against the caller.
 */
static const SearchData STRENGTHCANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890123456789 */
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , " wordbreaker " , { 16 , - 1 } , { 3 } } ,
{ " blackbirds Pat p \\ u00E9ch \\ u00E9 p \\ u00EAche p \\ u00E9cher p \\ u00EAcher Tod T \\ u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe " ,
" peche " , " fr " , UCOL_PRIMARY , NULL , { 15 , 21 , 27 , 34 , - 1 } , { 5 , 5 , 5 , 5 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , NULL ,
{ 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " A channel, another CHANNEL, more Channels, and one last channel... " ,
" channel " , " es " , UCOL_PRIMARY , NULL , { 2 , 19 , 33 , 56 , - 1 } ,
{ 7 , 7 , 7 , 7 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Lists the same cases, with the same expected results, as VARIABLE: text
 * containing spaces and hyphens at several strengths, covering a leading
 * ignorable character, tightest match, and totally ignorable text.
 * NOTE(review): presumably exercised in a canonical-match mode -- confirm
 * against the caller.
 */
static const SearchData VARIABLECANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890123456789 */
{ " blackbirds black blackbirds blackbird black-bird " ,
" blackbird " , NULL , UCOL_TERTIARY , NULL , { 0 , 17 , 28 , 38 , - 1 } ,
{ 9 , 9 , 9 , 10 } } ,
/* to see that it doesn't go into an infinite loop if the start of text
is an ignorable character */
{ " on " , " go " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " abcdefghijklmnopqrstuvwxyz " , " " , NULL , UCOL_PRIMARY , NULL ,
{ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 ,
20 , 21 , 22 , 23 , 24 , 25 , - 1 } , { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } } ,
/* testing tightest match */
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_QUATERNARY ,
NULL , { 1 , - 1 } , { 3 } } ,
/*012345678901234567890123456789012345678901234567890123456789 */
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_SECONDARY ,
NULL , { 1 , 6 , 13 , 21 , 31 , - 1 } , { 3 , 4 , 4 , 5 , 5 } } ,
/* totally ignorable text */
{ " --------------- " , " abc " , NULL , UCOL_SECONDARY ,
NULL , { - 1 } , { 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Same single case as OVERLAP: "abab" in "abababab" expected at offsets
 * 0, 2 and 4, each of size 4.
 */
static const SearchData OVERLAPCANONICAL [ ] = {
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , NULL , { 0 , 2 , 4 , - 1 } ,
{ 4 , 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Same single case as NONOVERLAP: "abab" in "abababab" expected only at
 * offsets 0 and 4.
 */
static const SearchData NONOVERLAPCANONICAL [ ] = {
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , NULL , { 0 , 4 , - 1 } , { 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Same two cases as COLLATOR: "fox" in "fox fpx" at tertiary strength
 * (match at 0) and at primary strength (matches at 0 and 4). The existing
 * comments label them "english" and "tailored".
 * NOTE(review): the tailored case presumably runs with a collator built from
 * TESTCOLLATORRULE -- confirm against the caller.
 */
static const SearchData COLLATORCANONICAL [ ] = {
/* english */
{ " fox fpx " , " fox " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 3 } } ,
/* tailored */
{ " fox fpx " , " fox " , NULL , UCOL_PRIMARY , NULL , { 0 , 4 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Same two cases as PATTERN: one text searched for "the" and then "fox",
 * both at UCOL_PRIMARY with NULL collator and breaker.
 */
static const SearchData PATTERNCANONICAL [ ] = {
{ " The quick brown fox jumps over the lazy foxes " , " the " , NULL ,
UCOL_PRIMARY , NULL , { 0 , 31 , - 1 } , { 3 , 3 } } ,
{ " The quick brown fox jumps over the lazy foxes " , " fox " , NULL ,
UCOL_PRIMARY , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Same two cases as TEXT: the pattern "fox" searched in two different
 * texts, both at UCOL_TERTIARY with NULL collator and breaker.
 */
static const SearchData TEXTCANONICAL [ ] = {
{ " the foxy brown fox " , " fox " , NULL , UCOL_TERTIARY , NULL , { 4 , 15 , - 1 } ,
{ 3 , 3 } } ,
{ " the quick brown fox " , " fox " , NULL , UCOL_TERTIARY , NULL , { 16 , - 1 } ,
{ 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Repeats the text/pattern pairs of COMPOSITEBOUNDARIES in the same order,
 * but several of them expect a match here where that table expects none,
 * and one longer multi-match case is added at the end.
 * Every case uses NULL collator, UCOL_TERTIARY and NULL breaker.
 * NOTE(review): presumably exercised in a canonical-match mode -- confirm
 * against the caller.
 */
static const SearchData COMPOSITEBOUNDARIESCANONICAL [ ] = {
{ " \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u00C0C " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " \\ u00C0A " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " B \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u00C0B " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } ,
{ 1 , 1 } } ,
/* \\u0300 blocked by \\u0300 */
{ " \\ u00C0 \\ u0300 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
/* A + 030A + 0301 */
{ " \\ u01FA " , " \\ u01FA " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " \\ u030A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " \\ u030AA " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
/* blocked accent */
{ " \\ u01FA " , " A \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u01FAA " , " \\ u0301A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0F73 " , " \\ u0F73 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F71 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F72 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F71 \\ u0F72 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u0F73 " , " A \\ u0F71 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0F73A " , " \\ u0F72A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u01FA A \\ u0301 \\ u030A A \\ u030A \\ u0301 A \\ u030A \\ u01FA " , " A \\ u030A " ,
NULL , UCOL_TERTIARY , NULL , { 0 , 6 , 10 , 13 , - 1 } , { 1 , 3 , 2 , 1 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Same two "bee" cases as MATCH, with the same expected offsets and sizes.
 */
static const SearchData MATCHCANONICAL [ ] = {
{ " a busy bee is a very busy beeee " , " bee " , NULL , UCOL_TERTIARY , NULL ,
{ 7 , 26 , - 1 } , { 3 , 3 } } ,
/*012345678901234567890123456789012345678901234567890 */
{ " a busy bee is a very busy beeee with no bee life " , " bee " , NULL ,
UCOL_TERTIARY , NULL , { 7 , 26 , 40 , - 1 } , { 3 , 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Lists the same surrogate-escape cases, with the same expected results,
 * as SUPPLEMENTARY.
 * NOTE(review): presumably exercised in a canonical-match mode -- confirm
 * against the caller.
 */
static const SearchData SUPPLEMENTARYCANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
{ " abc \\ uD800 \\ uDC00 \\ uD800 \\ uDC01 \\ uD801 \\ uDC00 \\ uD800 \\ uDC00abc abc \\ uD800 \\ uDC00 \\ uD800 \\ uD800 \\ uDC00 \\ uD800 \\ uDC00 \\ uDC00 " ,
" \\ uD800 \\ uDC00 " , NULL , UCOL_TERTIARY , NULL , { 4 , 13 , 22 , 26 , 29 , - 1 } ,
{ 2 , 2 , 2 , 2 , 2 } } ,
2003-07-31 19:31:58 +00:00
{ " and \\ uD834 \\ uDDB9this sentence " , " \\ uD834 \\ uDDB9 " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 2 } } ,
{ " and \\ uD834 \\ uDDB9 this sentence " , " \\ uD834 \\ uDDB9 " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 4 } } ,
{ " and- \\ uD834 \\ uDDB9-this sentence " , " - \\ uD834 \\ uDDB9- " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 4 } } ,
{ " and, \\ uD834 \\ uDDB9,this sentence " , " , \\ uD834 \\ uDDB9, " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 4 } } ,
{ " and? \\ uD834 \\ uDDB9?this sentence " , " ? \\ uD834 \\ uDDB9? " , NULL ,
UCOL_TERTIARY , NULL , { 3 , - 1 } , { 4 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
/*
 * Repeats the text/pattern pairs of CONTRACTION, except that the
 * 0319-pattern case is commented out here; several pairs expect a match
 * where that table expects none. Every case uses NULL collator,
 * UCOL_TERTIARY and NULL breaker.
 * NOTE(review): presumably run with a collator built from CONTRACTIONRULE
 * in a canonical-match mode -- confirm against the caller.
 */
static const SearchData CONTRACTIONCANONICAL [ ] = {
/* common discontiguous */
{ " A \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 2 } } ,
{ " A \\ u0300 \\ u0315 " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
/* contraction prefix */
{ " AB \\ u0315C " , " A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " AB \\ u0315C " , " AB " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " AB \\ u0315C " , " \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { 2 , - 1 } , { 1 } } ,
/* discontiguous problem here for backwards iteration.
forwards gives 0 , 4 but backwards give 1 , 3 */
/* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1},
{ 4 } } , */
/* ends not with a contraction character */
{ " X \\ u0315 \\ u0300D " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
{ " X \\ u0315 \\ u0300D " , " X \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL ,
{ 0 , - 1 } , { 3 } } ,
{ " X \\ u0300 \\ u031A \\ u0315D " , " X \\ u0300 " , NULL , UCOL_TERTIARY , NULL ,
{ 0 , - 1 } , { 4 } } ,
/* blocked discontiguous */
{ " X \\ u0300 \\ u031A \\ u0315D " , " \\ u031A \\ u0315D " , NULL , UCOL_TERTIARY , NULL ,
{ 1 , - 1 } , { 4 } } ,
2001-09-11 21:40:05 +00:00
{ " ab " , " z " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
2002-09-20 17:54:45 +00:00
# endif /* #if !UCONFIG_NO_COLLATION */
2001-08-25 02:06:37 +00:00
# endif