2001-08-25 02:06:37 +00:00
/********************************************************************
* Copyright ( c ) 2001 ,
* International Business Machines Corporation and others .
* All Rights Reserved .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* File USRCHDAT . H
* Modification History :
* Name date Description
* synwee July 31 2001 creation
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# ifndef USRCHDAT_C
# define USRCHDAT_C
# include "unicode/ucol.h"
struct SearchData {
const char * text ;
const char * pattern ;
const char * collator ;
UCollationStrength strength ;
const char * breaker ;
2002-03-12 01:32:42 +00:00
int32_t offset [ 32 ] ;
2001-08-25 02:06:37 +00:00
uint32_t size [ 32 ] ;
} ;
typedef struct SearchData SearchData ;
static const char * TESTCOLLATORRULE = " & o,O ; p,P " ;
static const char * EXTRACOLLATIONRULE = " & ae ; \\ u00e4 & AE ; \\ u00c4 & oe ; \\ u00f6 & OE ; \\ u00d6 & ue ; \\ u00fc & UE ; \\ u00dc " ;
static const SearchData BASIC [ ] = {
2001-09-07 21:57:32 +00:00
{ " xxxxxxxxxxxxxxxxxxxx " , " fisher " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
2001-08-25 02:06:37 +00:00
{ " silly spring string " , " string " , NULL , UCOL_TERTIARY , NULL , { 13 , - 1 } ,
{ 6 } } ,
{ " silly spring string string " , " string " , NULL , UCOL_TERTIARY , NULL ,
{ 13 , 20 , - 1 } , { 6 , 6 } } ,
{ " silly string spring string " , " string " , NULL , UCOL_TERTIARY , NULL ,
{ 6 , 20 , - 1 } , { 6 , 6 } } ,
{ " string spring string " , " string " , NULL , UCOL_TERTIARY , NULL , { 0 , 14 , - 1 } ,
{ 6 , 6 } } ,
{ " Scott Ganyo " , " c " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " Scott Ganyo " , " " , NULL , UCOL_TERTIARY , NULL , { 5 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ " a \\ u0300b " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
2002-07-25 22:27:23 +00:00
{ " \\ u00c9 " , " e " , NULL , UCOL_PRIMARY , NULL , { 0 , - 1 } , { 1 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
2002-03-19 21:50:15 +00:00
static const SearchData BREAKITERATOREXACT [ ] = {
2001-08-25 02:06:37 +00:00
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , " characterbreaker " , { 0 , 5 , - 1 } ,
{ 3 , 3 } } ,
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , " wordbreaker " , { 5 , - 1 } , { 3 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY ,
" characterbreaker " , { 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , " wordbreaker " ,
{ 10 , - 1 } , { 3 } } ,
{ " Channel, another channel, more channels, and one last Channel " ,
" Channel " , " es " , UCOL_TERTIARY , " wordbreaker " , { 0 , 54 , - 1 } , { 7 , 7 } } ,
2002-03-20 16:39:37 +00:00
/* jitterbug 1745 */
2002-03-19 21:50:15 +00:00
{ " testing that \\ u00e9 does not match e " , " e " , NULL , UCOL_TERTIARY ,
" characterbreaker " , { 1 , 17 , 30 , - 1 } , { 1 , 1 , 1 } } ,
{ " testing that string ab \\ u00e9cd does not match e " , " e " , NULL ,
UCOL_TERTIARY , " characterbreaker " , { 1 , 28 , 41 , - 1 } , { 1 , 1 , 1 } } ,
2002-07-25 22:27:23 +00:00
{ " \\ u00c9 " , " e " , " fr " , UCOL_PRIMARY , " characterbreaker " , { 0 , - 1 } , { 1 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData STRENGTH [ ] = {
/*012345678901234567890123456789012345678901234567890123456789*/
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , " wordbreaker " , { 16 , - 1 } , { 3 } } ,
{ " blackbirds Pat p \\ u00E9ch \\ u00E9 p \\ u00EAche p \\ u00E9cher p \\ u00EAcher Tod T \\ u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe " ,
" peche " , " fr " , UCOL_PRIMARY , NULL , { 15 , 21 , 27 , 34 , - 1 } , { 5 , 5 , 5 , 5 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , NULL ,
{ 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " A channel, another CHANNEL, more Channels, and one last channel... " ,
" channel " , " es " , UCOL_PRIMARY , NULL , { 2 , 19 , 33 , 56 , - 1 } ,
{ 7 , 7 , 7 , 7 } } ,
2002-05-15 20:26:26 +00:00
{ " \\ u00c0 should match but not A " , " A \\ u0300 " , " en " , UCOL_IDENTICAL ,
NULL , { 0 , - 1 } , { 1 , 0 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData VARIABLE [ ] = {
/*012345678901234567890123456789012345678901234567890123456789*/
{ " blackbirds black blackbirds blackbird black-bird " ,
" blackbird " , NULL , UCOL_TERTIARY , NULL , { 0 , 17 , 28 , 38 , - 1 } ,
{ 9 , 9 , 9 , 10 } } ,
/* to see that it doesn't go into an infinite loop if the start of text
is a ignorable character */
{ " on " , " go " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " abcdefghijklmnopqrstuvwxyz " , " " , NULL , UCOL_PRIMARY , NULL ,
{ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 ,
20 , 21 , 22 , 23 , 24 , 25 , - 1 } , { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } } ,
/* testing tightest match */
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_QUATERNARY ,
NULL , { 1 , - 1 } , { 3 } } ,
/*012345678901234567890123456789012345678901234567890123456789 */
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_SECONDARY ,
NULL , { 1 , 6 , 13 , 21 , 31 , - 1 } , { 3 , 4 , 4 , 5 , 5 } } ,
/* totally ignorable text */
{ " --------------- " , " abc " , NULL , UCOL_SECONDARY ,
NULL , { - 1 } , { 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData NORMEXACT [ ] = {
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData NONNORMEXACT [ ] = {
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData OVERLAP [ ] = {
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , NULL , { 0 , 2 , 4 , - 1 } ,
{ 4 , 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData NONOVERLAP [ ] = {
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , NULL , { 0 , 4 , - 1 } , { 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData COLLATOR [ ] = {
/* english */
{ " fox fpx " , " fox " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 3 } } ,
/* tailored */
{ " fox fpx " , " fox " , NULL , UCOL_PRIMARY , NULL , { 0 , 4 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData PATTERN [ ] = {
{ " The quick brown fox jumps over the lazy foxes " , " the " , NULL ,
UCOL_PRIMARY , NULL , { 0 , 31 , - 1 } , { 3 , 3 } } ,
{ " The quick brown fox jumps over the lazy foxes " , " fox " , NULL ,
UCOL_PRIMARY , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData TEXT [ ] = {
{ " the foxy brown fox " , " fox " , NULL , UCOL_TERTIARY , NULL , { 4 , 15 , - 1 } ,
{ 3 , 3 } } ,
{ " the quick brown fox " , " fox " , NULL , UCOL_TERTIARY , NULL , { 16 , - 1 } ,
{ 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData COMPOSITEBOUNDARIES [ ] = {
{ " \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u00C0C " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " \\ u00C0A " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " B \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u00C0B " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } ,
{ 1 , 1 } } ,
{ " \\ u00C0 \\ u0300 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
/* A + 030A + 0301 */
{ " \\ u01FA " , " \\ u01FA " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " \\ u030A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u030AA " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " A \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FAA " , " \\ u0301A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F73 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F71 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F72 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F71 \\ u0F72 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u0F73 " , " A \\ u0F71 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73A " , " \\ u0F72A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData MATCH [ ] = {
{ " a busy bee is a very busy beeee " , " bee " , NULL , UCOL_TERTIARY , NULL ,
{ 7 , 26 , - 1 } , { 3 , 3 } } ,
/* 012345678901234567890123456789012345678901234567890 */
{ " a busy bee is a very busy beeee with no bee life " , " bee " , NULL ,
UCOL_TERTIARY , NULL , { 7 , 26 , 40 , - 1 } , { 3 , 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData SUPPLEMENTARY [ ] = {
/* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
{ " abc \\ uD800 \\ uDC00 \\ uD800 \\ uDC01 \\ uD801 \\ uDC00 \\ uD800 \\ uDC00abc abc \\ uD800 \\ uDC00 \\ uD800 \\ uD800 \\ uDC00 \\ uD800 \\ uDC00 \\ uDC00 " ,
" \\ uD800 \\ uDC00 " , NULL , UCOL_TERTIARY , NULL , { 4 , 13 , 22 , 26 , 29 , - 1 } ,
{ 2 , 2 , 2 , 2 , 2 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
2001-09-11 21:40:05 +00:00
static const char * CONTRACTIONRULE =
" &z = ab/c < AB < X \\ u0300 < ABC < X \\ u0300 \\ u0315 " ;
2001-08-25 02:06:37 +00:00
static const SearchData CONTRACTION [ ] = {
/* common discontiguous */
{ " A \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " A \\ u0300 \\ u0315 " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
/* contraction prefix */
{ " AB \\ u0315C " , " A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " AB \\ u0315C " , " AB " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " AB \\ u0315C " , " \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { 2 , - 1 } , { 1 } } ,
/* discontiguous problem here for backwards iteration.
accents not found because discontiguous stores all information */
{ " X \\ u0300 \\ u0319 \\ u0315 " , " \\ u0319 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
/* ends not with a contraction character */
{ " X \\ u0315 \\ u0300D " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
{ " X \\ u0315 \\ u0300D " , " X \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL ,
{ 0 , - 1 } , { 3 } } ,
{ " X \\ u0300 \\ u031A \\ u0315D " , " X \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
/* blocked discontiguous */
{ " X \\ u0300 \\ u031A \\ u0315D " , " \\ u031A \\ u0315D " , NULL , UCOL_TERTIARY , NULL ,
{ - 1 } , { 0 } } ,
2001-09-11 21:40:05 +00:00
{ " ab " , " z " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const char * IGNORABLERULE = " &a = \\ u0300 " ;
static const SearchData IGNORABLE [ ] = {
{ " \\ u0315 \\ u0300 \\ u0315 \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_PRIMARY , NULL ,
{ 0 , 3 , - 1 } , { 2 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData BASICCANONICAL [ ] = {
2001-09-07 21:57:32 +00:00
{ " xxxxxxxxxxxxxxxxxxxx " , " fisher " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
2001-08-25 02:06:37 +00:00
{ " silly spring string " , " string " , NULL , UCOL_TERTIARY , NULL , { 13 , - 1 } ,
{ 6 } } ,
{ " silly spring string string " , " string " , NULL , UCOL_TERTIARY , NULL ,
{ 13 , 20 , - 1 } , { 6 , 6 } } ,
{ " silly string spring string " , " string " , NULL , UCOL_TERTIARY , NULL ,
{ 6 , 20 , - 1 } , { 6 , 6 } } ,
{ " string spring string " , " string " , NULL , UCOL_TERTIARY , NULL , { 0 , 14 , - 1 } ,
{ 6 , 6 } } ,
{ " Scott Ganyo " , " c " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " Scott Ganyo " , " " , NULL , UCOL_TERTIARY , NULL , { 5 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ " a \\ u0300b " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " a \\ u0300 \\ u0325b " , " \\ u0300b " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 3 } } ,
2001-09-07 21:57:32 +00:00
{ " \\ u0325 \\ u0300A \\ u0325 \\ u0300 " , " \\ u0300A \\ u0300 " , NULL , UCOL_TERTIARY ,
NULL , { 0 , - 1 } , { 5 } } ,
{ " \\ u0325 \\ u0300A \\ u0325 \\ u0300 " , " \\ u0325A \\ u0325 " , NULL , UCOL_TERTIARY ,
NULL , { 0 , - 1 } , { 5 } } ,
2001-08-25 02:06:37 +00:00
{ " a \\ u0300 \\ u0325b \\ u0300 \\ u0325c \\ u0325b \\ u0300 \\ u0300b \\ u0325 " ,
" \\ u0300b \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , 12 , - 1 } , { 5 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData NORMCANONICAL [ ] = {
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0325 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 2 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData BREAKITERATORCANONICAL [ ] = {
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , " characterbreaker " , { 0 , 5 , - 1 } ,
{ 3 , 3 } } ,
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , " wordbreaker " , { 5 , - 1 } , { 3 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY ,
" characterbreaker " , { 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , " wordbreaker " ,
{ 10 , - 1 } , { 3 } } ,
{ " Channel, another channel, more channels, and one last Channel " ,
" Channel " , " es " , UCOL_TERTIARY , " wordbreaker " , { 0 , 54 , - 1 } , { 7 , 7 } } ,
2002-03-20 16:39:37 +00:00
/* jitterbug 1745 */
2002-03-19 21:50:15 +00:00
{ " testing that \\ u00e9 does not match e " , " e " , NULL , UCOL_TERTIARY ,
" characterbreaker " , { 1 , 17 , 30 , - 1 } , { 1 , 1 , 1 } } ,
{ " testing that string ab \\ u00e9cd does not match e " , " e " , NULL ,
UCOL_TERTIARY , " characterbreaker " , { 1 , 28 , 41 , - 1 } , { 1 , 1 , 1 } } ,
2002-07-25 22:27:23 +00:00
{ " \\ u00c9 " , " e " , " fr " , UCOL_PRIMARY , " characterbreaker " , { 0 , - 1 } , { 1 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData STRENGTHCANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890123456789 */
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , " wordbreaker " , { 16 , - 1 } , { 3 } } ,
{ " blackbirds Pat p \\ u00E9ch \\ u00E9 p \\ u00EAche p \\ u00E9cher p \\ u00EAcher Tod T \\ u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe " ,
" peche " , " fr " , UCOL_PRIMARY , NULL , { 15 , 21 , 27 , 34 , - 1 } , { 5 , 5 , 5 , 5 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , NULL ,
{ 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " A channel, another CHANNEL, more Channels, and one last channel... " ,
" channel " , " es " , UCOL_PRIMARY , NULL , { 2 , 19 , 33 , 56 , - 1 } ,
{ 7 , 7 , 7 , 7 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData VARIABLECANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890123456789 */
{ " blackbirds black blackbirds blackbird black-bird " ,
" blackbird " , NULL , UCOL_TERTIARY , NULL , { 0 , 17 , 28 , 38 , - 1 } ,
{ 9 , 9 , 9 , 10 } } ,
/* to see that it doesn't go into an infinite loop if the start of text
is a ignorable character */
{ " on " , " go " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " abcdefghijklmnopqrstuvwxyz " , " " , NULL , UCOL_PRIMARY , NULL ,
{ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 ,
20 , 21 , 22 , 23 , 24 , 25 , - 1 } , { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } } ,
/* testing tightest match */
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_QUATERNARY ,
NULL , { 1 , - 1 } , { 3 } } ,
/*012345678901234567890123456789012345678901234567890123456789 */
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_SECONDARY ,
NULL , { 1 , 6 , 13 , 21 , 31 , - 1 } , { 3 , 4 , 4 , 5 , 5 } } ,
/* totally ignorable text */
{ " --------------- " , " abc " , NULL , UCOL_SECONDARY ,
NULL , { - 1 } , { 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData OVERLAPCANONICAL [ ] = {
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , NULL , { 0 , 2 , 4 , - 1 } ,
{ 4 , 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData NONOVERLAPCANONICAL [ ] = {
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , NULL , { 0 , 4 , - 1 } , { 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData COLLATORCANONICAL [ ] = {
/* english */
{ " fox fpx " , " fox " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 3 } } ,
/* tailored */
{ " fox fpx " , " fox " , NULL , UCOL_PRIMARY , NULL , { 0 , 4 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData PATTERNCANONICAL [ ] = {
{ " The quick brown fox jumps over the lazy foxes " , " the " , NULL ,
UCOL_PRIMARY , NULL , { 0 , 31 , - 1 } , { 3 , 3 } } ,
{ " The quick brown fox jumps over the lazy foxes " , " fox " , NULL ,
UCOL_PRIMARY , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData TEXTCANONICAL [ ] = {
{ " the foxy brown fox " , " fox " , NULL , UCOL_TERTIARY , NULL , { 4 , 15 , - 1 } ,
{ 3 , 3 } } ,
{ " the quick brown fox " , " fox " , NULL , UCOL_TERTIARY , NULL , { 16 , - 1 } ,
{ 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData COMPOSITEBOUNDARIESCANONICAL [ ] = {
{ " \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u00C0C " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " \\ u00C0A " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " B \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u00C0B " , " A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , 1 , - 1 } ,
{ 1 , 1 } } ,
/* \\u0300 blocked by \\u0300 */
{ " \\ u00C0 \\ u0300 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
/* A + 030A + 0301 */
{ " \\ u01FA " , " \\ u01FA " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " \\ u030A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " \\ u030AA " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
/* blocked accent */
{ " \\ u01FA " , " A \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u01FAA " , " \\ u0301A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0F73 " , " \\ u0F73 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F71 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F72 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F71 \\ u0F72 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u0F73 " , " A \\ u0F71 " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0F73A " , " \\ u0F72A " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u01FA A \\ u0301 \\ u030A A \\ u030A \\ u0301 A \\ u030A \\ u01FA " , " A \\ u030A " ,
NULL , UCOL_TERTIARY , NULL , { 0 , 6 , 10 , 13 , - 1 } , { 1 , 3 , 2 , 1 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData MATCHCANONICAL [ ] = {
{ " a busy bee is a very busy beeee " , " bee " , NULL , UCOL_TERTIARY , NULL ,
{ 7 , 26 , - 1 } , { 3 , 3 } } ,
/*012345678901234567890123456789012345678901234567890 */
{ " a busy bee is a very busy beeee with no bee life " , " bee " , NULL ,
UCOL_TERTIARY , NULL , { 7 , 26 , 40 , - 1 } , { 3 , 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData SUPPLEMENTARYCANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
{ " abc \\ uD800 \\ uDC00 \\ uD800 \\ uDC01 \\ uD801 \\ uDC00 \\ uD800 \\ uDC00abc abc \\ uD800 \\ uDC00 \\ uD800 \\ uD800 \\ uDC00 \\ uD800 \\ uDC00 \\ uDC00 " ,
" \\ uD800 \\ uDC00 " , NULL , UCOL_TERTIARY , NULL , { 4 , 13 , 22 , 26 , 29 , - 1 } ,
{ 2 , 2 , 2 , 2 , 2 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
static const SearchData CONTRACTIONCANONICAL [ ] = {
/* common discontiguous */
{ " A \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } , { 2 } } ,
{ " A \\ u0300 \\ u0315 " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { 1 , - 1 } ,
{ 2 } } ,
/* contraction prefix */
{ " AB \\ u0315C " , " A " , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } } ,
{ " AB \\ u0315C " , " AB " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
{ " AB \\ u0315C " , " \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { 2 , - 1 } , { 1 } } ,
/* discontiguous problem here for backwards iteration.
forwards gives 0 , 4 but backwards give 1 , 3 */
/* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1},
{ 4 } } , */
/* ends not with a contraction character */
{ " X \\ u0315 \\ u0300D " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL , { - 1 } ,
{ 0 } } ,
{ " X \\ u0315 \\ u0300D " , " X \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , NULL ,
{ 0 , - 1 } , { 3 } } ,
{ " X \\ u0300 \\ u031A \\ u0315D " , " X \\ u0300 " , NULL , UCOL_TERTIARY , NULL ,
{ 0 , - 1 } , { 4 } } ,
/* blocked discontiguous */
{ " X \\ u0300 \\ u031A \\ u0315D " , " \\ u031A \\ u0315D " , NULL , UCOL_TERTIARY , NULL ,
{ 1 , - 1 } , { 4 } } ,
2001-09-11 21:40:05 +00:00
{ " ab " , " z " , NULL , UCOL_TERTIARY , NULL , { 0 , - 1 } , { 2 } } ,
2001-08-25 02:06:37 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , NULL , { - 1 } , { 0 } }
} ;
# endif