2016-06-15 18:58:17 +00:00
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
2001-08-25 02:06:37 +00:00
/********************************************************************
2016-05-31 21:45:07 +00:00
* Copyright ( c ) 2001 - 2011 , 2015 International Business Machines
* Corporation and others . All Rights Reserved .
2001-08-25 02:06:37 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* File USRCHDAT.C
* Modification History :
* Name date Description
* synwee July 31 2001 creation
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2006-09-28 08:40:44 +00:00
/*
Note : This file is included by other C and C + + files . This file should not be directly compiled .
*/
2001-08-25 02:06:37 +00:00
# ifndef USRCHDAT_C
# define USRCHDAT_C
2006-09-28 08:40:44 +00:00
# include "unicode/ucol.h"
2002-09-20 17:54:45 +00:00
# if !UCONFIG_NO_COLLATION
2008-05-23 04:22:28 +00:00
/* Set to 1 if matches must be on grapheme boundaries */
# define GRAPHEME_BOUNDARIES 1
2006-09-28 08:40:44 +00:00
U_CDECL_BEGIN
2001-08-25 02:06:37 +00:00
struct SearchData {
const char * text ;
const char * pattern ;
2010-02-27 01:01:45 +00:00
const char * collator ; /* currently supported "fr" "es" "de", plus NULL/other => "en" */
2001-08-25 02:06:37 +00:00
UCollationStrength strength ;
2010-02-27 01:01:45 +00:00
USearchAttributeValue elemCompare ; /* value for the USEARCH_ELEMENT_COMPARISON attribute */
const char * breaker ; /* currently supported "wordbreaker" for EN_WORDBREAKER_, plus NULL/other => EN_CHARACTERBREAKER_ */
2006-09-28 08:40:44 +00:00
int8_t offset [ 32 ] ;
uint8_t size [ 32 ] ;
2001-08-25 02:06:37 +00:00
} ;
2006-09-28 08:40:44 +00:00
U_CDECL_END
2001-08-25 02:06:37 +00:00
/* Alias so the tables below can be declared without the struct keyword. */
typedef struct SearchData SearchData ;
/* Collation rule string. NOTE(review): presumably used to build the tailored collator for the COLLATOR table -- confirm against the test driver. */
static const char * TESTCOLLATORRULE = " & o,O ; p,P " ;
/* Collation rule string pairing ae/AE, oe/OE and ue/UE with the escaped code points U+00E4/U+00C4, U+00F6/U+00D6 and U+00FC/U+00DC. */
static const char * EXTRACOLLATIONRULE = " & ae ; \\ u00e4 & AE ; \\ u00c4 & oe ; \\ u00f6 & OE ; \\ u00d6 & ue ; \\ u00fc & UE ; \\ u00dc " ;
static const SearchData BASIC [ ] = {
2010-02-27 01:01:45 +00:00
{ " xxxxxxxxxxxxxxxxxxxx " , " fisher " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " silly spring string " , " string " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 13 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 6 } } ,
2010-02-27 01:01:45 +00:00
{ " silly spring string string " , " string " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 13 , 20 , - 1 } , { 6 , 6 } } ,
2010-02-27 01:01:45 +00:00
{ " silly string spring string " , " string " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 6 , 20 , - 1 } , { 6 , 6 } } ,
2010-02-27 01:01:45 +00:00
{ " string spring string " , " string " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 14 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 6 , 6 } } ,
2010-02-27 01:01:45 +00:00
{ " Scott Ganyo " , " c " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
{ " Scott Ganyo " , " " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 5 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300b " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
{ " a \\ u0300b " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ " \\ u00c9 " , " e " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2011-03-22 03:50:26 +00:00
{ " x a \\ u0301 " , " a \\ u0301 " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 2 , - 1 } , { 2 } } ,
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
2002-03-19 21:50:15 +00:00
static const SearchData BREAKITERATOREXACT [ ] = {
2010-02-27 01:01:45 +00:00
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " characterbreaker " , { 0 , 5 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 3 , 3 } } ,
2010-02-27 01:01:45 +00:00
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 5 , - 1 } , { 3 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-08-25 02:06:37 +00:00
" characterbreaker " , { 10 , 14 , - 1 } , { 3 , 2 } } ,
2010-02-27 01:01:45 +00:00
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " ,
2001-08-25 02:06:37 +00:00
{ 10 , - 1 } , { 3 } } ,
{ " Channel, another channel, more channels, and one last Channel " ,
2010-02-27 01:01:45 +00:00
" Channel " , " es " , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 0 , 54 , - 1 } , { 7 , 7 } } ,
2002-03-20 16:39:37 +00:00
/* jitterbug 1745 */
2010-02-27 01:01:45 +00:00
{ " testing that \\ u00e9 does not match e " , " e " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2002-03-19 21:50:15 +00:00
" characterbreaker " , { 1 , 17 , 30 , - 1 } , { 1 , 1 , 1 } } ,
{ " testing that string ab \\ u00e9cd does not match e " , " e " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " characterbreaker " , { 1 , 28 , 41 , - 1 } , { 1 , 1 , 1 } } ,
{ " \\ u00c9 " , " e " , " fr " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " characterbreaker " , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
#if 0
/* Problem reported by Dave Bertoni, same as ticket 4279? */
2010-02-27 01:01:45 +00:00
{ " \\ u0043 \\ u004F \\ u0302 \\ u0054 \\ u00C9 " , " \\ u004F " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " characterbreaker " , { 1 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
} ;
/*
 * Shared sample text, assembled from ten adjacent string literals, each of
 * which contains one word built on "peche" with a different accent pattern.
 * Used as the text of the ticket #7093 rows in STRENGTH.
 */
# define PECHE_WITH_ACCENTS "un p\\u00E9ch\\u00E9, " \
" \\ u00E7a p \\ u00E8che par, " \
" p \\ u00E9cher, " \
" une p \\ u00EAche, " \
" un p \\ u00EAcher, " \
" j \\ u2019ai p \\ u00EAch \\ u00E9, " \
" un p \\ u00E9cheur, " \
" \\ u201Cp \\ u00E9che \\ u201D, " \
" decomp peche \\ u0301, " \
" base peche "
/* in the above, the interesting words and their offsets are:
3 pe < 301 > che < 301 >
13 pe < 300 > che
24 pe < 301 > cher
36 pe < 302 > che
46 pe < 302 > cher
59 pe < 302 > che < 301 >
69 pe < 301 > cheur
79 pe < 301 > che
94 peche < + 301 >
107 peche
*/
2001-08-25 02:06:37 +00:00
static const SearchData STRENGTH [ ] = {
2010-02-27 01:01:45 +00:00
/*012345678901234567890123456789012345678901234567890123456789*/
/*00*/ { " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
/*01*/ { " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 16 , - 1 } , { 3 } } ,
/*02*/ { " blackbirds Pat p \\ u00E9ch \\ u00E9 p \\ u00EAche p \\ u00E9cher p \\ u00EAcher Tod T \\ u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe " ,
" peche " , " fr " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 15 , 21 , 27 , 34 , - 1 } , { 5 , 5 , 5 , 5 } } ,
/*03*/ { " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
{ 10 , 14 , - 1 } , { 3 , 2 } } ,
/*04*/ { " A channel, another CHANNEL, more Channels, and one last channel... " ,
" channel " , " es " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 2 , 19 , 33 , 56 , - 1 } , { 7 , 7 , 7 , 7 } } ,
/*05*/ { " \\ u00c0 should match but not A " , " A \\ u0300 " , " en " , UCOL_IDENTICAL , USEARCH_STANDARD_ELEMENT_COMPARISON ,
NULL , { 0 , - 1 } , { 1 , 0 } } ,
/* some tests for modified element comparison, ticket #7093 */
/*06*/ { PECHE_WITH_ACCENTS , " peche " , " en " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , 13 , 24 , 36 , 46 , 59 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*07*/ { PECHE_WITH_ACCENTS , " peche " , " en " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 3 , 13 , 36 , 59 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*08*/ { PECHE_WITH_ACCENTS , " peche " , " en " , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 107 , - 1 } , { 5 } } ,
/*09*/ { PECHE_WITH_ACCENTS , " peche " , " en " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 13 , 24 , 36 , 46 , 59 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*10*/ { PECHE_WITH_ACCENTS , " peche " , " en " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 13 , 36 , 59 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*11*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " en " , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 24 , 69 , 79 , - 1 } , { 5 , 5 , 5 } } ,
/*12*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " en " , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 79 , - 1 } , { 5 } } ,
/*13*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " en " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 24 , 69 , 79 , - 1 } , { 5 , 5 , 5 , 5 } } ,
/*14*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " en " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 79 , - 1 } , { 5 , 5 } } ,
/*15*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " en " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 24 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*16*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " en " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 6 , 5 } } ,
/*17*/ { PECHE_WITH_ACCENTS , " pech \\ u00E9 " , " en " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 59 , 94 , - 1 } , { 5 , 5 , 6 } } ,
/*18*/ { PECHE_WITH_ACCENTS , " pech \\ u00E9 " , " en " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 59 , 94 , - 1 } , { 5 , 5 , 6 } } ,
/*19*/ { PECHE_WITH_ACCENTS , " pech \\ u00E9 " , " en " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 13 , 24 , 36 , 46 , 59 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*20*/ { PECHE_WITH_ACCENTS , " pech \\ u00E9 " , " en " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 13 , 36 , 59 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*21*/ { PECHE_WITH_ACCENTS , " peche \\ u0301 " , " en " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 59 , 94 , - 1 } , { 5 , 5 , 6 } } ,
/*22*/ { PECHE_WITH_ACCENTS , " peche \\ u0301 " , " en " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 59 , 94 , - 1 } , { 5 , 5 , 6 } } ,
/*23*/ { PECHE_WITH_ACCENTS , " peche \\ u0301 " , " en " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 13 , 24 , 36 , 46 , 59 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*24*/ { PECHE_WITH_ACCENTS , " peche \\ u0301 " , " en " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 13 , 36 , 59 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/* more tests for modified element comparison (with fr), ticket #7093 */
/*25*/ { PECHE_WITH_ACCENTS , " peche " , " fr " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , 13 , 24 , 36 , 46 , 59 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*26*/ { PECHE_WITH_ACCENTS , " peche " , " fr " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 3 , 13 , 36 , 59 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*27*/ { PECHE_WITH_ACCENTS , " peche " , " fr " , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 107 , - 1 } , { 5 } } ,
/*28*/ { PECHE_WITH_ACCENTS , " peche " , " fr " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 13 , 24 , 36 , 46 , 59 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*29*/ { PECHE_WITH_ACCENTS , " peche " , " fr " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 13 , 36 , 59 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*30*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " fr " , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 24 , 69 , 79 , - 1 } , { 5 , 5 , 5 } } ,
/*31*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " fr " , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 79 , - 1 } , { 5 } } ,
/*32*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " fr " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 24 , 69 , 79 , - 1 } , { 5 , 5 , 5 , 5 } } ,
/*33*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " fr " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 79 , - 1 } , { 5 , 5 } } ,
/*34*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " fr " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 24 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*35*/ { PECHE_WITH_ACCENTS , " p \\ u00E9che " , " fr " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 6 , 5 } } ,
/*36*/ { PECHE_WITH_ACCENTS , " pech \\ u00E9 " , " fr " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 59 , 94 , - 1 } , { 5 , 5 , 6 } } ,
/*37*/ { PECHE_WITH_ACCENTS , " pech \\ u00E9 " , " fr " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 59 , 94 , - 1 } , { 5 , 5 , 6 } } ,
/*38*/ { PECHE_WITH_ACCENTS , " pech \\ u00E9 " , " fr " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 13 , 24 , 36 , 46 , 59 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*39*/ { PECHE_WITH_ACCENTS , " pech \\ u00E9 " , " fr " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 13 , 36 , 59 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*40*/ { PECHE_WITH_ACCENTS , " peche \\ u0301 " , " fr " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 59 , 94 , - 1 } , { 5 , 5 , 6 } } ,
/*41*/ { PECHE_WITH_ACCENTS , " peche \\ u0301 " , " fr " , UCOL_SECONDARY , USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 59 , 94 , - 1 } , { 5 , 5 , 6 } } ,
/*42*/ { PECHE_WITH_ACCENTS , " peche \\ u0301 " , " fr " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , NULL , { 3 , 13 , 24 , 36 , 46 , 59 , 69 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
/*43*/ { PECHE_WITH_ACCENTS , " peche \\ u0301 " , " fr " , UCOL_SECONDARY , USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD , " wordbreaker " , { 3 , 13 , 36 , 59 , 79 , 94 , 107 , - 1 } , { 5 , 5 , 5 , 5 , 5 , 6 , 5 } } ,
2008-05-23 04:22:28 +00:00
#if 0
/* Ticket 5382 */
2010-02-27 01:01:45 +00:00
{ " 12 \\ u0171 " , " \\ u0170 " , NULL , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 2 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData VARIABLE [ ] = {
/*012345678901234567890123456789012345678901234567890123456789*/
{ " blackbirds black blackbirds blackbird black-bird " ,
2010-02-27 01:01:45 +00:00
" blackbird " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 17 , 28 , 38 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 9 , 9 , 9 , 10 } } ,
/* to see that it doesn't go into an infinite loop if the start of text
is a ignorable character */
2010-02-27 01:01:45 +00:00
{ " on " , " go " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " abcdefghijklmnopqrstuvwxyz " , " " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 ,
20 , 21 , 22 , 23 , 24 , 25 , - 1 } , { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } } ,
/* testing tightest match */
2010-02-27 01:01:45 +00:00
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_QUATERNARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-08-25 02:06:37 +00:00
NULL , { 1 , - 1 } , { 3 } } ,
/*012345678901234567890123456789012345678901234567890123456789 */
2010-02-27 01:01:45 +00:00
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-08-25 02:06:37 +00:00
NULL , { 1 , 6 , 13 , 21 , 31 , - 1 } , { 3 , 4 , 4 , 5 , 5 } } ,
/* totally ignorable text */
2010-02-27 01:01:45 +00:00
{ " --------------- " , " abc " , NULL , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-08-25 02:06:37 +00:00
NULL , { - 1 } , { 0 } } ,
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData NORMEXACT [ ] = {
2010-02-27 01:01:45 +00:00
{ " a \\ u0300 \\ u0325 " , " a \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 3 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData NONNORMEXACT [ ] = {
2010-02-27 01:01:45 +00:00
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData OVERLAP [ ] = {
2010-02-27 01:01:45 +00:00
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 2 , 4 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 4 , 4 , 4 } } ,
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData NONOVERLAP [ ] = {
2010-02-27 01:01:45 +00:00
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 4 , - 1 } , { 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData COLLATOR [ ] = {
/* english */
2010-02-27 01:01:45 +00:00
{ " fox fpx " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 3 } } ,
2001-08-25 02:06:37 +00:00
/* tailored */
2010-02-27 01:01:45 +00:00
{ " fox fpx " , " fox " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 4 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData PATTERN [ ] = {
{ " The quick brown fox jumps over the lazy foxes " , " the " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 31 , - 1 } , { 3 , 3 } } ,
2001-08-25 02:06:37 +00:00
{ " The quick brown fox jumps over the lazy foxes " , " fox " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData TEXT [ ] = {
2010-02-27 01:01:45 +00:00
{ " the foxy brown fox " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 4 , 15 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 3 , 3 } } ,
2010-02-27 01:01:45 +00:00
{ " the quick brown fox " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 16 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 3 } } ,
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData COMPOSITEBOUNDARIES [ ] = {
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " A \\ u00C0C " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u00C0A " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
{ " B \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u00C0B " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0300 \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u00C0C " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " \\ u00C0A " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " B \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u00C0B " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 1 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 1 , 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ " \\ u00C0 \\ u0300 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2001-08-25 02:06:37 +00:00
/* A + 030A + 0301 */
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u01FA " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " A \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u030AA " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " A \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ " A \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FAA " , " \\ u0301A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F73 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F71 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F72 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F71 \\ u0F72 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u0F73 " , " A \\ u0F71 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73A " , " \\ u0F72A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
/* Ticket 5024 */
2010-02-27 01:01:45 +00:00
{ " a \\ u00e1 " , " a \\ u00e1 " , NULL , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
/* Ticket 5420 */
2010-02-27 01:01:45 +00:00
{ " fu \\ u00dfball " , " fu \\ u00df " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 3 } } ,
{ " fu \\ u00dfball " , " fuss " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 3 } } ,
{ " fu \\ u00dfball " , " uss " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData MATCH [ ] = {
2010-02-27 01:01:45 +00:00
{ " a busy bee is a very busy beeee " , " bee " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 7 , 26 , - 1 } , { 3 , 3 } } ,
/* 012345678901234567890123456789012345678901234567890 */
{ " a busy bee is a very busy beeee with no bee life " , " bee " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 7 , 26 , 40 , - 1 } , { 3 , 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
static const SearchData SUPPLEMENTARY [ ] = {
/* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
{ " abc \\ uD800 \\ uDC00 \\ uD800 \\ uDC01 \\ uD801 \\ uDC00 \\ uD800 \\ uDC00abc abc \\ uD800 \\ uDC00 \\ uD800 \\ uD800 \\ uDC00 \\ uD800 \\ uDC00 \\ uDC00 " ,
2010-02-27 01:01:45 +00:00
" \\ uD800 \\ uDC00 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 4 , 13 , 22 , 26 , 29 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 2 , 2 , 2 , 2 , 2 } } ,
2003-07-31 19:31:58 +00:00
{ " and \\ uD834 \\ uDDB9this sentence " , " \\ uD834 \\ uDDB9 " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 2 } } ,
2003-07-31 19:31:58 +00:00
{ " and \\ uD834 \\ uDDB9 this sentence " , " \\ uD834 \\ uDDB9 " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 4 } } ,
2003-07-31 19:31:58 +00:00
{ " and- \\ uD834 \\ uDDB9-this sentence " , " - \\ uD834 \\ uDDB9- " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 4 } } ,
2003-07-31 19:31:58 +00:00
{ " and, \\ uD834 \\ uDDB9,this sentence " , " , \\ uD834 \\ uDDB9, " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 4 } } ,
2003-07-31 19:31:58 +00:00
{ " and? \\ uD834 \\ uDDB9?this sentence " , " ? \\ uD834 \\ uDDB9? " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
2001-09-11 21:40:05 +00:00
/*
 * Collation rule string that orders the multi-character sequences AB, ABC,
 * X + \u0300 and X + \u0300 + \u0315 after "z" and gives "ab" an expansion.
 * NOTE(review): presumably used to build the collator for the CONTRACTION
 * table -- confirm against the test driver.
 */
static const char * CONTRACTIONRULE =
" &z = ab/c < AB < X \\ u0300 < ABC < X \\ u0300 \\ u0315 " ;
2001-08-25 02:06:37 +00:00
static const SearchData CONTRACTION [ ] = {
/* common discontiguous */
2010-02-27 01:01:45 +00:00
{ " A \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " A \\ u0300 \\ u0315 " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " A \\ u0300 \\ u0315 " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2001-08-25 02:06:37 +00:00
/* contraction prefix */
2010-02-27 01:01:45 +00:00
{ " AB \\ u0315C " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " AB \\ u0315C " , " AB " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " AB \\ u0315C " , " \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " AB \\ u0315C " , " AB " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
{ " AB \\ u0315C " , " \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 2 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2001-08-25 02:06:37 +00:00
/* discontiguous problem here for backwards iteration.
accents not found because discontiguous stores all information */
2010-02-27 01:01:45 +00:00
{ " X \\ u0300 \\ u0319 \\ u0315 " , " \\ u0319 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } ,
2001-08-25 02:06:37 +00:00
{ 0 } } ,
/* ends not with a contraction character */
2010-02-27 01:01:45 +00:00
{ " X \\ u0315 \\ u0300D " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } ,
2001-08-25 02:06:37 +00:00
{ 0 } } ,
2010-02-27 01:01:45 +00:00
{ " X \\ u0315 \\ u0300D " , " X \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 0 , - 1 } , { 3 } } ,
2010-02-27 01:01:45 +00:00
{ " X \\ u0300 \\ u031A \\ u0315D " , " X \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } ,
2001-08-25 02:06:37 +00:00
{ 0 } } ,
/* blocked discontiguous */
2010-02-27 01:01:45 +00:00
{ " X \\ u0300 \\ u031A \\ u0315D " , " \\ u031A \\ u0315D " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
/*
* " ab " generates a contraction that ' s an expansion . The " z " matches the
* first CE of the expansion but the match fails because it ends in the
* middle of an expansion . . .
*/
2010-02-27 01:01:45 +00:00
{ " ab " , " z " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " ab " , " z " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * Rule string that, going by its name, accompanies the IGNORABLE table below.
 * NOTE(review): presumably a collation tailoring that makes U+0300 compare
 * equal to "a"; its consumer is not visible here -- confirm against the driver.
 */
static const char * IGNORABLERULE = " &a = \\ u0300 " ;
/*
 * IGNORABLE: one search case, given in two variants selected by
 * GRAPHEME_BOUNDARIES. Each row lists, in SearchData field order: text,
 * pattern, collator, strength, element-comparison mode, breaker, expected
 * match offsets, expected match sizes.
 * The last row has NULL text and pattern (presumably the end-of-table marker
 * the test driver looks for -- confirm against the consumer).
 * NOTE(review): offset lists appear to be terminated by -1.
 */
static const SearchData IGNORABLE [ ] = {
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
/*
* This isn't much of a test when matches have to be on
* grapheme boundaries. The match at 0 only works because
* it's at the start of the text.
*/
2010-02-27 01:01:45 +00:00
{ " \\ u0300 \\ u0315 \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2008-05-23 04:22:28 +00:00
{ 0 , - 1 } , { 2 } } ,
# else
2010-02-27 01:01:45 +00:00
{ " \\ u0300 \\ u0315 \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2007-10-18 22:29:39 +00:00
{ 0 , 3 , - 1 } , { 2 , 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * BASICCANONICAL: plain ASCII searches followed by combining-mark cases.
 * Row layout follows SearchData: text, pattern, collator, strength,
 * element-comparison mode, breaker, expected offsets, expected sizes.
 * The combining-mark rows exist twice: the GRAPHEME_BOUNDARIES variant expects
 * no match ({ -1 }) where the #else variant lists match positions.
 * The last two data rows (U+00C4/U+0323 and U+0308/U+0323) are shared by both
 * variants. The table ends with a row whose text and pattern are NULL.
 * NOTE(review): offset lists appear to be terminated by -1.
 */
static const SearchData BASICCANONICAL [ ] = {
2010-02-27 01:01:45 +00:00
{ " xxxxxxxxxxxxxxxxxxxx " , " fisher " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " silly spring string " , " string " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 13 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 6 } } ,
2010-02-27 01:01:45 +00:00
{ " silly spring string string " , " string " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 13 , 20 , - 1 } , { 6 , 6 } } ,
2010-02-27 01:01:45 +00:00
{ " silly string spring string " , " string " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 6 , 20 , - 1 } , { 6 , 6 } } ,
2010-02-27 01:01:45 +00:00
{ " string spring string " , " string " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 14 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 6 , 6 } } ,
2010-02-27 01:01:45 +00:00
{ " Scott Ganyo " , " c " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
{ " Scott Ganyo " , " " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 5 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300b " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325b " , " \\ u0300b " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0325 \\ u0300A \\ u0325 \\ u0300 " , " \\ u0300A \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2008-05-23 04:22:28 +00:00
NULL , { - 1 } , { 0 } } ,
2010-02-27 01:01:45 +00:00
{ " \\ u0325 \\ u0300A \\ u0325 \\ u0300 " , " \\ u0325A \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2008-05-23 04:22:28 +00:00
NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325b \\ u0300 \\ u0325c \\ u0325b \\ u0300 \\ u0300b \\ u0325 " ,
2010-02-27 01:01:45 +00:00
" \\ u0300b \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 2 } } ,
2010-02-27 01:01:45 +00:00
{ " a \\ u0300b " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
{ " a \\ u0300 \\ u0325b " , " \\ u0300b " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 3 } } ,
{ " \\ u0325 \\ u0300A \\ u0325 \\ u0300 " , " \\ u0300A \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-09-07 21:57:32 +00:00
NULL , { 0 , - 1 } , { 5 } } ,
2010-02-27 01:01:45 +00:00
{ " \\ u0325 \\ u0300A \\ u0325 \\ u0300 " , " \\ u0325A \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-09-07 21:57:32 +00:00
NULL , { 0 , - 1 } , { 5 } } ,
2001-08-25 02:06:37 +00:00
{ " a \\ u0300 \\ u0325b \\ u0300 \\ u0325c \\ u0325b \\ u0300 \\ u0300b \\ u0325 " ,
2010-02-27 01:01:45 +00:00
" \\ u0300b \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , 12 , - 1 } , { 5 , 3 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ " \\ u00c4 \\ u0323 " , " A \\ u0323 \\ u0308 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0308 \\ u0323 " , " \\ u0323 \\ u0308 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
2008-05-23 04:22:28 +00:00
2001-08-25 02:06:37 +00:00
/*
 * NORMCANONICAL: six searches for U+0300 / U+0325 combinations, with and
 * without a leading "a". Row layout follows SearchData: text, pattern,
 * collator, strength, element-comparison mode, breaker, expected offsets,
 * expected sizes.
 * With GRAPHEME_BOUNDARIES every row expects no match ({ -1 }); the #else
 * variant expects one match of size 2 per row.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData NORMCANONICAL [ ] = {
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
/*
* These tests don't really mean anything. With matches restricted to grapheme
* boundaries, isCanonicalMatch doesn't mean anything unless normalization is
* also turned on...
*/
2010-02-27 01:01:45 +00:00
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0300 \\ u0325 " , " \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0325 \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 2 } } ,
2010-02-27 01:01:45 +00:00
{ " a \\ u0300 \\ u0325 " , " \\ u0300 \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 2 } } ,
2010-02-27 01:01:45 +00:00
{ " a \\ u0300 \\ u0325 " , " \\ u0325 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
{ " a \\ u0300 \\ u0325 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * BREAKITERATORCANONICAL: cases whose breaker field is set to either
 * "characterbreaker" or "wordbreaker"; several texts appear once with each
 * breaker and list different expected offsets. Row layout follows SearchData:
 * text, pattern, collator, strength, element-comparison mode, breaker,
 * expected offsets, expected sizes.
 * Collators used here: NULL, "de", "es" and "fr".
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData BREAKITERATORCANONICAL [ ] = {
2010-02-27 01:01:45 +00:00
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " characterbreaker " , { 0 , 5 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 3 , 3 } } ,
2010-02-27 01:01:45 +00:00
{ " foxy fox " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 5 , - 1 } , { 3 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-08-25 02:06:37 +00:00
" characterbreaker " , { 10 , 14 , - 1 } , { 3 , 2 } } ,
2010-02-27 01:01:45 +00:00
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " ,
2001-08-25 02:06:37 +00:00
{ 10 , - 1 } , { 3 } } ,
{ " Channel, another channel, more channels, and one last Channel " ,
2010-02-27 01:01:45 +00:00
" Channel " , " es " , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 0 , 54 , - 1 } , { 7 , 7 } } ,
2002-03-20 16:39:37 +00:00
/* jitterbug 1745 */
2010-02-27 01:01:45 +00:00
{ " testing that \\ u00e9 does not match e " , " e " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2002-03-19 21:50:15 +00:00
" characterbreaker " , { 1 , 17 , 30 , - 1 } , { 1 , 1 , 1 } } ,
{ " testing that string ab \\ u00e9cd does not match e " , " e " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " characterbreaker " , { 1 , 28 , 41 , - 1 } , { 1 , 1 , 1 } } ,
{ " \\ u00c9 " , " e " , " fr " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " characterbreaker " , { 0 , - 1 } , { 1 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * STRENGTHCANONICAL: every data row searches at UCOL_PRIMARY strength, using
 * the "en", "fr", "de" and "es" collators. Row layout follows SearchData:
 * text, pattern, collator, strength, element-comparison mode, breaker,
 * expected offsets, expected sizes.
 * The digit ruler comment below marks column positions for the first text.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData STRENGTHCANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890123456789 */
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
2010-02-27 01:01:45 +00:00
UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
2001-08-25 02:06:37 +00:00
{ " The quick brown fox jumps over the lazy foxes " , " fox " , " en " ,
2010-02-27 01:01:45 +00:00
UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , " wordbreaker " , { 16 , - 1 } , { 3 } } ,
2001-08-25 02:06:37 +00:00
{ " blackbirds Pat p \\ u00E9ch \\ u00E9 p \\ u00EAche p \\ u00E9cher p \\ u00EAcher Tod T \\ u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe " ,
2010-02-27 01:01:45 +00:00
" peche " , " fr " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 15 , 21 , 27 , 34 , - 1 } , { 5 , 5 , 5 , 5 } } ,
{ " This is a toe T \\ u00F6ne " , " toe " , " de " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 10 , 14 , - 1 } , { 3 , 2 } } ,
{ " A channel, another CHANNEL, more Channels, and one last channel... " ,
2010-02-27 01:01:45 +00:00
" channel " , " es " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 2 , 19 , 33 , 56 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 7 , 7 , 7 , 7 } } ,
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * VARIABLECANONICAL: cases involving spaces and hyphens in the text, searched
 * at PRIMARY, SECONDARY, TERTIARY and QUATERNARY strength. Row layout follows
 * SearchData: text, pattern, collator, strength, element-comparison mode,
 * breaker, expected offsets, expected sizes.
 * One row uses an empty-looking pattern against the alphabet and lists 26
 * offsets (0..25), each with size 0.
 * NOTE(review): the expected sizes of 4, 5 and 10 for three-letter and
 * nine-letter patterns suggest the matches span characters that are ignored
 * at that strength -- confirm against the collator configuration in the driver.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData VARIABLECANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890123456789 */
{ " blackbirds black blackbirds blackbird black-bird " ,
2010-02-27 01:01:45 +00:00
" blackbird " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 17 , 28 , 38 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 9 , 9 , 9 , 10 } } ,
/* to see that it doesn't go into an infinite loop if the start of text
is an ignorable character */
2010-02-27 01:01:45 +00:00
{ " on " , " go " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " abcdefghijklmnopqrstuvwxyz " , " " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 ,
20 , 21 , 22 , 23 , 24 , 25 , - 1 } , { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } } ,
/* testing tightest match */
2010-02-27 01:01:45 +00:00
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_QUATERNARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-08-25 02:06:37 +00:00
NULL , { 1 , - 1 } , { 3 } } ,
/*012345678901234567890123456789012345678901234567890123456789 */
2010-02-27 01:01:45 +00:00
{ " abc a bc ab c a bc ab c " , " abc " , NULL , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-08-25 02:06:37 +00:00
NULL , { 1 , 6 , 13 , 21 , 31 , - 1 } , { 3 , 4 , 4 , 5 , 5 } } ,
/* totally ignorable text */
2010-02-27 01:01:45 +00:00
{ " --------------- " , " abc " , NULL , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON ,
2001-08-25 02:06:37 +00:00
NULL , { - 1 } , { 0 } } ,
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * OVERLAPCANONICAL: a single case searching "abab" in "abababab" that expects
 * three matches at offsets 0, 2 and 4, i.e. matches that share characters.
 * Compare NONOVERLAPCANONICAL, which uses the same text and pattern but
 * expects only offsets 0 and 4.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData OVERLAPCANONICAL [ ] = {
2010-02-27 01:01:45 +00:00
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 2 , 4 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 4 , 4 , 4 } } ,
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * NONOVERLAPCANONICAL: a single case searching "abab" in "abababab" that
 * expects two matches, at offsets 0 and 4, each of size 4 (the matches do not
 * share characters).
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData NONOVERLAPCANONICAL [ ] = {
2010-02-27 01:01:45 +00:00
{ " abababab " , " abab " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 4 , - 1 } , { 4 , 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * COLLATORCANONICAL: the same text and pattern ("fox fpx" / "fox") listed
 * twice. The row labelled "english" expects one match at 0; the row labelled
 * "tailored" expects matches at 0 and 4.
 * NOTE(review): both rows carry a NULL collator, so the tailoring must be
 * applied by the driver -- presumably from TESTCOLLATORRULE; confirm.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData COLLATORCANONICAL [ ] = {
/* english */
2010-02-27 01:01:45 +00:00
{ " fox fpx " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 3 } } ,
2001-08-25 02:06:37 +00:00
/* tailored */
2010-02-27 01:01:45 +00:00
{ " fox fpx " , " fox " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 4 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * PATTERNCANONICAL: one text searched with two different patterns ("the" and
 * "fox") at UCOL_PRIMARY strength; each row expects two matches of size 3.
 * Row layout follows SearchData: text, pattern, collator, strength,
 * element-comparison mode, breaker, expected offsets, expected sizes.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData PATTERNCANONICAL [ ] = {
{ " The quick brown fox jumps over the lazy foxes " , " the " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 31 , - 1 } , { 3 , 3 } } ,
2001-08-25 02:06:37 +00:00
{ " The quick brown fox jumps over the lazy foxes " , " fox " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 16 , 40 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * TEXTCANONICAL: one pattern ("fox") searched in two different texts at
 * UCOL_TERTIARY strength. Row layout follows SearchData: text, pattern,
 * collator, strength, element-comparison mode, breaker, expected offsets,
 * expected sizes.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData TEXTCANONICAL [ ] = {
2010-02-27 01:01:45 +00:00
{ " the foxy brown fox " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 4 , 15 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 3 , 3 } } ,
2010-02-27 01:01:45 +00:00
{ " the quick brown fox " , " fox " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 16 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 3 } } ,
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * COMPOSITEBOUNDARIESCANONICAL: searches for a base letter or a combining mark
 * inside texts that contain precomposed characters (U+00C0, U+01FA, U+0F73).
 * Row layout follows SearchData: text, pattern, collator, strength,
 * element-comparison mode, breaker, expected offsets, expected sizes.
 * The table alternates between rows shared by both builds and
 * #if GRAPHEME_BOUNDARIES / #else pairs. In each pair the GRAPHEME_BOUNDARIES
 * variant expects fewer matches (often none) than the #else variant.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData COMPOSITEBOUNDARIESCANONICAL [ ] = {
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " A \\ u00C0C " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u00C0A " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
{ " B \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u00C0B " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
/* first one matches only because it's at the start of the text */
2010-02-27 01:01:45 +00:00
{ " \\ u0300 \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
/* \\u0300 blocked by \\u0300 */
2010-02-27 01:01:45 +00:00
{ " \\ u00C0 \\ u0300 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u00C0C " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " \\ u00C0A " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 1 , - 1 } , { 1 , 1 } } ,
{ " B \\ u00C0 " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u00C0B " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0300 \\ u00C0 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 1 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 1 , 1 } } ,
/* \\u0300 blocked by \\u0300 */
2010-02-27 01:01:45 +00:00
{ " \\ u00C0 \\ u0300 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2001-08-25 02:06:37 +00:00
/* A + 030A + 0301 */
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u01FA " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " A \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u030AA " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2001-08-25 02:06:37 +00:00
/* blocked accent */
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " A \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FA " , " \\ u0301A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " A \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u01FAA " , " \\ u0301A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u01FA " , " \\ u030A \\ u0301 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " A \\ u01FA " , " A \\ u030A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 1 } } ,
{ " \\ u01FAA " , " \\ u0301A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ " \\ u0F73 " , " \\ u0F73 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " \\ u0F73 " , " \\ u0F71 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73 " , " \\ u0F72 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " \\ u0F73 " , " \\ u0F71 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
{ " \\ u0F73 " , " \\ u0F72 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ " \\ u0F73 " , " \\ u0F71 \\ u0F72 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " A \\ u0F73 " , " A \\ u0F71 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " \\ u0F73A " , " \\ u0F72A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
{ " \\ u01FA A \\ u0301 \\ u030A A \\ u030A \\ u0301 A \\ u030A \\ u01FA " , " A \\ u030A " ,
2010-02-27 01:01:45 +00:00
NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 10 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " A \\ u0F73 " , " A \\ u0F71 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
{ " \\ u0F73A " , " \\ u0F72A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
2001-08-25 02:06:37 +00:00
{ " \\ u01FA A \\ u0301 \\ u030A A \\ u030A \\ u0301 A \\ u030A \\ u01FA " , " A \\ u030A " ,
2010-02-27 01:01:45 +00:00
NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 6 , 10 , 13 , - 1 } , { 1 , 3 , 2 , 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * MATCHCANONICAL: two cases searching "bee" in sentences that also contain
 * "beeee"; the expected matches are of size 3 at the listed offsets.
 * Row layout follows SearchData: text, pattern, collator, strength,
 * element-comparison mode, breaker, expected offsets, expected sizes.
 * The digit ruler comment marks column positions for the second text.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData MATCHCANONICAL [ ] = {
2010-02-27 01:01:45 +00:00
{ " a busy bee is a very busy beeee " , " bee " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL ,
2001-08-25 02:06:37 +00:00
{ 7 , 26 , - 1 } , { 3 , 3 } } ,
/*012345678901234567890123456789012345678901234567890 */
{ " a busy bee is a very busy beeee with no bee life " , " bee " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 7 , 26 , 40 , - 1 } , { 3 , 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * SUPPLEMENTARYCANONICAL: cases whose text and pattern contain UTF-16
 * surrogate escapes (\uD800\uDC00, \uD834\uDDB9), including a text with
 * unpaired surrogates. Row layout follows SearchData: text, pattern, collator,
 * strength, element-comparison mode, breaker, expected offsets, expected sizes.
 * NOTE(review): offsets and sizes appear to be counted in UTF-16 code units
 * (a surrogate pair has size 2) -- confirm against the driver.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData SUPPLEMENTARYCANONICAL [ ] = {
/*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
{ " abc \\ uD800 \\ uDC00 \\ uD800 \\ uDC01 \\ uD801 \\ uDC00 \\ uD800 \\ uDC00abc abc \\ uD800 \\ uDC00 \\ uD800 \\ uD800 \\ uDC00 \\ uD800 \\ uDC00 \\ uDC00 " ,
2010-02-27 01:01:45 +00:00
" \\ uD800 \\ uDC00 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 4 , 13 , 22 , 26 , 29 , - 1 } ,
2001-08-25 02:06:37 +00:00
{ 2 , 2 , 2 , 2 , 2 } } ,
2003-07-31 19:31:58 +00:00
{ " and \\ uD834 \\ uDDB9this sentence " , " \\ uD834 \\ uDDB9 " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 2 } } ,
2003-07-31 19:31:58 +00:00
{ " and \\ uD834 \\ uDDB9 this sentence " , " \\ uD834 \\ uDDB9 " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 4 } } ,
2003-07-31 19:31:58 +00:00
{ " and- \\ uD834 \\ uDDB9-this sentence " , " - \\ uD834 \\ uDDB9- " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 4 } } ,
2003-07-31 19:31:58 +00:00
{ " and, \\ uD834 \\ uDDB9,this sentence " , " , \\ uD834 \\ uDDB9, " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 4 } } ,
2003-07-31 19:31:58 +00:00
{ " and? \\ uD834 \\ uDDB9?this sentence " , " ? \\ uD834 \\ uDDB9? " , NULL ,
2010-02-27 01:01:45 +00:00
UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 3 , - 1 } , { 4 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
/*
 * CONTRACTIONCANONICAL: cases built around contractions, contraction prefixes
 * and discontiguous combining-mark sequences. Row layout follows SearchData:
 * text, pattern, collator, strength, element-comparison mode, breaker,
 * expected offsets, expected sizes.
 * Several cases come as #if GRAPHEME_BOUNDARIES / #else pairs; the
 * GRAPHEME_BOUNDARIES variants expect no match ({ -1 }) and therefore carry
 * a size list of { 0 }, like every other no-match row in this file.
 * NOTE(review): every row has a NULL collator, so the contractions must come
 * from rules installed by the driver -- confirm which rule string is used.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData CONTRACTIONCANONICAL [ ] = {
/* common discontiguous */
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " A \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " A \\ u0300 \\ u0315 " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " A \\ u0300 \\ u0315 " , " \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
{ " A \\ u0300 \\ u0315 " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2001-08-25 02:06:37 +00:00
/* contraction prefix */
2010-02-27 01:01:45 +00:00
{ " AB \\ u0315C " , " A " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " AB \\ u0315C " , " AB " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " AB \\ u0315C " , " \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " AB \\ u0315C " , " AB " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
{ " AB \\ u0315C " , " \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 2 , - 1 } , { 1 } } ,
2008-05-23 04:22:28 +00:00
# endif
2001-08-25 02:06:37 +00:00
/* discontiguous problem here for backwards iteration.
forwards gives 0 , 4 but backwards give 1 , 3 */
2010-02-27 01:01:45 +00:00
/* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1},
2001-08-25 02:06:37 +00:00
{ 4 } } , */
/* ends not with a contraction character */
2010-02-27 01:01:45 +00:00
{ " X \\ u0315 \\ u0300D " , " \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
{ " X \\ u0315 \\ u0300D " , " X \\ u0300 \\ u0315 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 3 } } ,
2008-05-23 04:22:28 +00:00
# if GRAPHEME_BOUNDARIES
2010-02-27 01:01:45 +00:00
{ " X \\ u0300 \\ u031A \\ u0315D " , " X \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
2001-08-25 02:06:37 +00:00
/* blocked discontiguous */
2010-02-27 01:01:45 +00:00
{ " X \\ u0300 \\ u031A \\ u0315D " , " \\ u031A \\ u0315D " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
/*
* "ab" generates a contraction that's an expansion. The "z" matches the
* first CE of the expansion but the match fails because it ends in the
* middle of an expansion...
* No match is expected, so the size list is { 0 }.
*/
2010-02-27 01:01:45 +00:00
{ " ab " , " z " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } } ,
2008-05-23 04:22:28 +00:00
# else
2010-02-27 01:01:45 +00:00
{ " X \\ u0300 \\ u031A \\ u0315D " , " X \\ u0300 " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 4 } } ,
2008-05-23 04:22:28 +00:00
/* blocked discontiguous */
2010-02-27 01:01:45 +00:00
{ " X \\ u0300 \\ u031A \\ u0315D " , " \\ u031A \\ u0315D " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 4 } } ,
2008-05-23 04:22:28 +00:00
2010-02-27 01:01:45 +00:00
{ " ab " , " z " , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , - 1 } , { 2 } } ,
2008-05-23 04:22:28 +00:00
# endif
2010-02-27 01:01:45 +00:00
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2001-08-25 02:06:37 +00:00
} ;
2008-05-23 04:22:28 +00:00
/*
 * DIACRITICMATCH: cases where the text carries diacritics (Greek with
 * U+0300 / U+1F76, Latin with U+00E1, U+00C2 U+0303 and related forms) and is
 * searched at PRIMARY or SECONDARY strength. Row layout follows SearchData:
 * text, pattern, collator, strength, element-comparison mode, breaker,
 * expected offsets, expected sizes.
 * NOTE(review): the third row passes the collator string
 * "LDE_AN_CX_EX_FX_HX_NX_S1", which is not one of the locale names listed in
 * the SearchData field comment; presumably the driver treats it as a short
 * collator definition string -- confirm.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData DIACRITICMATCH [ ] = {
2011-03-22 03:50:26 +00:00
{ " \\ u03BA \\ u03B1 \\ u03B9 \\ u0300 \\ u0020 \\ u03BA \\ u03B1 \\ u1F76 " , " \\ u03BA \\ u03B1 \\ u03B9 " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 5 , - 1 } , { 4 , 3 } } ,
{ " \\ u0061 \\ u0061 \\ u00E1 " , " \\ u0061 \\ u00E1 " , NULL , UCOL_SECONDARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , - 1 } , { 2 } } ,
{ " \\ u0020 \\ u00C2 \\ u0303 \\ u0020 \\ u0041 \\ u0061 \\ u1EAA \\ u0041 \\ u0302 \\ u0303 \\ u00C2 \\ u0303 \\ u1EAB \\ u0061 \\ u0302 \\ u0303 \\ u00E2 \\ u0303 \\ uD806 \\ uDC01 \\ u0300 \\ u0020 " ,
" \\ u00C2 \\ u0303 " , " LDE_AN_CX_EX_FX_HX_NX_S1 " , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 1 , 4 , 5 , 6 , 7 , 10 , 12 , 13 , 16 , - 1 } , { 2 , 1 , 1 , 1 , 3 , 2 , 1 , 3 , 2 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
2007-10-23 19:09:59 +00:00
} ;
2015-09-13 18:58:19 +00:00
/*
 * INDICPREFIXMATCH: three UCOL_PRIMARY searches over texts made of Devanagari
 * code points (U+0915, U+0924 and following signs in the U+09xx range)
 * separated by U+0020. Row layout follows SearchData: text, pattern, collator,
 * strength, element-comparison mode, breaker, expected offsets, expected sizes.
 * The first row searches for U+0915 alone and expects nine matches whose
 * sizes are 1 or 2; the other two rows search for multi-character patterns.
 * The table ends with a row whose text and pattern are NULL.
 */
static const SearchData INDICPREFIXMATCH [ ] = {
2015-09-13 07:43:51 +00:00
{ " \\ u0915 \\ u0020 \\ u0915 \\ u0901 \\ u0020 \\ u0915 \\ u0902 \\ u0020 \\ u0915 \\ u0903 \\ u0020 \\ u0915 \\ u0940 \\ u0020 \\ u0915 \\ u093F \\ u0020 \\ u0915 \\ u0943 \\ u0020 \\ u0915 \\ u093C \\ u0020 \\ u0958 " ,
" \\ u0915 " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 2 , 5 , 8 , 11 , 14 , 17 , 20 , 23 , - 1 } , { 1 , 2 , 2 , 2 , 1 , 1 , 1 , 2 , 1 } } ,
{ " \\ u0915 \\ u0924 \\ u0020 \\ u0915 \\ u0924 \\ u0940 \\ u0020 \\ u0915 \\ u0924 \\ u093F \\ u0020 \\ u0915 \\ u0924 \\ u0947 \\ u0020 \\ u0915 \\ u0943 \\ u0924 \\ u0020 \\ u0915 \\ u0943 \\ u0924 \\ u0947 " ,
" \\ u0915 \\ u0924 " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 0 , 3 , 7 , 11 , - 1 } , { 2 , 2 , 2 , 2 } } ,
{ " \\ u0915 \\ u0924 \\ u0020 \\ u0915 \\ u0924 \\ u0940 \\ u0020 \\ u0915 \\ u0924 \\ u093F \\ u0020 \\ u0915 \\ u0924 \\ u0947 \\ u0020 \\ u0915 \\ u0943 \\ u0924 \\ u0020 \\ u0915 \\ u0943 \\ u0924 \\ u0947 " ,
" \\ u0915 \\ u0943 \\ u0924 " , NULL , UCOL_PRIMARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { 15 , 19 , - 1 } , { 3 , 3 } } ,
{ NULL , NULL , NULL , UCOL_TERTIARY , USEARCH_STANDARD_ELEMENT_COMPARISON , NULL , { - 1 } , { 0 } }
} ;
2002-09-20 17:54:45 +00:00
# endif /* #if !UCONFIG_NO_COLLATION */
2001-08-25 02:06:37 +00:00
# endif