ICU-1030
Added Unicode Boyer Moore string search tests. X-SVN-Rev: 5588
This commit is contained in:
parent
75adf07067
commit
e7f55fd981
@ -58,7 +58,7 @@ cnmdptst.o cnormtst.o cnumtst.o cregrtst.o crestst.o creststn.o cturtst.o \
|
||||
cucdtst.o cutiltst.o encoll.o nucnvtst.o susctest.o nccbtst.o \
|
||||
cbiditst.o cbididat.o eurocreg.o udatatst.o utf16tst.o utransts.o \
|
||||
ncnvfbts.o ncnvtst.o putiltst.o cstrtest.o mstrmtst.o utf8tst.o ucmptst.o \
|
||||
stdnmtst.o ucmpetst.o ctstdep.o
|
||||
stdnmtst.o ucmpetst.o ctstdep.o usrchtst.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
@ -43,6 +43,7 @@ void addCollTest(TestNode** root)
|
||||
addAllCollTest(root);
|
||||
addMiscCollTest(root);
|
||||
|
||||
addSearchTest(root);
|
||||
}
|
||||
|
||||
|
||||
|
@ -48,5 +48,6 @@ void addRuleBasedCollTest(TestNode**);
|
||||
void addCollIterTest(TestNode**);
|
||||
void addAllCollTest(TestNode**);
|
||||
void addMiscCollTest(TestNode**);
|
||||
void addSearchTest(TestNode**);
|
||||
|
||||
#endif
|
||||
|
@ -300,6 +300,14 @@ SOURCE=.\udatatst.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\usrchdat.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\usrchtst.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\utf16tst.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
435
icu4c/source/test/cintltst/usrchdat.c
Normal file
435
icu4c/source/test/cintltst/usrchdat.c
Normal file
@ -0,0 +1,435 @@
|
||||
/********************************************************************
|
||||
* Copyright (c) 2001,
|
||||
* International Business Machines Corporation and others.
|
||||
* All Rights Reserved.
|
||||
********************************************************************
|
||||
* File USRCHDAT.C
|
||||
* Modification History:
|
||||
* Name date Description
|
||||
* synwee July 31 2001 creation
|
||||
********************************************************************/
|
||||
|
||||
#ifndef USRCHDAT_C
|
||||
#define USRCHDAT_C
|
||||
|
||||
#include "unicode/ucol.h"
|
||||
|
||||
/*
 * One string-search test case: what to search, what to search for, the
 * settings to search with, and the matches that are expected.
 * Every table of SearchData in this file finishes with an entry whose text
 * and pattern are NULL -- presumably the end-of-table marker for the test
 * driver (the driver is not in this file; confirm there).
 */
struct SearchData {
|
||||
const char *text; /* text to be searched (escape sequences are kept as text in the literal) */
|
||||
const char *pattern; /* pattern to look for in text */
|
||||
const char *collator; /* "en", "fr", "de", "es" or NULL in the tables; presumably names the collator to use -- confirm in the driver */
|
||||
UCollationStrength strength; /* collation strength given for this case (UCOL_PRIMARY ... UCOL_QUATERNARY in the tables) */
|
||||
const char *breaker; /* "characterbreaker", "wordbreaker" or NULL in the tables; presumably names the break iterator -- confirm in the driver */
|
||||
UTextOffset offset[32]; /* expected match start offsets in text; the tables end each list with -1 */
|
||||
uint32_t size[32]; /* expected length of each match, in the same order as offset */
|
||||
};
|
||||
|
||||
typedef struct SearchData SearchData;
|
||||
|
||||
static const char *TESTCOLLATORRULE = "& o,O ; p,P";
|
||||
|
||||
static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc";
|
||||
|
||||
static const SearchData BASIC[] = {
|
||||
{"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1},
|
||||
{6}},
|
||||
{"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL,
|
||||
{13, 20, -1}, {6, 6}},
|
||||
{"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL,
|
||||
{6, 20, -1}, {6, 6}},
|
||||
{"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1},
|
||||
{6, 6}},
|
||||
{"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
|
||||
{"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}},
|
||||
{"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1},
|
||||
{2}},
|
||||
{"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData BREAKITERATOR[] = {
|
||||
{"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1},
|
||||
{3, 3}},
|
||||
{"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}},
|
||||
{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY,
|
||||
"characterbreaker", {10, 14, -1}, {3, 2}},
|
||||
{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker",
|
||||
{10, -1}, {3}},
|
||||
{"Channel, another channel, more channels, and one last Channel",
|
||||
"Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData STRENGTH[] = {
|
||||
/*012345678901234567890123456789012345678901234567890123456789*/
|
||||
{"The quick brown fox jumps over the lazy foxes", "fox", "en",
|
||||
UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}},
|
||||
{"The quick brown fox jumps over the lazy foxes", "fox", "en",
|
||||
UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}},
|
||||
{"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
|
||||
"peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}},
|
||||
{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL,
|
||||
{10, 14, -1}, {3, 2}},
|
||||
{"A channel, another CHANNEL, more Channels, and one last channel...",
|
||||
"channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1},
|
||||
{7, 7, 7, 7}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData VARIABLE[] = {
|
||||
/*012345678901234567890123456789012345678901234567890123456789*/
|
||||
{"blackbirds black blackbirds blackbird black-bird",
|
||||
"blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1},
|
||||
{9, 9, 9, 10}},
|
||||
/* to see that it doesn't go into an infinite loop if the start of text
|
||||
is an ignorable character */
|
||||
{" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL,
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
|
||||
/* testing tightest match */
|
||||
{" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY,
|
||||
NULL, {1, -1}, {3}},
|
||||
/*012345678901234567890123456789012345678901234567890123456789 */
|
||||
{" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY,
|
||||
NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}},
|
||||
/* totally ignorable text */
|
||||
{" ---------------", "abc", NULL, UCOL_SECONDARY,
|
||||
NULL, {-1}, {0}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData NORMEXACT[] = {
|
||||
{"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1},
|
||||
{2}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData NONNORMEXACT[] = {
|
||||
{"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1},
|
||||
{0}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData OVERLAP[] = {
|
||||
{"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1},
|
||||
{4, 4, 4}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData NONOVERLAP[] = {
|
||||
{"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData COLLATOR[] = {
|
||||
/* english */
|
||||
{"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}},
|
||||
/* tailored */
|
||||
{"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData PATTERN[] = {
|
||||
{"The quick brown fox jumps over the lazy foxes", "the", NULL,
|
||||
UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}},
|
||||
{"The quick brown fox jumps over the lazy foxes", "fox", NULL,
|
||||
UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData TEXT[] = {
|
||||
{"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1},
|
||||
{3, 3}},
|
||||
{"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1},
|
||||
{3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData COMPOSITEBOUNDARIES[] = {
|
||||
{"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
|
||||
{"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
|
||||
{"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
|
||||
{"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1},
|
||||
{1, 1}},
|
||||
{"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
/* A + 030A + 0301 */
|
||||
{"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData MATCH[] = {
|
||||
{"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL,
|
||||
{7, 26, -1}, {3, 3}},
|
||||
/* 012345678901234567890123456789012345678901234567890 */
|
||||
{"a busy bee is a very busy beeee with no bee life", "bee", NULL,
|
||||
UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData SUPPLEMENTARY[] = {
|
||||
/* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
|
||||
{"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00",
|
||||
"\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1},
|
||||
{2, 2, 2, 2, 2}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const char *CONTRACTIONRULE = "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
|
||||
|
||||
static const SearchData CONTRACTION[] = {
|
||||
/* common discontiguous */
|
||||
{"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1},
|
||||
{2}},
|
||||
/* contraction prefix */
|
||||
{"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
{"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}},
|
||||
/* discontiguous problem here for backwards iteration.
|
||||
accents not found because discontiguous stores all information */
|
||||
{"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {-1},
|
||||
{0}},
|
||||
/* ends not with a contraction character */
|
||||
{"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1},
|
||||
{0}},
|
||||
{"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL,
|
||||
{0, -1}, {3}},
|
||||
{"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, {-1},
|
||||
{0}},
|
||||
/* blocked discontiguous */
|
||||
{"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL,
|
||||
{-1}, {0}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const char *IGNORABLERULE = "&a = \\u0300";
|
||||
|
||||
static const SearchData IGNORABLE[] = {
|
||||
{"\\u0315\\u0300 \\u0315\\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, NULL,
|
||||
{0, 3, -1}, {2, 3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData BASICCANONICAL[] = {
|
||||
{"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1},
|
||||
{6}},
|
||||
{"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL,
|
||||
{13, 20, -1}, {6, 6}},
|
||||
{"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL,
|
||||
{6, 20, -1}, {6, 6}},
|
||||
{"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1},
|
||||
{6, 6}},
|
||||
{"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
|
||||
{"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}},
|
||||
{"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
{"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
|
||||
{"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1},
|
||||
{2}},
|
||||
{"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
|
||||
{"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, NULL, {1, -1}, {3}},
|
||||
{"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
|
||||
"\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData NORMCANONICAL[] = {
|
||||
{"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
{"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
{"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1},
|
||||
{2}},
|
||||
{"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1},
|
||||
{2}},
|
||||
{"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
|
||||
{"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData BREAKITERATORCANONICAL[] = {
|
||||
{"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1},
|
||||
{3, 3}},
|
||||
{"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}},
|
||||
{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY,
|
||||
"characterbreaker", {10, 14, -1}, {3, 2}},
|
||||
{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker",
|
||||
{10, -1}, {3}},
|
||||
{"Channel, another channel, more channels, and one last Channel",
|
||||
"Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData STRENGTHCANONICAL[] = {
|
||||
/*012345678901234567890123456789012345678901234567890123456789 */
|
||||
{"The quick brown fox jumps over the lazy foxes", "fox", "en",
|
||||
UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}},
|
||||
{"The quick brown fox jumps over the lazy foxes", "fox", "en",
|
||||
UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}},
|
||||
{"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
|
||||
"peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}},
|
||||
{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL,
|
||||
{10, 14, -1}, {3, 2}},
|
||||
{"A channel, another CHANNEL, more Channels, and one last channel...",
|
||||
"channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1},
|
||||
{7, 7, 7, 7}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData VARIABLECANONICAL[] = {
|
||||
/*012345678901234567890123456789012345678901234567890123456789 */
|
||||
{"blackbirds black blackbirds blackbird black-bird",
|
||||
"blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1},
|
||||
{9, 9, 9, 10}},
|
||||
/* to see that it doesn't go into an infinite loop if the start of text
|
||||
is an ignorable character */
|
||||
{" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL,
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
|
||||
/* testing tightest match */
|
||||
{" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY,
|
||||
NULL, {1, -1}, {3}},
|
||||
/*012345678901234567890123456789012345678901234567890123456789 */
|
||||
{" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY,
|
||||
NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}},
|
||||
/* totally ignorable text */
|
||||
{" ---------------", "abc", NULL, UCOL_SECONDARY,
|
||||
NULL, {-1}, {0}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData OVERLAPCANONICAL[] = {
|
||||
{"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1},
|
||||
{4, 4, 4}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData NONOVERLAPCANONICAL[] = {
|
||||
{"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData COLLATORCANONICAL[] = {
|
||||
/* english */
|
||||
{"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}},
|
||||
/* tailored */
|
||||
{"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData PATTERNCANONICAL[] = {
|
||||
{"The quick brown fox jumps over the lazy foxes", "the", NULL,
|
||||
UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}},
|
||||
{"The quick brown fox jumps over the lazy foxes", "fox", NULL,
|
||||
UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData TEXTCANONICAL[] = {
|
||||
{"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1},
|
||||
{3, 3}},
|
||||
{"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1},
|
||||
{3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
|
||||
{"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
|
||||
{"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
|
||||
{"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
|
||||
{"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1},
|
||||
{1, 1}},
|
||||
/* \\u0300 blocked by \\u0300 */
|
||||
{"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
/* A + 030A + 0301 */
|
||||
{"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
/* blocked accent */
|
||||
{"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
|
||||
{"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
{"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
|
||||
{"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
{"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
{"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A",
|
||||
NULL, UCOL_TERTIARY, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData MATCHCANONICAL[] = {
|
||||
{"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL,
|
||||
{7, 26, -1}, {3, 3}},
|
||||
/*012345678901234567890123456789012345678901234567890 */
|
||||
{"a busy bee is a very busy beeee with no bee life", "bee", NULL,
|
||||
UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData SUPPLEMENTARYCANONICAL[] = {
|
||||
/*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
|
||||
{"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00",
|
||||
"\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1},
|
||||
{2, 2, 2, 2, 2}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
static const SearchData CONTRACTIONCANONICAL[] = {
|
||||
/* common discontiguous */
|
||||
{"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
|
||||
{"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1},
|
||||
{2}},
|
||||
/* contraction prefix */
|
||||
{"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
|
||||
{"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
|
||||
{"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}},
|
||||
/* discontiguous problem here for backwards iteration.
|
||||
forwards gives 0, 4 but backwards gives 1, 3 */
|
||||
/* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1},
|
||||
{4}}, */
|
||||
|
||||
/* ends not with a contraction character */
|
||||
{"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1},
|
||||
{0}},
|
||||
{"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL,
|
||||
{0, -1}, {3}},
|
||||
{"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL,
|
||||
{0, -1}, {4}},
|
||||
/* blocked discontiguous */
|
||||
{"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL,
|
||||
{1, -1}, {4}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
#endif
|
1937
icu4c/source/test/cintltst/usrchtst.c
Normal file
1937
icu4c/source/test/cintltst/usrchtst.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -47,7 +47,7 @@ tsmthred.o tsmutex.o tsnmfmt.o tsputil.o tstnorm.o tzbdtest.o \
|
||||
tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o transtst.o strtest.o thcoll.o \
|
||||
itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o unhxtrts.o hxuntrts.o \
|
||||
jahatrts.o hajatrts.o ufltlgts.o testutil.o transrt.o normconf.o sfwdchit.o \
|
||||
jamotest.o
|
||||
jamotest.o srchtest.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
@ -317,6 +317,10 @@ SOURCE=.\sfwdchit.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\srchtest.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\strtest.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
@ -649,6 +653,10 @@ SOURCE=.\sfwdchit.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\srchtest.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\strtest.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
1943
icu4c/source/test/intltest/srchtest.cpp
Normal file
1943
icu4c/source/test/intltest/srchtest.cpp
Normal file
File diff suppressed because it is too large
Load Diff
83
icu4c/source/test/intltest/srchtest.h
Normal file
83
icu4c/source/test/intltest/srchtest.h
Normal file
@ -0,0 +1,83 @@
|
||||
/****************************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2001, International Business Machines Corporation and others
|
||||
* All Rights Reserved.
|
||||
***************************************************************************/
|
||||
|
||||
/* Include guard named after this file (srchtest.h). The previous name began
   with an underscore followed by an uppercase letter, which is reserved for
   the implementation, and it referred to a different header name. */
#ifndef SRCHTEST_H
|
||||
#define SRCHTEST_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/tblcoll.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "intltest.h"
|
||||
#include "unicode/usearch.h"
|
||||
|
||||
/* Only pointers to SearchData are used below, so a forward declaration is
   enough here; the full definition is not part of this header. */
struct SearchData;
|
||||
typedef struct SearchData SearchData;
|
||||
|
||||
/**
 * String search test suite for the IntlTest framework.
 * The Test...() members are the individual test cases and the assert...()
 * members are shared checking helpers; all of them are implemented in
 * srchtest.cpp, which is not visible from this header.
 */
class StringSearchTest: public IntlTest
|
||||
{
|
||||
public:
|
||||
StringSearchTest();
|
||||
virtual ~StringSearchTest();
|
||||
|
||||
/* IntlTest entry point: index selects a test, name receives its name and
   the test is only run when exec is true (as in the other IntlTest suites). */
void runIndexedTest(int32_t index, UBool exec, const char* &name,
|
||||
char* par = NULL);
|
||||
|
||||
private:
|
||||
/* Collators and break iterators kept for the lifetime of the suite.
   NOTE(review): presumably created in the constructor and released in the
   destructor -- confirm in srchtest.cpp. */
RuleBasedCollator *m_en_us_;
|
||||
RuleBasedCollator *m_fr_fr_;
|
||||
RuleBasedCollator *m_de_;
|
||||
RuleBasedCollator *m_es_;
|
||||
BreakIterator *m_en_wordbreaker_;
|
||||
BreakIterator *m_en_characterbreaker_;
|
||||
|
||||
/* Lookup and conversion helpers. NOTE(review): presumably these map the
   collator and breaker names of a SearchData entry to the members above --
   confirm in srchtest.cpp. */
RuleBasedCollator * getCollator(const char *collator);
|
||||
BreakIterator * getBreakIterator(const char *breaker);
|
||||
char * toCharString(const UnicodeString &text);
|
||||
Collator::ECollationStrength getECollationStrength(
|
||||
const UCollationStrength &strength) const;
|
||||
/* Checking helpers; each takes one SearchData test case and returns a UBool. */
UBool assertEqualWithStringSearch( StringSearch *strsrch,
|
||||
const SearchData *search);
|
||||
UBool assertEqual(const SearchData *search);
|
||||
UBool assertCanonicalEqual(const SearchData *search);
|
||||
UBool assertEqualWithAttribute(const SearchData *search,
|
||||
USearchAttributeValue canonical,
|
||||
USearchAttributeValue overlap);
|
||||
/* Individual test cases. */
void TestOpenClose();
|
||||
void TestInitialization();
|
||||
void TestBasic();
|
||||
void TestNormExact();
|
||||
void TestStrength();
|
||||
void TestBreakIterator();
|
||||
void TestVariable();
|
||||
void TestOverlap();
|
||||
void TestCollator();
|
||||
void TestPattern();
|
||||
void TestText();
|
||||
void TestCompositeBoundaries();
|
||||
void TestGetSetOffset();
|
||||
void TestGetSetAttribute();
|
||||
void TestGetMatch();
|
||||
void TestSetMatch();
|
||||
void TestReset();
|
||||
void TestSupplementary();
|
||||
void TestContraction();
|
||||
void TestIgnorable();
|
||||
/* Counterparts of the tests above, named for canonical matching. */
void TestCanonical();
|
||||
void TestNormCanonical();
|
||||
void TestStrengthCanonical();
|
||||
void TestBreakIteratorCanonical();
|
||||
void TestVariableCanonical();
|
||||
void TestOverlapCanonical();
|
||||
void TestCollatorCanonical();
|
||||
void TestPatternCanonical();
|
||||
void TestTextCanonical();
|
||||
void TestCompositeBoundariesCanonical();
|
||||
void TestGetSetOffsetCanonical();
|
||||
void TestSupplementaryCanonical();
|
||||
void TestContractionCanonical();
|
||||
};
|
||||
|
||||
#endif /* SRCHTEST_H */
|
@ -37,6 +37,7 @@
|
||||
#include "tstnorm.h"
|
||||
#include "normconf.h"
|
||||
#include "thcoll.h"
|
||||
#include "srchtest.h"
|
||||
|
||||
#include "lcukocol.h"
|
||||
|
||||
@ -273,7 +274,7 @@ void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &n
|
||||
}
|
||||
break;
|
||||
|
||||
case 18: //all
|
||||
case 18: //all
|
||||
name = "LotusCollationTest";
|
||||
|
||||
name = "LotusCollationKoreanTest";
|
||||
@ -284,6 +285,16 @@ void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &n
|
||||
}
|
||||
break;
|
||||
|
||||
case 19:
|
||||
name = "StringSearchTest";
|
||||
if (exec) {
|
||||
logln("StringSearchTest---");
|
||||
|
||||
StringSearchTest test;
|
||||
callTest( test, par );
|
||||
}
|
||||
break;
|
||||
|
||||
default: name = ""; break;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user