diff --git a/icu4c/source/test/cintltst/Makefile.in b/icu4c/source/test/cintltst/Makefile.in index b210b41021..b9261bed34 100644 --- a/icu4c/source/test/cintltst/Makefile.in +++ b/icu4c/source/test/cintltst/Makefile.in @@ -58,7 +58,7 @@ cnmdptst.o cnormtst.o cnumtst.o cregrtst.o crestst.o creststn.o cturtst.o \ cucdtst.o cutiltst.o encoll.o nucnvtst.o susctest.o nccbtst.o \ cbiditst.o cbididat.o eurocreg.o udatatst.o utf16tst.o utransts.o \ ncnvfbts.o ncnvtst.o putiltst.o cstrtest.o mstrmtst.o utf8tst.o ucmptst.o \ -stdnmtst.o ucmpetst.o ctstdep.o +stdnmtst.o ucmpetst.o ctstdep.o usrchtst.o DEPS = $(OBJECTS:.o=.d) diff --git a/icu4c/source/test/cintltst/ccolltst.c b/icu4c/source/test/cintltst/ccolltst.c index c01e2bb6f3..a1821c1a18 100644 --- a/icu4c/source/test/cintltst/ccolltst.c +++ b/icu4c/source/test/cintltst/ccolltst.c @@ -43,6 +43,7 @@ void addCollTest(TestNode** root) addAllCollTest(root); addMiscCollTest(root); + addSearchTest(root); } diff --git a/icu4c/source/test/cintltst/ccolltst.h b/icu4c/source/test/cintltst/ccolltst.h index 6868df8447..75f9d239d2 100644 --- a/icu4c/source/test/cintltst/ccolltst.h +++ b/icu4c/source/test/cintltst/ccolltst.h @@ -48,5 +48,6 @@ void addRuleBasedCollTest(TestNode**); void addCollIterTest(TestNode**); void addAllCollTest(TestNode**); void addMiscCollTest(TestNode**); +void addSearchTest(TestNode**); #endif diff --git a/icu4c/source/test/cintltst/cintltst.dsp b/icu4c/source/test/cintltst/cintltst.dsp index 728c213ee2..af74222142 100644 --- a/icu4c/source/test/cintltst/cintltst.dsp +++ b/icu4c/source/test/cintltst/cintltst.dsp @@ -300,6 +300,14 @@ SOURCE=.\udatatst.c # End Source File # Begin Source File +SOURCE=.\usrchdat.c +# End Source File +# Begin Source File + +SOURCE=.\usrchtst.c +# End Source File +# Begin Source File + SOURCE=.\utf16tst.c # End Source File # Begin Source File diff --git a/icu4c/source/test/cintltst/usrchdat.c b/icu4c/source/test/cintltst/usrchdat.c new file mode 100644 index 0000000000..fb7771b899 
--- /dev/null +++ b/icu4c/source/test/cintltst/usrchdat.c @@ -0,0 +1,435 @@ +/******************************************************************** + * Copyright (c) 2001, + * International Business Machines Corporation and others. + * All Rights Reserved. + ******************************************************************** + * File USRCHDAT.H + * Modification History: + * Name date Description + * synwee July 31 2001 creation + ********************************************************************/ + +#ifndef USRCHDAT_C +#define USRCHDAT_C + +#include "unicode/ucol.h" + +struct SearchData { + const char *text; + const char *pattern; + const char *collator; + UCollationStrength strength; + const char *breaker; + UTextOffset offset[32]; + uint32_t size[32]; +}; + +typedef struct SearchData SearchData; + +static const char *TESTCOLLATORRULE = "& o,O ; p,P"; + +static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc"; + +static const SearchData BASIC[] = { + {"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1}, + {6}}, + {"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL, + {13, 20, -1}, {6, 6}}, + {"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL, + {6, 20, -1}, {6, 6}}, + {"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1}, + {6, 6}}, + {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, + {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}}, + {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {2}}, + {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData BREAKITERATOR[] = { + {"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 
5, -1}, + {3, 3}}, + {"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, + "characterbreaker", {10, 14, -1}, {3, 2}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker", + {10, -1}, {3}}, + {"Channel, another channel, more channels, and one last Channel", + "Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData STRENGTH[] = { + /*012345678901234567890123456789012345678901234567890123456789*/ + {"The quick brown fox jumps over the lazy foxes", "fox", "en", + UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, + {"The quick brown fox jumps over the lazy foxes", "fox", "en", + UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}}, + {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", + "peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL, + {10, 14, -1}, {3, 2}}, + {"A channel, another CHANNEL, more Channels, and one last channel...", + "channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1}, + {7, 7, 7, 7}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData VARIABLE[] = { + /*012345678901234567890123456789012345678901234567890123456789*/ + {"blackbirds black blackbirds blackbird black-bird", + "blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1}, + {9, 9, 9, 10}}, + /* to see that it doesn't go into an infinite loop if the start of text + is a ignorable character */ + {" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0}}, + /* testing tightest match */ + {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, + NULL, {1, -1}, {3}}, + /*012345678901234567890123456789012345678901234567890123456789 */ + {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, + NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, + /* totally ignorable text */ + {" ---------------", "abc", NULL, UCOL_SECONDARY, + NULL, {-1}, {0}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData NORMEXACT[] = { + {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {2}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData NONNORMEXACT[] = { + {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, + {0}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData OVERLAP[] = { + {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1}, + {4, 4, 4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData NONOVERLAP[] = { + {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData COLLATOR[] = { + /* english */ + {"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}}, + /* tailored */ + {"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData PATTERN[] = { + {"The quick brown fox jumps over the lazy foxes", "the", NULL, + UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}}, + {"The quick brown fox jumps over the lazy foxes", "fox", NULL, + UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData TEXT[] = { + {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1}, + {3, 3}}, + {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1}, + {3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, 
{-1}, {0}} +}; + +static const SearchData COMPOSITEBOUNDARIES[] = { + {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, + {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, + {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, + {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, + {1, 1}}, + {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + /* A + 030A + 0301 */ + {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData MATCH[] = { + {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL, + {7, 26, -1}, {3, 3}}, + /* 012345678901234567890123456789012345678901234567890 */ + {"a busy bee is a very busy beeee with no bee 
life", "bee", NULL, + UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData SUPPLEMENTARY[] = { + /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ + {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", + "\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1}, + {2, 2, 2, 2, 2}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const char *CONTRACTIONRULE = "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; + +static const SearchData CONTRACTION[] = { + /* common discontiguous */ + {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {2}}, + /* contraction prefix */ + {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}}, + /* discontiguous problem here for backwards iteration. 
+ accents not found because discontiguous stores all information */ + {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {-1}, + {0}}, + /* ends not with a contraction character */ + {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, + {0}}, + {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, + {0, -1}, {3}}, + {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, + {0}}, + /* blocked discontiguous */ + {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL, + {-1}, {0}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const char *IGNORABLERULE = "&a = \\u0300"; + +static const SearchData IGNORABLE[] = { + {"\\u0315\\u0300 \\u0315\\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, NULL, + {0, 3, -1}, {2, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData BASICCANONICAL[] = { + {"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1}, + {6}}, + {"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL, + {13, 20, -1}, {6, 6}}, + {"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL, + {6, 20, -1}, {6, 6}}, + {"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1}, + {6, 6}}, + {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, + {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}}, + {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {2}}, + {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, + {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, NULL, {1, -1}, {3}}, + {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", + "\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + 
+static const SearchData NORMCANONICAL[] = { + {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {2}}, + {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {2}}, + {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, + {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData BREAKITERATORCANONICAL[] = { + {"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1}, + {3, 3}}, + {"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, + "characterbreaker", {10, 14, -1}, {3, 2}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker", + {10, -1}, {3}}, + {"Channel, another channel, more channels, and one last Channel", + "Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData STRENGTHCANONICAL[] = { + /*012345678901234567890123456789012345678901234567890123456789 */ + {"The quick brown fox jumps over the lazy foxes", "fox", "en", + UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, + {"The quick brown fox jumps over the lazy foxes", "fox", "en", + UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}}, + {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", + "peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, + {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL, + {10, 14, -1}, {3, 2}}, + {"A channel, another CHANNEL, more Channels, and one last channel...", + "channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1}, + {7, 7, 7, 7}}, + 
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData VARIABLECANONICAL[] = { + /*012345678901234567890123456789012345678901234567890123456789 */ + {"blackbirds black blackbirds blackbird black-bird", + "blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1}, + {9, 9, 9, 10}}, + /* to see that it doesn't go into an infinite loop if the start of text + is a ignorable character */ + {" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + /* testing tightest match */ + {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, + NULL, {1, -1}, {3}}, + /*012345678901234567890123456789012345678901234567890123456789 */ + {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, + NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, + /* totally ignorable text */ + {" ---------------", "abc", NULL, UCOL_SECONDARY, + NULL, {-1}, {0}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData OVERLAPCANONICAL[] = { + {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1}, + {4, 4, 4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData NONOVERLAPCANONICAL[] = { + {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData COLLATORCANONICAL[] = { + /* english */ + {"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}}, + /* tailored */ + {"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData PATTERNCANONICAL[] = { + {"The quick brown fox jumps over the lazy foxes", "the", NULL, + UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}}, + {"The quick brown fox 
jumps over the lazy foxes", "fox", NULL, + UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData TEXTCANONICAL[] = { + {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1}, + {3, 3}}, + {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1}, + {3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = { + {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, + {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, + {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, + {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, + {1, 1}}, + /* \\u0300 blocked by \\u0300 */ + {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + /* A + 030A + 0301 */ + {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + /* blocked accent */ + {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, + {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"\\u0F73", 
"\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, + {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", + NULL, UCOL_TERTIARY, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData MATCHCANONICAL[] = { + {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL, + {7, 26, -1}, {3, 3}}, + /*012345678901234567890123456789012345678901234567890 */ + {"a busy bee is a very busy beeee with no bee life", "bee", NULL, + UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData SUPPLEMENTARYCANONICAL[] = { + /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ + {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", + "\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1}, + {2, 2, 2, 2, 2}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +static const SearchData CONTRACTIONCANONICAL[] = { + /* common discontiguous */ + {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, + {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1}, + {2}}, + /* contraction prefix */ + {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, + {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, + {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}}, + /* discontiguous problem here for backwards iteration. 
+ forwards gives 0, 4 but backwards give 1, 3 */ + /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1}, + {4}}, */ + + /* ends not with a contraction character */ + {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, + {0}}, + {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, + {0, -1}, {3}}, + {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, + {0, -1}, {4}}, + /* blocked discontiguous */ + {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL, + {1, -1}, {4}}, + {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} +}; + +#endif diff --git a/icu4c/source/test/cintltst/usrchtst.c b/icu4c/source/test/cintltst/usrchtst.c new file mode 100644 index 0000000000..c9da128ae7 --- /dev/null +++ b/icu4c/source/test/cintltst/usrchtst.c @@ -0,0 +1,1937 @@ +/******************************************************************** + * Copyright (c) 2001, + * International Business Machines Corporation and others. + * All Rights Reserved. 
+ ********************************************************************
+ * File usrchtst.c
+ * Modification History:
+ * Name Date Description
+ * synwee July 19 2001 creation
+ ********************************************************************/
+
+#include "unicode/usearch.h"
+#include "unicode/ustring.h"
+#include "ccolltst.h"
+#include "cmemory.h"
+#include <stdio.h>   /* sprintf */
+#include <string.h>  /* strcmp, strlen, memcmp */
+#include "usrchdat.c"
+
+/* capacity, in UChars, of the buffer in which tailored rules are assembled */
+#define RULE_CAPACITY_ 1024
+
+/* open() and close() only act while this flag is TRUE */
+static UBool TOCLOSE_ = TRUE;
+static UCollator *EN_US_;
+static UCollator *FR_FR_;
+static UCollator *DE_;
+static UCollator *ES_;
+static UBreakIterator *EN_WORDBREAKER_;
+static UBreakIterator *EN_CHARACTERBREAKER_;
+
+/**
+* Builds a collator from the rules of an existing one followed by
+* EXTRACOLLATIONRULE.
+* @param base collator supplying the leading rules; it is closed once its
+*        rules have been copied
+* @param status in/out error code; set to U_BUFFER_OVERFLOW_ERROR when the
+*        combined rules would not fit into RULE_CAPACITY_ UChars
+* @return the collator opened from the combined rules, or base itself,
+*         still open, when nothing was attempted
+*/
+static UCollator *openTailoredCollator_(UCollator *base, UErrorCode *status)
+{
+    UChar        rules[RULE_CAPACITY_];
+    int32_t      rulelength = 0;
+    const UChar *baserules;
+
+    if (U_FAILURE(*status) || base == NULL) {
+        return base;
+    }
+    baserules = ucol_getRules(base, &rulelength);
+    /* an escape sequence never unescapes to more UChars than it has chars,
+       so the escaped length is a safe upper bound for what gets appended */
+    if (baserules == NULL || rulelength < 0 ||
+        rulelength + (int32_t)strlen(EXTRACOLLATIONRULE) >= RULE_CAPACITY_) {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return base;
+    }
+    u_strcpy(rules, baserules);
+    u_unescape(EXTRACOLLATIONRULE, rules + rulelength,
+               RULE_CAPACITY_ - rulelength);
+
+    ucol_close(base);
+    return ucol_openRules(rules, u_strlen(rules), UNORM_NFD, UCOL_TERTIARY,
+                          (UParseError *)NULL, status);
+}
+
+/**
+* Opening all static collators and break iterators.
+* Does nothing unless TOCLOSE_ is TRUE. The de and es collators are replaced
+* by versions tailored with EXTRACOLLATIONRULE. Any failure is logged.
+*/
+static void open()
+{
+    if (TOCLOSE_) {
+        UErrorCode status = U_ZERO_ERROR;
+
+        EN_US_ = ucol_open("en_US", &status);
+        FR_FR_ = ucol_open("fr_FR", &status);
+        DE_ = ucol_open("de_DE", &status);
+        ES_ = ucol_open("es_ES", &status);
+
+        DE_ = openTailoredCollator_(DE_, &status);
+        ES_ = openTailoredCollator_(ES_, &status);
+
+        EN_WORDBREAKER_ = ubrk_open(UBRK_WORD, "en_US", NULL, 0, &status);
+        EN_CHARACTERBREAKER_ = ubrk_open(UBRK_CHARACTER, "en_US", NULL, 0,
+                                         &status);
+        if (U_FAILURE(status)) {
+            log_err("Error opening collators and break iterators %s\n",
+                    u_errorName(status));
+        }
+        TOCLOSE_ = TRUE;
+    }
+}
+
+/**
+* Start opening all static collators and break iterators.
+* Clearing TOCLOSE_ afterwards turns the open() and close() calls made by the
+* individual tests into no-ops, so the objects stay open until TestEnd().
+*/
+static void TestStart()
+{
+    open();
+    TOCLOSE_ = FALSE;
+}
+
+/**
+* Closing all static collators and break iterators.
+* Only closes while TOCLOSE_ is TRUE and always leaves the flag FALSE.
+* NOTE(review): because the flag ends up FALSE, a later open() outside a
+* TestStart()/TestEnd() bracket will not reopen anything - confirm that the
+* tests are never run individually one after another.
+*/
+static void close()
+{
+    if (TOCLOSE_) {
+        ucol_close(EN_US_);
+        ucol_close(FR_FR_);
+        ucol_close(DE_);
+        ucol_close(ES_);
+        ubrk_close(EN_WORDBREAKER_);
+        ubrk_close(EN_CHARACTERBREAKER_);
+    }
+    TOCLOSE_ = FALSE;
+}
+
+/**
+* End closing all static collators and break iterators.
+* Re-arms TOCLOSE_ so that close() really releases the shared objects.
+*/
+static void TestEnd()
+{
+    TOCLOSE_ = TRUE;
+    close();
+}
+
+/**
+* output UChar strings for printing.
+* Printable ASCII (0x20..0x7e) is copied, everything else is written as a
+* \uXXXX escape with four lowercase hex digits.
+* @param unichars NUL-terminated string to convert
+* @return pointer to a static buffer that the next call overwrites; the
+*         output is truncated when the buffer is full
+*/
+static char *toCharString(const UChar* unichars)
+{
+    static char result[1024];
+    /* last position that still has room for one escape plus the NUL */
+    char *limit = result + sizeof(result) - 7;
+    char *temp = result;
+    int length = u_strlen(unichars);
+    int count;
+
+    for (count = 0; count < length && temp <= limit; count ++) {
+        UChar ch = unichars[count];
+        if (ch >= 0x20 && ch <= 0x7e) {
+            *temp ++ = (char)ch;
+        }
+        else {
+            sprintf(temp, "\\u%04x", (unsigned int)ch);
+            temp += 6;
+        }
+    }
+    *temp = 0;
+
+    return result;
+}
+
+/**
+* Getting the collator
+* @param collator "fr", "de" or "es"; NULL or any other name selects en_US
+* @return the matching static collator
+*/
+static UCollator *getCollator(const char *collator)
+{
+    if (collator == NULL) {
+        return EN_US_;
+    }
+    if (strcmp(collator, "fr") == 0) {
+        return FR_FR_;
+    }
+    else if (strcmp(collator, "de") == 0) {
+        return DE_;
+    }
+    else if (strcmp(collator, "es") == 0) {
+        return ES_;
+    }
+    else {
+        return EN_US_;
+    }
+}
+
+/**
+* Getting the breakiterator
+* @param breaker NULL for no break iterator, "wordbreaker" for the word
+*        breaker; any other name selects the character breaker
+* @return the matching static break iterator or NULL
+*/
+static UBreakIterator *getBreakIterator(const char *breaker)
+{
+    if (breaker == NULL) {
+        return NULL;
+    }
+    if (strcmp(breaker, "wordbreaker") == 0) {
+        return EN_WORDBREAKER_;
+    }
+    else {
+        return EN_CHARACTERBREAKER_;
+    }
+}
+
+/**
+* Checks that an open call which was given NULL arguments failed.
+* A search that was returned against expectation is closed so that it does
+* not leak.
+*/
+static void expectOpenFailure_(UStringSearch *result, UErrorCode status)
+{
+    if (U_SUCCESS(status) || result != NULL) {
+        log_err("Error: NULL arguments should produce an error and a NULL result\n");
+    }
+    if (result != NULL) {
+        usearch_close(result);
+    }
+}
+
+/**
+* Checks that an open call succeeded, logging message otherwise, and closes
+* whatever search was returned.
+*/
+static void expectOpenSuccess_(UStringSearch *result, UErrorCode status,
+                               const char *message)
+{
+    if (U_FAILURE(status) || result == NULL) {
+        log_err("%s", message);
+    }
+    if (result != NULL) {
+        usearch_close(result);
+    }
+}
+
+/**
+* Exercises usearch_open and usearch_openFromCollator with missing and with
+* valid arguments.
+*/
+static void TestOpenClose()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UStringSearch *result;
+    /* "abc" and "abcdef": the calls below read 3 and 6 UChars, so the
+       arrays have to hold defined values */
+    const UChar pattern[32] = {0x61, 0x62, 0x63, 0};
+    const UChar text[128] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0};
+    UBreakIterator *breakiter = ubrk_open(UBRK_WORD, "en_US",
+                                          text, 6, &status);
+    /* testing null arguments */
+    result = usearch_open(NULL, 0, NULL, 0, NULL, NULL, &status);
+    expectOpenFailure_(result, status);
+    status = U_ZERO_ERROR;
+    result = usearch_openFromCollator(NULL, 0, NULL, 0, NULL, NULL, &status);
+    expectOpenFailure_(result, status);
+
+    status = U_ZERO_ERROR;
+    result = usearch_open(pattern, 3, NULL, 0, NULL, NULL, &status);
+    expectOpenFailure_(result, status);
+    status = U_ZERO_ERROR;
+    result = usearch_openFromCollator(pattern, 3, NULL, 0, NULL, NULL,
+                                      &status);
+    expectOpenFailure_(result, status);
+
+    status = U_ZERO_ERROR;
+    result = usearch_open(pattern, 3, text, 6, NULL, NULL, &status);
+    expectOpenFailure_(result, status);
+    status = U_ZERO_ERROR;
+    result = usearch_openFromCollator(pattern, 3, text, 6, NULL, NULL,
+                                      &status);
+    expectOpenFailure_(result, status);
+
+    status = U_ZERO_ERROR;
+    result = usearch_open(pattern, 3, text, 6, "en_US", NULL, &status);
+    expectOpenSuccess_(result, status,
+                "Error: NULL break iterator is valid for opening search\n");
+    open();
+    status = U_ZERO_ERROR;
+    result = usearch_openFromCollator(pattern, 3, text, 6, EN_US_, NULL,
+                                      &status);
+    expectOpenSuccess_(result, status,
+                "Error: NULL break iterator is valid for opening search\n");
+
+    status = U_ZERO_ERROR;
+    result = usearch_open(pattern, 3, text, 6, "en_US", breakiter, &status);
+    expectOpenSuccess_(result, status,
+                "Error: Break iterator is valid for opening search\n");
+    status = U_ZERO_ERROR;
+    result = usearch_openFromCollator(pattern, 3, text, 6, EN_US_, breakiter,
+                                      &status);
+    expectOpenSuccess_(result, status,
+                "Error: Break iterator is valid for opening search\n");
+    ubrk_close(breakiter);
+    close();
+}
+
+/**
+* Opens a search with a two character pattern and with a 512 character
+* pattern; both are expected to open without error.
+*/
+static void TestInitialization()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UChar pattern[512];
+    /* "abc": three UChars of text are handed to the search below */
+    const UChar text[128] = {0x61, 0x62, 0x63, 0};
+    UStringSearch *result;
+    int32_t index;
+
+    /* simple test on the pattern ce construction */
+    pattern[0] = 0x41;
+    pattern[1] = 0x42;
+    open();
+    result = usearch_openFromCollator(pattern, 2, text, 3, EN_US_, NULL,
+                                      &status);
+    if (U_FAILURE(status)) {
+        log_err("Error opening search %s\n", u_errorName(status));
+    }
+    if (result != NULL) {
+        usearch_close(result);
+    }
+
+    /* testing if an extremely large pattern will fail the initialization */
+    /* fill per UChar: a byte-wise memset of 512 bytes would cover only half
+       of the array and store 0x4141 instead of 0x41 */
+    for (index = 0; index < 512; index ++) {
+        pattern[index] = 0x41;
+    }
+    status = U_ZERO_ERROR;
+    result = usearch_openFromCollator(pattern, 512, text, 3, EN_US_, NULL,
+                                      &status);
+    if (U_FAILURE(status)) {
+        log_err("Error opening search %s\n", u_errorName(status));
+    }
+    if (result != NULL) {
+        usearch_close(result);
+    }
+    close();
+}
+
+/**
+* Logs the text, the pattern and the current match of a search whose match
+* was not the expected one.
+* @param strsrch search to report on
+* @param direction "following" or "preceding", inserted into the message
+*/
+static void logMatchError_(UStringSearch *strsrch, const char *direction)
+{
+    int32_t length;
+    char *str = toCharString(usearch_getText(strsrch, &length));
+
+    log_err("Text: %s\n", str);
+    str = toCharString(usearch_getPattern(strsrch, &length));
+    log_err("Pattern: %s\n", str);
+    log_err("Error %s match found at %d %d\n", direction,
+            usearch_getMatchedStart(strsrch),
+            usearch_getMatchedLength(strsrch));
+}
+
+/**
+* Compares the text reported by usearch_getMatchedText with the expected
+* slice of the searched text.
+* @return TRUE if length and content agree and status holds no error
+*/
+static UBool matchedTextAgrees_(UStringSearch *strsrch,
+                                UTextOffset matchindex,
+                                uint32_t matchlength,
+                                UErrorCode *status)
+{
+    UChar matchtext[128];
+    int32_t textlength;
+
+    return (UBool)(usearch_getMatchedText(strsrch, matchtext, 128, status) ==
+                       (int32_t)matchlength &&
+                   U_SUCCESS(*status) &&
+                   memcmp(matchtext,
+                          usearch_getText(strsrch, &textlength) + matchindex,
+                          matchlength * sizeof(UChar)) == 0);
+}
+
+/**
+* Walks a search forwards through all expected matches and then backwards
+* again, comparing every match with the expected offsets and sizes.
+* @param strsrch freshly opened search, no match found yet
+* @param search expected offsets, terminated by -1, and match sizes
+* @return FALSE as soon as a match position or length differs, else TRUE
+*/
+static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
+                                          const SearchData search)
+{
+    int count = 0;
+    UErrorCode status = U_ZERO_ERROR;
+    UTextOffset matchindex = search.offset[count];
+
+    if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
+        usearch_getMatchedLength(strsrch) != 0) {
+        log_err("Error with the initialization of match start and length\n");
+    }
+    /* start of following matches */
+    while (U_SUCCESS(status) && matchindex >= 0) {
+        uint32_t matchlength = search.size[count];
+        usearch_next(strsrch, &status);
+        if (matchindex != usearch_getMatchedStart(strsrch) ||
+            matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
+            logMatchError_(strsrch, "following");
+            return FALSE;
+        }
+        count ++;
+
+        if (!matchedTextAgrees_(strsrch, matchindex, matchlength, &status)) {
+            log_err("Error getting following matched text\n");
+        }
+
+        matchindex = search.offset[count];
+    }
+    /* one step past the last expected match has to report "done" */
+    usearch_next(strsrch, &status);
+    if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
+        usearch_getMatchedLength(strsrch) != 0) {
+        logMatchError_(strsrch, "following");
+        return FALSE;
+    }
+    /* start of preceding matches */
+    count = count == 0 ? 0 : count - 1;
+    matchindex = search.offset[count];
+    while (U_SUCCESS(status) && matchindex >= 0) {
+        uint32_t matchlength = search.size[count];
+        usearch_previous(strsrch, &status);
+        if (matchindex != usearch_getMatchedStart(strsrch) ||
+            matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
+            logMatchError_(strsrch, "preceding");
+            return FALSE;
+        }
+
+        if (!matchedTextAgrees_(strsrch, matchindex, matchlength, &status)) {
+            log_err("Error getting preceding matched text\n");
+        }
+
+        matchindex = count > 0 ? search.offset[count - 1] : -1;
+        count --;
+    }
+    /* one step before the first expected match has to report "done" */
+    usearch_previous(strsrch, &status);
+    if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
+        usearch_getMatchedLength(strsrch) != 0) {
+        logMatchError_(strsrch, "preceding");
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/**
+* Opens a search for one test case, optionally sets the canonical and
+* overlap attributes, and checks every expected match.
+* The collator strength is put back to tertiary and the search is closed on
+* every path, including the failure to open.
+* @param search test case
+* @param canonical value for USEARCH_CANONICAL_MATCH, NULL to leave it alone
+* @param overlap value for USEARCH_OVERLAP, NULL to leave it alone
+* @return TRUE if the search opened and all matches were as expected
+*/
+static UBool assertEqualWithOptions_(const SearchData *search,
+                                     const USearchAttributeValue *canonical,
+                                     const USearchAttributeValue *overlap)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UChar pattern[32];
+    UChar text[128];
+    UCollator *collator = getCollator(search->collator);
+    UBreakIterator *breaker = getBreakIterator(search->breaker);
+    UStringSearch *strsrch;
+    UBool result = FALSE;
+
+    u_unescape(search->text, text, 128);
+    u_unescape(search->pattern, pattern, 32);
+    ucol_setStrength(collator, search->strength);
+    strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
+                                       breaker, &status);
+    if (U_SUCCESS(status) && canonical != NULL) {
+        usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, *canonical,
+                             &status);
+    }
+    if (U_SUCCESS(status) && overlap != NULL) {
+        usearch_setAttribute(strsrch, USEARCH_OVERLAP, *overlap, &status);
+    }
+
+    if (U_FAILURE(status)) {
+        log_err("Error opening string search %s\n", u_errorName(status));
+    }
+    else {
+        result = assertEqualWithUStringSearch(strsrch, *search);
+    }
+    /* the collators are shared between tests, so undo the strength change */
+    ucol_setStrength(collator, UCOL_TERTIARY);
+    if (strsrch != NULL) {
+        usearch_close(strsrch);
+    }
+    return result;
+}
+
+/**
+* Runs one test case with the search attributes left untouched.
+* @return TRUE if all matches were as expected
+*/
+static UBool assertEqual(const SearchData search)
+{
+    return assertEqualWithOptions_(&search, NULL, NULL);
+}
+
+/**
+* Runs one test case with canonical matching switched on.
+* @return TRUE if all matches were as expected
+*/
+static UBool assertCanonicalEqual(const SearchData search)
+{
+    const USearchAttributeValue canonical = USEARCH_ON;
+
+    return assertEqualWithOptions_(&search, &canonical, NULL);
+}
+
+/**
+* Runs one test case with explicit canonical and overlap attribute values.
+* @return TRUE if all matches were as expected
+*/
+static UBool assertEqualWithAttribute(const SearchData search,
+                                      USearchAttributeValue canonical,
+                                      USearchAttributeValue overlap)
+{
+    return assertEqualWithOptions_(&search, &canonical, &overlap);
+}
+
+/**
+* Runs assertEqual over a table of test cases and logs the index of every
+* case that fails.
+* @param data table terminated by an entry whose text is NULL
+*/
+static void runExactTests_(const SearchData *data)
+{
+    int count = 0;
+
+    while (data[count].text != NULL) {
+        if (!assertEqual(data[count])) {
+            log_err("Error at test number %d\n", count);
+        }
+        count ++;
+    }
+}
+
+/**
+* Runs the BASIC table.
+*/
+static void TestBasic()
+{
+    open();
+    runExactTests_(BASIC);
+    close();
+}
+
+/**
+* Runs BASIC and NORMEXACT with collation normalization switched on for
+* en_US, then NONNORMEXACT with it switched off again.
+*/
+static void TestNormExact()
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    open();
+    ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+    if (U_FAILURE(status)) {
+        log_err("Error setting collation normalization %s\n",
+                u_errorName(status));
+    }
+    runExactTests_(BASIC);
+    runExactTests_(NORMEXACT);
+    ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
+    if (U_FAILURE(status)) {
+        log_err("Error setting collation normalization %s\n",
+                u_errorName(status));
+    }
+    runExactTests_(NONNORMEXACT);
+    close();
+}
+
+/**
+* Runs the STRENGTH table.
+*/
+static void TestStrength()
+{
+    open();
+    runExactTests_(STRENGTH);
+    close();
+}
+
+static void TestBreakIterator() {
+    UErrorCode status = U_ZERO_ERROR;
+    UStringSearch *strsrch;
+    UChar text[128];
+    UChar pattern[32];
+    int count = 0;
+
+    open();
+    u_unescape(BREAKITERATOR[0].text, text, 128);
+    u_unescape(BREAKITERATOR[0].pattern, pattern, 32);
+    strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, NULL,
+                                       &status);
+    if (U_FAILURE(status)) {
+        log_err("Error opening string search %s\n", u_errorName(status));
+        goto ENDTESTBREAKITERATOR;
+    }
+
+    usearch_setBreakIterator(strsrch, NULL, &status);
+    if (U_FAILURE(status) || usearch_getBreakIterator(strsrch) != NULL) {
+        log_err("Error usearch_getBreakIterator returned wrong object");
+        goto ENDTESTBREAKITERATOR;
+    }
+
+    usearch_setBreakIterator(strsrch, EN_CHARACTERBREAKER_, &status);
+    if (U_FAILURE(status) ||
+        usearch_getBreakIterator(strsrch) != EN_CHARACTERBREAKER_) {
+        log_err("Error usearch_getBreakIterator returned wrong object");
+        goto ENDTESTBREAKITERATOR;
+    }
+
+    usearch_setBreakIterator(strsrch, EN_WORDBREAKER_, &status);
+    if (U_FAILURE(status) ||
+        usearch_getBreakIterator(strsrch) != EN_WORDBREAKER_) {
+        log_err("Error usearch_getBreakIterator returned wrong object");
+        goto ENDTESTBREAKITERATOR;
+    }
+
+    usearch_close(strsrch);
+
+    count = 0;
+    while (count < 4) {
+        const
SearchData *search = &(BREAKITERATOR[count]); + UCollator *collator = getCollator(search->collator); + UBreakIterator *breaker = getBreakIterator(search->breaker); + + u_unescape(search->text, text, 128); + u_unescape(search->pattern, pattern, 32); + ucol_setStrength(collator, search->strength); + + strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator, + breaker, &status); + if (U_FAILURE(status) || + usearch_getBreakIterator(strsrch) != breaker) { + log_err("Error setting break iterator\n"); + if (strsrch != NULL) { + usearch_close(strsrch); + } + } + if (!assertEqualWithUStringSearch(strsrch, *search)) { + ucol_setStrength(collator, UCOL_TERTIARY); + usearch_close(strsrch); + goto ENDTESTBREAKITERATOR; + } + search = &(BREAKITERATOR[count + 1]); + breaker = getBreakIterator(search->breaker); + usearch_setBreakIterator(strsrch, breaker, &status); + if (U_FAILURE(status) || + usearch_getBreakIterator(strsrch) != breaker) { + log_err("Error setting break iterator\n"); + usearch_close(strsrch); + goto ENDTESTBREAKITERATOR; + } + usearch_reset(strsrch); + if (!assertEqualWithUStringSearch(strsrch, *search)) { + log_err("Error at test number %d\n", count); + goto ENDTESTBREAKITERATOR; + } + usearch_close(strsrch); + count += 2; + } + count = 0; + while (BREAKITERATOR[count].text != NULL) { + if (!assertEqual(BREAKITERATOR[count])) { + log_err("Error at test number %d\n", count); + goto ENDTESTBREAKITERATOR; + } + count ++; + } + +ENDTESTBREAKITERATOR: + close(); +} + +static void TestVariable() +{ + int count = 0; + UErrorCode status = U_ZERO_ERROR; + open(); + ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); + if (U_FAILURE(status)) { + log_err("Error setting collation alternate attribute %s\n", + u_errorName(status)); + } + while (VARIABLE[count].text != NULL) { + log_verbose("variable %d\n", count); + if (!assertEqual(VARIABLE[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + 
ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING, + UCOL_NON_IGNORABLE, &status); + close(); +} + +static void TestOverlap() +{ + int count = 0; + open(); + while (OVERLAP[count].text != NULL) { + if (!assertEqualWithAttribute(OVERLAP[count], USEARCH_OFF, + USEARCH_ON)) { + log_err("Error at overlap test number %d\n", count); + } + count ++; + } + count = 0; + while (NONOVERLAP[count].text != NULL) { + if (!assertEqual(NONOVERLAP[count])) { + log_err("Error at non overlap test number %d\n", count); + } + count ++; + } + + count = 0; + while (count < 1) { + UChar pattern[32]; + UChar text[128]; + const SearchData *search = &(OVERLAP[count]); + UCollator *collator = getCollator(search->collator); + UStringSearch *strsrch; + UErrorCode status = U_ZERO_ERROR; + + u_unescape(search->text, text, 128); + u_unescape(search->pattern, pattern, 32); + strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator, + NULL, &status); + usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_ON, &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_ON) { + log_err("Error setting overlap option\n"); + } + if (!assertEqualWithUStringSearch(strsrch, *search)) { + usearch_close(strsrch); + return; + } + search = &(NONOVERLAP[count]); + usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_OFF, &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF) { + log_err("Error setting overlap option\n"); + } + usearch_reset(strsrch); + if (!assertEqualWithUStringSearch(strsrch, *search)) { + usearch_close(strsrch); + log_err("Error at test number %d\n", count); + } + + count ++; + usearch_close(strsrch); + } + close(); +} + +static void TestCollator() +{ + /* test collator that thinks "o" and "p" are the same thing */ + UChar rules[32]; + UCollator *tailored = NULL; + UErrorCode status = U_ZERO_ERROR; + UChar pattern[32]; + UChar text[128]; + UStringSearch *strsrch; + + open(); + 
u_unescape(COLLATOR[0].text, text, 128); + u_unescape(COLLATOR[0].pattern, pattern, 32); + + strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, + NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, COLLATOR[0])) { + goto ENDTESTCOLLATOR; + } + + u_unescape(TESTCOLLATORRULE, rules, 32); + tailored = ucol_openRules(rules, -1, UNORM_NFD, COLLATOR[1].strength, + NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening rule based collator %s\n", u_errorName(status)); + } + + usearch_setCollator(strsrch, tailored, &status); + if (U_FAILURE(status) || usearch_getCollator(strsrch) != tailored) { + log_err("Error setting rule based collator\n"); + } + usearch_reset(strsrch); + if (!assertEqualWithUStringSearch(strsrch, COLLATOR[1])) { + goto ENDTESTCOLLATOR; + } + + usearch_setCollator(strsrch, EN_US_, &status); + usearch_reset(strsrch); + if (U_FAILURE(status) || usearch_getCollator(strsrch) != EN_US_) { + log_err("Error setting rule based collator\n"); + } + if (!assertEqualWithUStringSearch(strsrch, COLLATOR[0])) { + goto ENDTESTCOLLATOR; + } + +ENDTESTCOLLATOR: + usearch_close(strsrch); + if (tailored != NULL) { + ucol_close(tailored); + } + close(); +} + +static void TestPattern() +{ + UStringSearch *strsrch; + UChar pattern[32]; + UChar bigpattern[512]; + UChar text[128]; + const UChar *temp; + int32_t templength; + UErrorCode status = U_ZERO_ERROR; + + open(); + u_unescape(PATTERN[0].text, text, 128); + u_unescape(PATTERN[0].pattern, pattern, 32); + + ucol_setStrength(EN_US_, PATTERN[0].strength); + strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, + NULL, &status); + + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + goto ENDTESTPATTERN; + } + temp = usearch_getPattern(strsrch, &templength); + if (u_strcmp(pattern, temp) != 0) { + log_err("Error setting pattern\n"); + } + 
if (!assertEqualWithUStringSearch(strsrch, PATTERN[0])) { + goto ENDTESTPATTERN; + } + + u_unescape(PATTERN[1].pattern, pattern, 32); + usearch_setPattern(strsrch, pattern, -1, &status); + temp = usearch_getPattern(strsrch, &templength); + if (u_strcmp(pattern, temp) != 0) { + log_err("Error setting pattern\n"); + goto ENDTESTPATTERN; + } + usearch_reset(strsrch); + if (U_FAILURE(status)) { + log_err("Error setting pattern %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, PATTERN[1])) { + goto ENDTESTPATTERN; + } + + u_unescape(PATTERN[0].pattern, pattern, 32); + usearch_setPattern(strsrch, pattern, -1, &status); + temp = usearch_getPattern(strsrch, &templength); + if (u_strcmp(pattern, temp) != 0) { + log_err("Error setting pattern\n"); + goto ENDTESTPATTERN; + } + usearch_reset(strsrch); + if (U_FAILURE(status)) { + log_err("Error setting pattern %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, PATTERN[0])) { + goto ENDTESTPATTERN; + } + /* enormous pattern size to see if this crashes */ + for (templength = 0; templength != 512; templength ++) { + bigpattern[templength] = 0x61; + } + bigpattern[511] = 0; + usearch_setPattern(strsrch, bigpattern, -1, &status); + if (U_FAILURE(status)) { + log_err("Error setting pattern with size 512, %s \n", + u_errorName(status)); + } +ENDTESTPATTERN: + ucol_setStrength(EN_US_, UCOL_TERTIARY); + if (strsrch != NULL) { + usearch_close(strsrch); + } + close(); +} + +static void TestText() +{ + UStringSearch *strsrch; + UChar pattern[32]; + UChar text[128]; + const UChar *temp; + int32_t templength; + UErrorCode status = U_ZERO_ERROR; + + u_unescape(TEXT[0].text, text, 128); + u_unescape(TEXT[0].pattern, pattern, 32); + + open(); + strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, + NULL, &status); + + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + goto ENDTESTPATTERN; + } + temp = usearch_getText(strsrch, 
&templength); + if (u_strcmp(text, temp) != 0) { + log_err("Error setting text\n"); + } + if (!assertEqualWithUStringSearch(strsrch, TEXT[0])) { + goto ENDTESTPATTERN; + } + + u_unescape(TEXT[1].text, text, 32); + usearch_setText(strsrch, text, -1, &status); + temp = usearch_getText(strsrch, &templength); + if (u_strcmp(text, temp) != 0) { + log_err("Error setting text\n"); + goto ENDTESTPATTERN; + } + if (U_FAILURE(status)) { + log_err("Error setting text %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, TEXT[1])) { + goto ENDTESTPATTERN; + } + + u_unescape(TEXT[0].text, text, 32); + usearch_setText(strsrch, text, -1, &status); + temp = usearch_getText(strsrch, &templength); + if (u_strcmp(text, temp) != 0) { + log_err("Error setting text\n"); + goto ENDTESTPATTERN; + } + if (U_FAILURE(status)) { + log_err("Error setting pattern %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, TEXT[0])) { + goto ENDTESTPATTERN; + } +ENDTESTPATTERN: + if (strsrch != NULL) { + usearch_close(strsrch); + } + close(); +} + +static void TestCompositeBoundaries() +{ + int count = 0; + open(); + while (COMPOSITEBOUNDARIES[count].text != NULL) { + log_verbose("composite %d\n", count); + if (!assertEqual(COMPOSITEBOUNDARIES[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + close(); +} + +static void TestGetSetOffset() +{ + int index = 0; + UChar pattern[32]; + UChar text[128]; + UErrorCode status = U_ZERO_ERROR; + UStringSearch *strsrch; + + open(); + strsrch = usearch_openFromCollator(pattern, 16, text, 32, EN_US_, NULL, + &status); + /* testing out of bounds error */ + usearch_setOffset(strsrch, -1, &status); + if (U_SUCCESS(status)) { + log_err("Error expecting set offset error\n"); + } + usearch_setOffset(strsrch, 128, &status); + if (U_SUCCESS(status)) { + log_err("Error expecting set offset error\n"); + } + while (BASIC[index].text != NULL) { + int count = 0; + SearchData search = BASIC[index ++]; + 
UTextOffset matchindex = search.offset[count]; + int32_t textlength; + + u_unescape(search.text, text, 128); + u_unescape(search.pattern, pattern, 32); + status = U_ZERO_ERROR; + usearch_setText(strsrch, text, -1, &status); + usearch_setPattern(strsrch, pattern, -1, &status); + while (U_SUCCESS(status) && matchindex >= 0) { + uint32_t matchlength = search.size[count]; + usearch_next(strsrch, &status); + if (matchindex != usearch_getMatchedStart(strsrch) || + matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) { + char *str = toCharString(usearch_getText(strsrch, + &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error match found at %d %d\n", + usearch_getMatchedStart(strsrch), + usearch_getMatchedLength(strsrch)); + return; + } + matchindex = search.offset[count + 1] == -1 ? -1 : + search.offset[count + 2]; + if (search.offset[count + 1] != -1) { + usearch_setOffset(strsrch, search.offset[count + 1] + 1, + &status); + if (usearch_getOffset(strsrch) != search.offset[count + 1] + 1) { + log_err("Error setting offset\n"); + return; + } + } + + count += 2; + } + usearch_next(strsrch, &status); + if ((uint32_t)usearch_getMatchedStart(strsrch) != USEARCH_DONE) { + char *str = toCharString(usearch_getText(strsrch, &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error match found at %d %d\n", + usearch_getMatchedStart(strsrch), + usearch_getMatchedLength(strsrch)); + return; + } + } + usearch_close(strsrch); + close(); +} + +static void TestGetSetAttribute() +{ + UErrorCode status = U_ZERO_ERROR; + UChar pattern[32]; + const UChar text[128]; + UStringSearch *strsrch; + + open(); + strsrch = usearch_openFromCollator(pattern, 16, text, 32, EN_US_, NULL, + &status); + if (U_FAILURE(status)) { + log_err("Error opening search %s\n", u_errorName(status)); + return; 
+ } + + usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_DEFAULT, &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF) { + log_err("Error setting overlap to the default\n"); + } + usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_ON, &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_ON) { + log_err("Error setting overlap true\n"); + } + usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_OFF, &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF) { + log_err("Error setting overlap false\n"); + } + usearch_setAttribute(strsrch, USEARCH_OVERLAP, + USEARCH_ATTRIBUTE_VALUE_COUNT, &status); + if (U_SUCCESS(status)) { + log_err("Error setting overlap to illegal value\n"); + } + status = U_ZERO_ERROR; + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT, + &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_CANONICAL_MATCH) != + USEARCH_OFF) { + log_err("Error setting canonical match to the default\n"); + } + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_CANONICAL_MATCH) != + USEARCH_ON) { + log_err("Error setting canonical match true\n"); + } + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_OFF, + &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_CANONICAL_MATCH) != + USEARCH_OFF) { + log_err("Error setting canonical match false\n"); + } + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, + USEARCH_ATTRIBUTE_VALUE_COUNT, &status); + if (U_SUCCESS(status)) { + log_err("Error setting canonical match to illegal value\n"); + } + status = U_ZERO_ERROR; + usearch_setAttribute(strsrch, USEARCH_ATTRIBUTE_COUNT, USEARCH_DEFAULT, + &status); + if (U_SUCCESS(status)) { + log_err("Error setting illegal attribute success\n"); + } + + 
usearch_close(strsrch); + close(); +} + +static void TestGetMatch() +{ + int count = 0; + UErrorCode status = U_ZERO_ERROR; + UChar text[128]; + UChar pattern[32]; + SearchData search = MATCH[0]; + UTextOffset matchindex = search.offset[count]; + UStringSearch *strsrch; + int32_t textlength; + UChar matchtext[128]; + + open(); + u_unescape(search.text, text, 128); + u_unescape(search.pattern, pattern, 32); + strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, + NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + if (strsrch != NULL) { + usearch_close(strsrch); + } + return; + } + + while (U_SUCCESS(status) && matchindex >= 0) { + int32_t matchlength = search.size[count]; + usearch_next(strsrch, &status); + if (matchindex != usearch_getMatchedStart(strsrch) || + matchlength != usearch_getMatchedLength(strsrch)) { + char *str = toCharString(usearch_getText(strsrch, &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error match found at %d %d\n", + usearch_getMatchedStart(strsrch), + usearch_getMatchedLength(strsrch)); + return; + } + count ++; + + status = U_ZERO_ERROR; + if (usearch_getMatchedText(NULL, matchtext, 128, &status) != + USEARCH_DONE || U_SUCCESS(status)){ + log_err("Error expecting errors with NULL string search\n"); + } + status = U_ZERO_ERROR; + if (usearch_getMatchedText(strsrch, NULL, 128, &status) != + (int32_t)matchlength || U_SUCCESS(status)){ + log_err("Error pre-flighting match length\n"); + } + status = U_ZERO_ERROR; + if (usearch_getMatchedText(strsrch, matchtext, 0, &status) != + (int32_t)matchlength || U_SUCCESS(status)){ + log_err("Error getting match text with buffer size 0\n"); + } + status = U_ZERO_ERROR; + if (usearch_getMatchedText(strsrch, matchtext, matchlength, &status) + != (int32_t)matchlength || matchtext[matchlength - 1] == 0 || + U_FAILURE(status)){ + 
log_err("Error getting match text with exact size\n"); + } + status = U_ZERO_ERROR; + if (usearch_getMatchedText(strsrch, matchtext, 128, &status) != + (int32_t) matchlength || U_FAILURE(status) || + memcmp(matchtext, + usearch_getText(strsrch, &textlength) + matchindex, + matchlength * sizeof(UChar)) != 0 || + matchtext[matchlength] != 0) { + log_err("Error getting matched text\n"); + } + + matchindex = search.offset[count]; + } + status = U_ZERO_ERROR; + usearch_next(strsrch, &status); + if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || + usearch_getMatchedLength(strsrch) != 0) { + log_err("Error end of match not found\n"); + } + status = U_ZERO_ERROR; + if (usearch_getMatchedText(strsrch, matchtext, 128, &status) != + USEARCH_DONE) { + log_err("Error getting null matches\n"); + } + usearch_close(strsrch); + close(); +} + +static void TestSetMatch() +{ + int count = 0; + + open(); + while (MATCH[count].text != NULL) { + SearchData search = MATCH[count]; + int size = 0; + int index = 0; + UChar text[128]; + UChar pattern[32]; + UStringSearch *strsrch; + UErrorCode status = U_ZERO_ERROR; + u_unescape(search.text, text, 128); + u_unescape(search.pattern, pattern, 32); + strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, + NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + if (strsrch != NULL) { + usearch_close(strsrch); + } + return; + } + + size = 0; + while (search.offset[size] != -1) { + size ++; + } + + if (usearch_first(strsrch, &status) != search.offset[0] || + U_FAILURE(status)) { + log_err("Error getting first match\n"); + } + if (usearch_last(strsrch, &status) != search.offset[size -1] || + U_FAILURE(status)) { + log_err("Error getting last match\n"); + } + + while (index < size) { + if (index + 2 < size) { + if (usearch_following(strsrch, search.offset[index + 2] - 1, + &status) != search.offset[index + 2] || + U_FAILURE(status)) { + log_err("Error getting following match at 
index %d\n", + search.offset[index + 2] - 1); + } + } + if (index + 1 < size) { + if (usearch_preceding(strsrch, search.offset[index + 1] + + search.size[index + 1] + 1, + &status) != search.offset[index + 1] || + U_FAILURE(status)) { + log_err("Error getting preceeding match at index %d\n", + search.offset[index + 1] + 1); + } + } + index += 2; + } + status = U_ZERO_ERROR; + if (usearch_following(strsrch, u_strlen(text), &status) != + USEARCH_DONE) { + log_err("Error expecting out of bounds match\n"); + } + if (usearch_preceding(strsrch, 0, &status) != USEARCH_DONE) { + log_err("Error expecting out of bounds match\n"); + } + count ++; + usearch_close(strsrch); + } + close(); +} + +static void TestReset() +{ + UErrorCode status = U_ZERO_ERROR; + UChar text[128]; + UChar pattern[32]; + UStringSearch *strsrch; + + open(); + strsrch = usearch_openFromCollator(pattern, 16, text, 32, + EN_US_, NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + if (strsrch != NULL) { + usearch_close(strsrch); + } + return; + } + usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_ON, &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + usearch_setOffset(strsrch, 10, &status); + if (U_FAILURE(status)) { + log_err("Error setting attributes and offsets\n"); + } + else { + usearch_reset(strsrch); + if (usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF || + usearch_getAttribute(strsrch, USEARCH_CANONICAL_MATCH) != + USEARCH_OFF || + usearch_getOffset(strsrch) != 0 || + usearch_getMatchedLength(strsrch) != 0 || + usearch_getMatchedStart(strsrch) != USEARCH_DONE) { + log_err("Error resetting string search\n"); + } + } + usearch_close(strsrch); + close(); +} + +static void TestSupplementary() +{ + int count = 0; + open(); + while (SUPPLEMENTARY[count].text != NULL) { + if (!assertEqual(SUPPLEMENTARY[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + 
close(); +} + +static void TestContraction() +{ + UChar rules[128]; + UChar pattern[128]; + UChar text[128]; + UCollator *collator; + UErrorCode status = U_ZERO_ERROR; + int count = 0; + UStringSearch *strsrch; + + u_unescape(CONTRACTIONRULE, rules, 128); + collator = ucol_openRules(rules, u_strlen(rules), UNORM_NFD, + UCOL_TERTIARY, NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening collator %s\n", u_errorName(status)); + } + strsrch = usearch_openFromCollator(pattern, 1, text, 1, collator, NULL, + &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + } + + while (CONTRACTION[count].text != NULL) { + u_unescape(CONTRACTION[count].text, text, 128); + u_unescape(CONTRACTION[count].pattern, pattern, 128); + usearch_setText(strsrch, text, -1, &status); + usearch_setPattern(strsrch, pattern, -1, &status); + if (!assertEqualWithUStringSearch(strsrch, CONTRACTION[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + usearch_close(strsrch); + ucol_close(collator); +} + +static void TestIgnorable() +{ + UChar rules[128]; + UChar pattern[128]; + UChar text[128]; + UCollator *collator; + UErrorCode status = U_ZERO_ERROR; + UStringSearch *strsrch; + uint32_t count = 0; + + u_unescape(IGNORABLERULE, rules, 128); + collator = ucol_openRules(rules, u_strlen(rules), UNORM_NFD, + IGNORABLE[count].strength, NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening collator %s\n", u_errorName(status)); + } + strsrch = usearch_openFromCollator(pattern, 1, text, 1, collator, NULL, + &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + } + + while (IGNORABLE[count].text != NULL) { + u_unescape(IGNORABLE[count].text, text, 128); + u_unescape(IGNORABLE[count].pattern, pattern, 128); + usearch_setText(strsrch, text, -1, &status); + usearch_setPattern(strsrch, pattern, -1, &status); + if (!assertEqualWithUStringSearch(strsrch, 
IGNORABLE[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + usearch_close(strsrch); + ucol_close(collator); +} + +static void TestCanonical() +{ + int count = 0; + open(); + while (BASICCANONICAL[count].text != NULL) { + if (!assertCanonicalEqual(BASICCANONICAL[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + close(); +} + +static void TestNormCanonical() +{ + int count = 0; + UErrorCode status = U_ZERO_ERROR; + open(); + ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); + count = 0; + while (NORMCANONICAL[count].text != NULL) { + if (!assertCanonicalEqual(NORMCANONICAL[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); + close(); +} + +static void TestStrengthCanonical() +{ + int count = 0; + open(); + while (STRENGTHCANONICAL[count].text != NULL) { + if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + close(); +} + +static void TestBreakIteratorCanonical() { + UErrorCode status = U_ZERO_ERROR; + int count = 0; + + open(); + while (count < 4) { + UChar pattern[32]; + UChar text[128]; + const SearchData *search = &(BREAKITERATORCANONICAL[count]); + UCollator *collator = getCollator(search->collator); + UBreakIterator *breaker = getBreakIterator(search->breaker); + UStringSearch *strsrch; + + u_unescape(search->text, text, 128); + u_unescape(search->pattern, pattern, 32); + ucol_setStrength(collator, search->strength); + + strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator, + breaker, &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (U_FAILURE(status) || + usearch_getBreakIterator(strsrch) != breaker) { + log_err("Error setting break iterator\n"); + if (strsrch != NULL) { + usearch_close(strsrch); + } + } + if (!assertEqualWithUStringSearch(strsrch, 
*search)) { + ucol_setStrength(collator, UCOL_TERTIARY); + usearch_close(strsrch); + goto ENDTESTBREAKITERATOR; + } + search = &(BREAKITERATOR[count + 1]); + breaker = getBreakIterator(search->breaker); + usearch_setBreakIterator(strsrch, breaker, &status); + if (U_FAILURE(status) || + usearch_getBreakIterator(strsrch) != breaker) { + log_err("Error setting break iterator\n"); + usearch_close(strsrch); + goto ENDTESTBREAKITERATOR; + } + usearch_reset(strsrch); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (!assertEqualWithUStringSearch(strsrch, *search)) { + log_err("Error at test number %d\n", count); + goto ENDTESTBREAKITERATOR; + } + usearch_close(strsrch); + count += 2; + } + count = 0; + while (BREAKITERATORCANONICAL[count].text != NULL) { + if (!assertEqual(BREAKITERATORCANONICAL[count])) { + log_err("Error at test number %d\n", count); + goto ENDTESTBREAKITERATOR; + } + count ++; + } + +ENDTESTBREAKITERATOR: + close(); +} + +static void TestVariableCanonical() +{ + int count = 0; + UErrorCode status = U_ZERO_ERROR; + open(); + ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); + if (U_FAILURE(status)) { + log_err("Error setting collation alternate attribute %s\n", + u_errorName(status)); + } + while (VARIABLE[count].text != NULL) { + log_verbose("variable %d\n", count); + if (!assertCanonicalEqual(VARIABLE[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING, + UCOL_NON_IGNORABLE, &status); + close(); +} + +static void TestOverlapCanonical() +{ + int count = 0; + open(); + while (OVERLAPCANONICAL[count].text != NULL) { + if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], USEARCH_ON, + USEARCH_ON)) { + log_err("Error at overlap test number %d\n", count); + } + count ++; + } + count = 0; + while (NONOVERLAP[count].text != NULL) { + if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) { + log_err("Error at non 
overlap test number %d\n", count); + } + count ++; + } + + count = 0; + while (count < 1) { + UChar pattern[32]; + UChar text[128]; + const SearchData *search = &(OVERLAPCANONICAL[count]); + UCollator *collator = getCollator(search->collator); + UStringSearch *strsrch; + UErrorCode status = U_ZERO_ERROR; + + u_unescape(search->text, text, 128); + u_unescape(search->pattern, pattern, 32); + strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator, + NULL, &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_ON, &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_ON) { + log_err("Error setting overlap option\n"); + } + if (!assertEqualWithUStringSearch(strsrch, *search)) { + usearch_close(strsrch); + return; + } + search = &(NONOVERLAPCANONICAL[count]); + usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_OFF, &status); + if (U_FAILURE(status) || + usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF) { + log_err("Error setting overlap option\n"); + } + usearch_reset(strsrch); + if (!assertEqualWithUStringSearch(strsrch, *search)) { + usearch_close(strsrch); + log_err("Error at test number %d\n", count); + } + + count ++; + usearch_close(strsrch); + } + close(); +} + +static void TestCollatorCanonical() +{ + /* test collator that thinks "o" and "p" are the same thing */ + UChar rules[32]; + UCollator *tailored = NULL; + UErrorCode status = U_ZERO_ERROR; + UChar pattern[32]; + UChar text[128]; + UStringSearch *strsrch; + + open(); + u_unescape(COLLATORCANONICAL[0].text, text, 128); + u_unescape(COLLATORCANONICAL[0].pattern, pattern, 32); + + strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, + NULL, &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + } + if 
(!assertEqualWithUStringSearch(strsrch, COLLATORCANONICAL[0])) { + goto ENDTESTCOLLATOR; + } + + u_unescape(TESTCOLLATORRULE, rules, 32); + tailored = ucol_openRules(rules, -1, UNORM_NFD, + COLLATORCANONICAL[1].strength, NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening rule based collator %s\n", u_errorName(status)); + } + + usearch_setCollator(strsrch, tailored, &status); + if (U_FAILURE(status) || usearch_getCollator(strsrch) != tailored) { + log_err("Error setting rule based collator\n"); + } + usearch_reset(strsrch); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (!assertEqualWithUStringSearch(strsrch, COLLATORCANONICAL[1])) { + goto ENDTESTCOLLATOR; + } + + usearch_setCollator(strsrch, EN_US_, &status); + usearch_reset(strsrch); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (U_FAILURE(status) || usearch_getCollator(strsrch) != EN_US_) { + log_err("Error setting rule based collator\n"); + } + if (!assertEqualWithUStringSearch(strsrch, COLLATORCANONICAL[0])) { + goto ENDTESTCOLLATOR; + } + +ENDTESTCOLLATOR: + usearch_close(strsrch); + if (tailored != NULL) { + ucol_close(tailored); + } + close(); +} + +static void TestPatternCanonical() +{ + UStringSearch *strsrch; + UChar pattern[32]; + UChar text[128]; + const UChar *temp; + int32_t templength; + UErrorCode status = U_ZERO_ERROR; + + open(); + u_unescape(PATTERNCANONICAL[0].text, text, 128); + u_unescape(PATTERNCANONICAL[0].pattern, pattern, 32); + + ucol_setStrength(EN_US_, PATTERNCANONICAL[0].strength); + strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, + NULL, &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + goto ENDTESTPATTERN; + } + temp = usearch_getPattern(strsrch, &templength); + if (u_strcmp(pattern, temp) != 0) { + log_err("Error setting pattern\n"); 
+ } + if (!assertEqualWithUStringSearch(strsrch, PATTERNCANONICAL[0])) { + goto ENDTESTPATTERN; + } + + u_unescape(PATTERNCANONICAL[1].pattern, pattern, 32); + usearch_setPattern(strsrch, pattern, -1, &status); + temp = usearch_getPattern(strsrch, &templength); + if (u_strcmp(pattern, temp) != 0) { + log_err("Error setting pattern\n"); + goto ENDTESTPATTERN; + } + usearch_reset(strsrch); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (U_FAILURE(status)) { + log_err("Error setting pattern %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, PATTERNCANONICAL[1])) { + goto ENDTESTPATTERN; + } + + u_unescape(PATTERNCANONICAL[0].pattern, pattern, 32); + usearch_setPattern(strsrch, pattern, -1, &status); + temp = usearch_getPattern(strsrch, &templength); + if (u_strcmp(pattern, temp) != 0) { + log_err("Error setting pattern\n"); + goto ENDTESTPATTERN; + } + usearch_reset(strsrch); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (U_FAILURE(status)) { + log_err("Error setting pattern %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, PATTERNCANONICAL[0])) { + goto ENDTESTPATTERN; + } +ENDTESTPATTERN: + ucol_setStrength(EN_US_, UCOL_TERTIARY); + if (strsrch != NULL) { + usearch_close(strsrch); + } + close(); +} + +static void TestTextCanonical() +{ + UStringSearch *strsrch; + UChar pattern[32]; + UChar text[128]; + const UChar *temp; + int32_t templength; + UErrorCode status = U_ZERO_ERROR; + + u_unescape(TEXTCANONICAL[0].text, text, 128); + u_unescape(TEXTCANONICAL[0].pattern, pattern, 32); + + open(); + strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, + NULL, &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + goto ENDTESTPATTERN; + } + temp = usearch_getText(strsrch, &templength); + if 
(u_strcmp(text, temp) != 0) { + log_err("Error setting text\n"); + } + if (!assertEqualWithUStringSearch(strsrch, TEXTCANONICAL[0])) { + goto ENDTESTPATTERN; + } + + u_unescape(TEXTCANONICAL[1].text, text, 32); + usearch_setText(strsrch, text, -1, &status); + temp = usearch_getText(strsrch, &templength); + if (u_strcmp(text, temp) != 0) { + log_err("Error setting text\n"); + goto ENDTESTPATTERN; + } + if (U_FAILURE(status)) { + log_err("Error setting text %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, TEXTCANONICAL[1])) { + goto ENDTESTPATTERN; + } + + u_unescape(TEXTCANONICAL[0].text, text, 32); + usearch_setText(strsrch, text, -1, &status); + temp = usearch_getText(strsrch, &templength); + if (u_strcmp(text, temp) != 0) { + log_err("Error setting text\n"); + goto ENDTESTPATTERN; + } + if (U_FAILURE(status)) { + log_err("Error setting pattern %s\n", u_errorName(status)); + } + if (!assertEqualWithUStringSearch(strsrch, TEXTCANONICAL[0])) { + goto ENDTESTPATTERN; + } +ENDTESTPATTERN: + if (strsrch != NULL) { + usearch_close(strsrch); + } + close(); +} + +static void TestCompositeBoundariesCanonical() +{ + int count = 0; + open(); + while (COMPOSITEBOUNDARIESCANONICAL[count].text != NULL) { + log_verbose("composite %d\n", count); + if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + close(); +} + +static void TestGetSetOffsetCanonical() +{ + int index = 0; + UChar pattern[32]; + UChar text[128]; + UErrorCode status = U_ZERO_ERROR; + UStringSearch *strsrch; + + open(); + strsrch = usearch_openFromCollator(pattern, 16, text, 32, EN_US_, NULL, + &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + /* testing out of bounds error */ + usearch_setOffset(strsrch, -1, &status); + if (U_SUCCESS(status)) { + log_err("Error expecting set offset error\n"); + } + usearch_setOffset(strsrch, 128, &status); + if 
(U_SUCCESS(status)) { + log_err("Error expecting set offset error\n"); + } + while (BASICCANONICAL[index].text != NULL) { + int count = 0; + SearchData search = BASICCANONICAL[index ++]; + UTextOffset matchindex = search.offset[count]; + int32_t textlength; + + if (BASICCANONICAL[index].text == NULL) { + /* skip the last one */ + break; + } + + u_unescape(search.text, text, 128); + u_unescape(search.pattern, pattern, 32); + status = U_ZERO_ERROR; + usearch_setText(strsrch, text, -1, &status); + usearch_setPattern(strsrch, pattern, -1, &status); + while (U_SUCCESS(status) && matchindex >= 0) { + uint32_t matchlength = search.size[count]; + usearch_next(strsrch, &status); + if (matchindex != usearch_getMatchedStart(strsrch) || + matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) { + char *str = toCharString(usearch_getText(strsrch, + &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error match found at %d %d\n", + usearch_getMatchedStart(strsrch), + usearch_getMatchedLength(strsrch)); + return; + } + matchindex = search.offset[count + 1] == -1 ? 
-1 : + search.offset[count + 2]; + if (search.offset[count + 1] != -1) { + usearch_setOffset(strsrch, search.offset[count + 1] + 1, + &status); + if (usearch_getOffset(strsrch) != search.offset[count + 1] + 1) { + log_err("Error setting offset\n"); + return; + } + } + + count += 2; + } + usearch_next(strsrch, &status); + if ((uint32_t)usearch_getMatchedStart(strsrch) != USEARCH_DONE) { + char *str = toCharString(usearch_getText(strsrch, &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error match found at %d %d\n", + usearch_getMatchedStart(strsrch), + usearch_getMatchedLength(strsrch)); + return; + } + } + usearch_close(strsrch); + close(); +} + +static void TestSupplementaryCanonical() +{ + int count = 0; + open(); + while (SUPPLEMENTARYCANONICAL[count].text != NULL) { + if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + close(); +} + +static void TestContractionCanonical() +{ + UChar rules[128]; + UChar pattern[128]; + UChar text[128]; + UCollator *collator; + UErrorCode status = U_ZERO_ERROR; + int count = 0; + UStringSearch *strsrch; + + u_unescape(CONTRACTIONRULE, rules, 128); + collator = ucol_openRules(rules, u_strlen(rules), UNORM_NFD, + UCOL_TERTIARY, NULL, &status); + if (U_FAILURE(status)) { + log_err("Error opening collator %s\n", u_errorName(status)); + } + strsrch = usearch_openFromCollator(pattern, 1, text, 1, collator, NULL, + &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, + &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + } + + while (CONTRACTIONCANONICAL[count].text != NULL) { + u_unescape(CONTRACTIONCANONICAL[count].text, text, 128); + u_unescape(CONTRACTIONCANONICAL[count].pattern, pattern, 128); + usearch_setText(strsrch, text, -1, &status); + usearch_setPattern(strsrch, 
pattern, -1, &status); + if (!assertEqualWithUStringSearch(strsrch, + CONTRACTIONCANONICAL[count])) { + log_err("Error at test number %d\n", count); + } + count ++; + } + usearch_close(strsrch); + ucol_close(collator); +} + +void addSearchTest(TestNode** root) +{ + addTest(root, &TestStart, "tscoll/usrchtst/TestStart"); + addTest(root, &TestOpenClose, "tscoll/usrchtst/TestOpenClose"); + addTest(root, &TestInitialization, "tscoll/usrchtst/TestInitialization"); + addTest(root, &TestBasic, "tscoll/usrchtst/TestBasic"); + addTest(root, &TestNormExact, "tscoll/usrchtst/TestNormExact"); + addTest(root, &TestStrength, "tscoll/usrchtst/TestStrength"); + addTest(root, &TestBreakIterator, "tscoll/usrchtst/TestBreakIterator"); + addTest(root, &TestVariable, "tscoll/usrchtst/TestVariable"); + addTest(root, &TestOverlap, "tscoll/usrchtst/TestOverlap"); + addTest(root, &TestCollator, "tscoll/usrchtst/TestCollator"); + addTest(root, &TestPattern, "tscoll/usrchtst/TestPattern"); + addTest(root, &TestText, "tscoll/usrchtst/TestText"); + addTest(root, &TestCompositeBoundaries, + "tscoll/usrchtst/TestCompositeBoundaries"); + addTest(root, &TestGetSetOffset, "tscoll/usrchtst/TestGetSetOffset"); + addTest(root, &TestGetSetAttribute, + "tscoll/usrchtst/TestGetSetAttribute"); + addTest(root, &TestGetMatch, "tscoll/usrchtst/TestGetMatch"); + addTest(root, &TestSetMatch, "tscoll/usrchtst/TestSetMatch"); + addTest(root, &TestReset, "tscoll/usrchtst/TestReset"); + addTest(root, &TestSupplementary, "tscoll/usrchtst/TestSupplementary"); + addTest(root, &TestContraction, "tscoll/usrchtst/TestContraction"); + addTest(root, &TestIgnorable, "tscoll/usrchtst/TestIgnorable"); + addTest(root, &TestCanonical, "tscoll/usrchtst/TestCanonical"); + addTest(root, &TestNormCanonical, "tscoll/usrchtst/TestNormCanonical"); + addTest(root, &TestStrengthCanonical, + "tscoll/usrchtst/TestStrengthCanonical"); + addTest(root, &TestBreakIteratorCanonical, + "tscoll/usrchtst/TestBreakIteratorCanonical"); + 
addTest(root, &TestVariableCanonical, + "tscoll/usrchtst/TestVariableCanonical"); + addTest(root, &TestOverlapCanonical, + "tscoll/usrchtst/TestOverlapCanonical"); + addTest(root, &TestCollatorCanonical, + "tscoll/usrchtst/TestCollatorCanonical"); + addTest(root, &TestPatternCanonical, + "tscoll/usrchtst/TestPatternCanonical"); + addTest(root, &TestTextCanonical, "tscoll/usrchtst/TestTextCanonical"); + addTest(root, &TestCompositeBoundariesCanonical, + "tscoll/usrchtst/TestCompositeBoundariesCanonical"); + addTest(root, &TestGetSetOffsetCanonical, + "tscoll/usrchtst/TestGetSetOffsetCanonical"); + addTest(root, &TestSupplementaryCanonical, + "tscoll/usrchtst/TestSupplementaryCanonical"); + addTest(root, &TestContractionCanonical, + "tscoll/usrchtst/TestContractionCanonical"); + addTest(root, &TestEnd, "tscoll/usrchtst/TestEnd"); +} + diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index 30b3fc67f2..5ba0e55ab9 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -47,7 +47,7 @@ tsmthred.o tsmutex.o tsnmfmt.o tsputil.o tstnorm.o tzbdtest.o \ tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o transtst.o strtest.o thcoll.o \ itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o unhxtrts.o hxuntrts.o \ jahatrts.o hajatrts.o ufltlgts.o testutil.o transrt.o normconf.o sfwdchit.o \ -jamotest.o +jamotest.o srchtest.o DEPS = $(OBJECTS:.o=.d) diff --git a/icu4c/source/test/intltest/intltest.dsp b/icu4c/source/test/intltest/intltest.dsp index 90dec3e776..7bcf41e0c7 100644 --- a/icu4c/source/test/intltest/intltest.dsp +++ b/icu4c/source/test/intltest/intltest.dsp @@ -317,6 +317,10 @@ SOURCE=.\sfwdchit.cpp # End Source File # Begin Source File +SOURCE=.\srchtest.cpp +# End Source File +# Begin Source File + SOURCE=.\strtest.cpp # End Source File # Begin Source File @@ -649,6 +653,10 @@ SOURCE=.\sfwdchit.h # End Source File # Begin Source File +SOURCE=.\srchtest.h +# End Source File 
+# Begin Source File + SOURCE=.\strtest.h # End Source File # Begin Source File diff --git a/icu4c/source/test/intltest/srchtest.cpp b/icu4c/source/test/intltest/srchtest.cpp new file mode 100644 index 0000000000..d7c5f52183 --- /dev/null +++ b/icu4c/source/test/intltest/srchtest.cpp @@ -0,0 +1,1943 @@ +/* +***************************************************************************** +* Copyright (C) 2001, International Business Machines Corporation and others. +* All Rights Reserved. +****************************************************************************/ + +#include "srchtest.h" +#include "../cintltst/usrchdat.c" +#include "unicode/stsearch.h" +#include "unicode/ustring.h" +#include "unicode/schriter.h" +#include + +// private definitions ----------------------------------------------------- + +#define CASE(id,test) \ + case id: \ + name = #test; \ + if (exec) { \ + logln(#test "---"); \ + logln((UnicodeString)""); \ + test(); \ + } \ + break; + +// public constructors and destructors ------------------------------------- + +/* + * Creates the en_US, fr_FR, de_DE and es_ES collators (de and es are rebuilt + * from their own rules plus EXTRACOLLATIONRULE) and the English word and + * character break iterators that the tests share. + */ +StringSearchTest::StringSearchTest() +{ + UErrorCode status = U_ZERO_ERROR; + + m_en_us_ = (RuleBasedCollator *)Collator::createInstance("en_US", status); + m_fr_fr_ = (RuleBasedCollator *)Collator::createInstance("fr_FR", status); + m_de_ = (RuleBasedCollator *)Collator::createInstance("de_DE", status); + m_es_ = (RuleBasedCollator *)Collator::createInstance("es_ES", status); + + if (U_FAILURE(status)) { + /* the code below dereferences m_de_ and m_es_; without the base + * collators leave every member NULL so runIndexedTest reports the + * failure instead of crashing here */ + delete m_en_us_; + delete m_fr_fr_; + delete m_de_; + delete m_es_; + m_en_us_ = NULL; + m_fr_fr_ = NULL; + m_de_ = NULL; + m_es_ = NULL; + m_en_wordbreaker_ = NULL; + m_en_characterbreaker_ = NULL; + return; + } + + UnicodeString rules; + rules.setTo(((RuleBasedCollator *)m_de_)->getRules()); + UChar extrarules[128]; + u_unescape(EXTRACOLLATIONRULE, extrarules, 128); + rules.append(extrarules, u_strlen(extrarules)); + delete m_de_; + + m_de_ = new RuleBasedCollator(rules, status); + + rules.setTo(((RuleBasedCollator *)m_es_)->getRules()); + rules.append(extrarules, u_strlen(extrarules)); + + delete m_es_; + + m_es_ = new RuleBasedCollator(rules, status); + + m_en_wordbreaker_ = BreakIterator::createWordInstance( + Locale::ENGLISH, status); + m_en_characterbreaker_ = 
BreakIterator::createCharacterInstance( + Locale::ENGLISH, status); +} + +StringSearchTest::~StringSearchTest() +{ + delete m_en_us_; + delete m_fr_fr_; + delete m_de_; + delete m_es_; + delete m_en_wordbreaker_; + delete m_en_characterbreaker_; +} + +// public methods ---------------------------------------------------------- + +/* + * Test dispatcher: the CASE table maps index to a test name and, when exec + * is set, runs that test. An unknown index yields an empty name. + */ +void StringSearchTest::runIndexedTest(int32_t index, UBool exec, + const char* &name, char* par) +{ + /* Refuse to run when ANY shared collator or break iterator is missing; + * requiring all six to be NULL let a partial failure through to tests + * that use these members without checking them again. */ + if (exec && (m_en_us_ == NULL || m_fr_fr_ == NULL || m_de_ == NULL || + m_es_ == NULL || m_en_wordbreaker_ == NULL || + m_en_characterbreaker_ == NULL)) { + errln(__FILE__ " cannot test - failed to create collator."); + name = ""; + return; + } + + switch (index) { + CASE(0, TestOpenClose) + CASE(1, TestInitialization) + CASE(2, TestBasic) + CASE(3, TestNormExact) + CASE(4, TestStrength) + CASE(5, TestBreakIterator) + CASE(6, TestVariable) + CASE(7, TestOverlap) + CASE(8, TestCollator) + CASE(9, TestPattern) + CASE(10, TestText) + CASE(11, TestCompositeBoundaries) + CASE(12, TestGetSetOffset) + CASE(13, TestGetSetAttribute) + CASE(14, TestGetMatch) + CASE(15, TestSetMatch) + CASE(16, TestReset) + CASE(17, TestSupplementary) + CASE(18, TestContraction) + CASE(19, TestIgnorable) + CASE(20, TestCanonical) + CASE(21, TestNormCanonical) + CASE(22, TestStrengthCanonical) + CASE(23, TestBreakIteratorCanonical) + CASE(24, TestVariableCanonical) + CASE(25, TestOverlapCanonical) + CASE(26, TestCollatorCanonical) + CASE(27, TestPatternCanonical) + CASE(28, TestTextCanonical) + CASE(29, TestCompositeBoundariesCanonical) + CASE(30, TestGetSetOffsetCanonical) + CASE(31, TestSupplementaryCanonical) + CASE(32, TestContractionCanonical) + default: name = ""; break; + } +} + +// private methods ------------------------------------------------------ + +RuleBasedCollator * StringSearchTest::getCollator(const char *collator) +{ + if (collator == NULL) { + return m_en_us_; + } + if (strcmp(collator, "fr") == 0) { + return m_fr_fr_; + } + else if (strcmp(collator, 
"de") == 0) { + return m_de_; + } + else if (strcmp(collator, "es") == 0) { + return m_es_; + } + else { + return m_en_us_; + } +} + +BreakIterator * StringSearchTest::getBreakIterator(const char *breaker) +{ + if (breaker == NULL) { + return NULL; + } + if (strcmp(breaker, "wordbreaker") == 0) { + return m_en_wordbreaker_; + } + else { + return m_en_characterbreaker_; + } +} + +char * StringSearchTest::toCharString(const UnicodeString &text) +{ + UChar unichars[512]; + static char result[1024]; + int count = 0; + int index = 0; + int length = text.length(); + + text.extract(0, text.length(), unichars, 0); + + for (; count < length; count ++) { + UChar ch = unichars[count]; + if (ch >= 0x20 && ch <= 0x7e) { + result[index ++] = (char)ch; + } + else { + char digit[5]; + int zerosize; + result[index ++] = '\\'; + result[index ++] = 'u'; + sprintf(digit, "%x", ch); + zerosize = 4 - strlen(digit); + while (zerosize != 0) { + result[index ++] = '0'; + zerosize --; + } + result[index] = 0; + strcat(result, digit); + index += strlen(digit); + } + } + result[index] = 0; + + return result; +} + +Collator::ECollationStrength StringSearchTest::getECollationStrength( + const UCollationStrength &strength) const +{ + switch (strength) + { + case UCOL_PRIMARY : + return Collator::PRIMARY; + case UCOL_SECONDARY : + return Collator::SECONDARY; + case UCOL_TERTIARY : + return Collator::TERTIARY; + default : + return Collator::IDENTICAL; + } +} + +UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch, + const SearchData *search) +{ + int count = 0; + UErrorCode status = U_ZERO_ERROR; + UTextOffset matchindex = search->offset[count]; + UnicodeString matchtext; + + if (strsrch->getMatchedStart() != USEARCH_DONE || + strsrch->getMatchedLength() != 0) { + errln("Error with the initialization of match start and length"); + } + // start of following matches + while (U_SUCCESS(status) && matchindex >= 0) { + int32_t matchlength = search->size[count]; + 
strsrch->next(status); + if (matchindex != strsrch->getMatchedStart() || + matchlength != strsrch->getMatchedLength()) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error following match found at %d %d", + strsrch->getMatchedStart(), strsrch->getMatchedLength()); + return FALSE; + } + count ++; + + strsrch->getMatchedText(matchtext); + + if (U_FAILURE(status) || + strsrch->getText().compareBetween(matchindex, + matchindex + matchlength, + matchtext, 0, + matchtext.length())) { + errln("Error getting following matched text"); + } + + matchindex = search->offset[count]; + } + strsrch->next(status); + if (strsrch->getMatchedStart() != USEARCH_DONE || + strsrch->getMatchedLength() != 0) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error following match found at %d %d", + strsrch->getMatchedStart(), strsrch->getMatchedLength()); + return FALSE; + } + // start of preceding matches + count = count == 0 ? 0 : count - 1; + matchindex = search->offset[count]; + while (U_SUCCESS(status) && matchindex >= 0) { + int32_t matchlength = search->size[count]; + strsrch->previous(status); + if (matchindex != strsrch->getMatchedStart() || + matchlength != strsrch->getMatchedLength()) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error following match found at %d %d", + strsrch->getMatchedStart(), strsrch->getMatchedLength()); + return FALSE; + } + + strsrch->getMatchedText(matchtext); + + if (U_FAILURE(status) || + strsrch->getText().compareBetween(matchindex, + matchindex + matchlength, + matchtext, 0, + matchtext.length())) { + errln("Error getting following matched text"); + } + + matchindex = count > 0 ? 
search->offset[count - 1] : -1; + count --; + } + strsrch->previous(status); + if (strsrch->getMatchedStart() != USEARCH_DONE || + strsrch->getMatchedLength() != 0) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error following match found at %d %d", + strsrch->getMatchedStart(), strsrch->getMatchedLength()); + return FALSE; + } + return TRUE; +} + +UBool StringSearchTest::assertEqual(const SearchData *search) +{ + UErrorCode status = U_ZERO_ERROR; + + Collator *collator = getCollator(search->collator); + BreakIterator *breaker = getBreakIterator(search->breaker); + StringSearch *strsrch; + UChar temp[128]; + + u_unescape(search->text, temp, 128); + UnicodeString text; + text.setTo(temp); + u_unescape(search->pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp); + + if (breaker != NULL) { + breaker->setText(text); + } + collator->setStrength(getECollationStrength(search->strength)); + strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator, + breaker, status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + return FALSE; + } + + if (!assertEqualWithStringSearch(strsrch, search)) { + collator->setStrength(getECollationStrength(UCOL_TERTIARY)); + delete strsrch; + return FALSE; + } + collator->setStrength(getECollationStrength(UCOL_TERTIARY)); + delete strsrch; + return TRUE; +} + +UBool StringSearchTest::assertCanonicalEqual(const SearchData *search) +{ + UErrorCode status = U_ZERO_ERROR; + Collator *collator = getCollator(search->collator); + BreakIterator *breaker = getBreakIterator(search->breaker); + StringSearch *strsrch; + UChar temp[128]; + + u_unescape(search->text, temp, 128); + UnicodeString text; + text.setTo(temp); + u_unescape(search->pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp); + + if (breaker != NULL) { + breaker->setText(text); + } + 
collator->setStrength(getECollationStrength(search->strength)); + strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator, + breaker, status); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + return FALSE; + } + + if (!assertEqualWithStringSearch(strsrch, search)) { + collator->setStrength(getECollationStrength(UCOL_TERTIARY)); + delete strsrch; + return FALSE; + } + collator->setStrength(getECollationStrength(UCOL_TERTIARY)); + delete strsrch; + return TRUE; +} + +UBool StringSearchTest::assertEqualWithAttribute(const SearchData *search, + USearchAttributeValue canonical, + USearchAttributeValue overlap) +{ + UErrorCode status = U_ZERO_ERROR; + Collator *collator = getCollator(search->collator); + BreakIterator *breaker = getBreakIterator(search->breaker); + StringSearch *strsrch; + UChar temp[128]; + + u_unescape(search->text, temp, 128); + UnicodeString text; + text.setTo(temp); + u_unescape(search->pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp); + + if (breaker != NULL) { + breaker->setText(text); + } + collator->setStrength(getECollationStrength(search->strength)); + strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator, + breaker, status); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, canonical, status); + strsrch->setAttribute(USEARCH_OVERLAP, overlap, status); + + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + return FALSE; + } + + if (!assertEqualWithStringSearch(strsrch, search)) { + collator->setStrength(getECollationStrength(UCOL_TERTIARY)); + delete strsrch; + return FALSE; + } + collator->setStrength(getECollationStrength(UCOL_TERTIARY)); + delete strsrch; + return TRUE; +} + +void StringSearchTest::TestOpenClose() +{ + UErrorCode status = U_ZERO_ERROR; + StringSearch *result; + BreakIterator *breakiter = m_en_wordbreaker_; + UnicodeString 
pattern; + UnicodeString text; + UnicodeString temp("a"); + StringCharacterIterator chariter(text); + + /* testing null arguments */ + result = new StringSearch(pattern, text, NULL, NULL, status); + delete result; + if (U_SUCCESS(status)) { + errln("Error: NULL arguments should produce an error"); + } + chariter.setText(text); + status = U_ZERO_ERROR; + result = new StringSearch(pattern, chariter, NULL, NULL, status); + delete result; + if (U_SUCCESS(status)) { + errln("Error: NULL arguments should produce an error"); + } + text.append(0, 0x1); + status = U_ZERO_ERROR; + result = new StringSearch(pattern, text, NULL, NULL, status); + delete result; + if (U_SUCCESS(status)) { + errln("Error: Empty pattern should produce an error"); + } + chariter.setText(text); + status = U_ZERO_ERROR; + result = new StringSearch(pattern, chariter, NULL, NULL, status); + delete result; + if (U_SUCCESS(status)) { + errln("Error: Empty pattern should produce an error"); + } + text.remove(); + pattern.append(temp); + status = U_ZERO_ERROR; + result = new StringSearch(pattern, text, NULL, NULL, status); + delete result; + if (U_SUCCESS(status)) { + errln("Error: Empty text should produce an error"); + } + chariter.setText(text); + status = U_ZERO_ERROR; + result = new StringSearch(pattern, chariter, NULL, NULL, status); + delete result; + if (U_SUCCESS(status)) { + errln("Error: Empty text should produce an error"); + } + text.append(temp); + status = U_ZERO_ERROR; + result = new StringSearch(pattern, text, NULL, NULL, status); + delete result; + if (U_SUCCESS(status)) { + errln("Error: NULL arguments should produce an error"); + } + chariter.setText(text); + status = U_ZERO_ERROR; + result = new StringSearch(pattern, chariter, NULL, NULL, status); + delete result; + if (U_SUCCESS(status)) { + errln("Error: NULL arguments should produce an error"); + } + status = U_ZERO_ERROR; + result = new StringSearch(pattern, text, m_en_us_, NULL, status); + delete result; + if (U_FAILURE(status)) { 
+ errln("Error: NULL break iterator is valid for opening search"); + } + status = U_ZERO_ERROR; + result = new StringSearch(pattern, chariter, m_en_us_, NULL, status); + delete result; + if (U_FAILURE(status)) { + errln("Error: NULL break iterator is valid for opening search"); + } + status = U_ZERO_ERROR; + result = new StringSearch(pattern, text, Locale::ENGLISH, NULL, status); + delete result; + if (U_FAILURE(status) || result == NULL) { + errln("Error: NULL break iterator is valid for opening search"); + } + status = U_ZERO_ERROR; + result = new StringSearch(pattern, chariter, Locale::ENGLISH, NULL, status); + delete result; + if (U_FAILURE(status)) { + errln("Error: NULL break iterator is valid for opening search"); + } + status = U_ZERO_ERROR; + result = new StringSearch(pattern, text, m_en_us_, breakiter, status); + delete result; + if (U_FAILURE(status)) { + errln("Error: Break iterator is valid for opening search"); + } + status = U_ZERO_ERROR; + result = new StringSearch(pattern, chariter, m_en_us_, NULL, status); + delete result; + if (U_FAILURE(status)) { + errln("Error: Break iterator is valid for opening search"); + } +} + +void StringSearchTest::TestInitialization() +{ + UErrorCode status = U_ZERO_ERROR; + UnicodeString pattern; + UnicodeString text; + UnicodeString temp("a"); + StringSearch *result; + + /* simple test on the pattern ce construction */ + pattern.append(temp); + pattern.append(temp); + text.append(temp); + text.append(temp); + text.append(temp); + result = new StringSearch(pattern, text, m_en_us_, NULL, status); + if (U_FAILURE(status)) { + errln("Error opening search %s", u_errorName(status)); + } + StringSearch *copy = new StringSearch(*result); + if (*(copy->getCollator()) != *(result->getCollator()) || + copy->getBreakIterator() != result->getBreakIterator() || + copy->getMatchedLength() != result->getMatchedLength() || + copy->getMatchedStart() != result->getMatchedStart() || + copy->getOffset() != result->getOffset() || + 
copy->getPattern() != result->getPattern() || + copy->getText() != result->getText() || + *(copy) != *(result)) + { + errln("Error copying StringSearch"); + } + delete result; + delete copy; + + /* testing if an extremely large pattern will fail the initialization */ + for (int count = 0; count < 512; count ++) { + pattern.append(temp); + } + result = new StringSearch(pattern, text, m_en_us_, NULL, status); + copy = new StringSearch(*result); + if (*(copy->getCollator()) != *(result->getCollator()) || + copy->getBreakIterator() != result->getBreakIterator() || + copy->getMatchedLength() != result->getMatchedLength() || + copy->getMatchedStart() != result->getMatchedStart() || + copy->getOffset() != result->getOffset() || + copy->getPattern() != result->getPattern() || + copy->getText() != result->getText() || + *(copy) != *(result)) + { + errln("Error copying StringSearch"); + } + if (U_FAILURE(status)) { + errln("Error opening search %s", u_errorName(status)); + } + delete result; + delete copy; +} + +void StringSearchTest::TestBasic() +{ + int count = 0; + while (BASIC[count].text != NULL) { + //printf("count %d", count); + if (!assertEqual(&BASIC[count])) { + errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestNormExact() +{ + int count = 0; + UErrorCode status = U_ZERO_ERROR; + m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); + if (U_FAILURE(status)) { + errln("Error setting collation normalization %s", + u_errorName(status)); + } + while (BASIC[count].text != NULL) { + if (!assertEqual(&BASIC[count])) { + errln("Error at test number %d", count); + } + count ++; + } + count = 0; + while (NORMEXACT[count].text != NULL) { + if (!assertEqual(&NORMEXACT[count])) { + errln("Error at test number %d", count); + } + count ++; + } + m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); + count = 0; + while (NONNORMEXACT[count].text != NULL) { + if (!assertEqual(&NONNORMEXACT[count])) { + 
errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestStrength() +{ + int count = 0; + while (STRENGTH[count].text != NULL) { + if (!assertEqual(&STRENGTH[count])) { + errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestBreakIterator() +{ + UChar temp[128]; + u_unescape(BREAKITERATOR[0].text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(BREAKITERATOR[0].pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + UErrorCode status = U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + } + + strsrch->setBreakIterator(NULL, status); + if (U_FAILURE(status) || strsrch->getBreakIterator() != NULL) { + errln("Error usearch_getBreakIterator returned wrong object"); + } + + strsrch->setBreakIterator(m_en_characterbreaker_, status); + if (U_FAILURE(status) || + strsrch->getBreakIterator() != m_en_characterbreaker_) { + errln("Error usearch_getBreakIterator returned wrong object"); + } + + strsrch->setBreakIterator(m_en_wordbreaker_, status); + if (U_FAILURE(status) || + strsrch->getBreakIterator() != m_en_wordbreaker_) { + errln("Error usearch_getBreakIterator returned wrong object"); + } + + delete strsrch; + + int count = 0; + while (count < 4) { + const SearchData *search = &(BREAKITERATOR[count]); + RuleBasedCollator *collator = getCollator(search->collator); + BreakIterator *breaker = getBreakIterator(search->breaker); + StringSearch *strsrch; + + u_unescape(search->text, temp, 128); + text.setTo(temp, u_strlen(temp)); + u_unescape(search->pattern, temp, 128); + pattern.setTo(temp, u_strlen(temp)); + if (breaker != NULL) { + breaker->setText(text); + } + collator->setStrength(getECollationStrength(search->strength)); + + strsrch = new StringSearch(pattern, text, collator, breaker, 
status); + if (U_FAILURE(status) || + strsrch->getBreakIterator() != breaker) { + errln("Error setting break iterator"); + if (strsrch != NULL) { + delete strsrch; + } + } + if (!assertEqualWithStringSearch(strsrch, search)) { + collator->setStrength(getECollationStrength(UCOL_TERTIARY)); + delete strsrch; + } + search = &(BREAKITERATOR[count + 1]); + breaker = getBreakIterator(search->breaker); + if (breaker != NULL) { + breaker->setText(text); + } + strsrch->setBreakIterator(breaker, status); + if (U_FAILURE(status) || + strsrch->getBreakIterator() != breaker) { + errln("Error setting break iterator"); + delete strsrch; + } + strsrch->reset(); + if (!assertEqualWithStringSearch(strsrch, search)) { + errln("Error at test number %d", count); + } + delete strsrch; + count += 2; + } + count = 0; + while (BREAKITERATOR[count].text != NULL) { + if (!assertEqual(&BREAKITERATOR[count])) { + errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestVariable() +{ + int count = 0; + UErrorCode status = U_ZERO_ERROR; + m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); + if (U_FAILURE(status)) { + errln("Error setting collation alternate attribute %s", + u_errorName(status)); + } + while (VARIABLE[count].text != NULL) { + logln("variable %d", count); + if (!assertEqual(&VARIABLE[count])) { + errln("Error at test number %d", count); + } + count ++; + } + m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, + status); +} + +void StringSearchTest::TestOverlap() +{ + int count = 0; + while (OVERLAP[count].text != NULL) { + if (!assertEqualWithAttribute(&OVERLAP[count], USEARCH_OFF, + USEARCH_ON)) { + errln("Error at overlap test number %d", count); + } + count ++; + } + count = 0; + while (NONOVERLAP[count].text != NULL) { + if (!assertEqual(&NONOVERLAP[count])) { + errln("Error at non overlap test number %d", count); + } + count ++; + } + + count = 0; + while (count < 1) { + const SearchData *search = 
&(OVERLAP[count]); + UChar temp[128]; + u_unescape(search->text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(search->pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + RuleBasedCollator *collator = getCollator(search->collator); + UErrorCode status = U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, + collator, NULL, + status); + + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) { + errln("Error setting overlap option"); + } + if (!assertEqualWithStringSearch(strsrch, search)) { + delete strsrch; + return; + } + + search = &(NONOVERLAP[count]); + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { + errln("Error setting overlap option"); + } + strsrch->reset(); + if (!assertEqualWithStringSearch(strsrch, search)) { + delete strsrch; + errln("Error at test number %d", count); + } + + count ++; + delete strsrch; + } +} + +void StringSearchTest::TestCollator() +{ + // test collator that thinks "o" and "p" are the same thing + UChar temp[128]; + u_unescape(COLLATOR[0].text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(COLLATOR[0].pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + UErrorCode status = U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + delete strsrch; + return; + } + if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) { + delete strsrch; + return; + } + + u_unescape(TESTCOLLATORRULE, temp, 128); + UnicodeString rules; + rules.setTo(temp, u_strlen(temp)); + RuleBasedCollator *tailored = new RuleBasedCollator(rules, status); + 
tailored->setStrength(getECollationStrength(COLLATOR[1].strength)); + + if (U_FAILURE(status)) { + errln("Error opening rule based collator %s", u_errorName(status)); + delete strsrch; + if (tailored != NULL) { + delete tailored; + } + return; + } + + strsrch->setCollator(tailored, status); + if (U_FAILURE(status) || (*strsrch->getCollator()) != (*tailored)) { + errln("Error setting rule based collator"); + delete strsrch; + if (tailored != NULL) { + delete tailored; + } + } + strsrch->reset(); + if (!assertEqualWithStringSearch(strsrch, &COLLATOR[1])) { + delete strsrch; + if (tailored != NULL) { + delete tailored; + } + return; + } + + strsrch->setCollator(m_en_us_, status); + strsrch->reset(); + if (U_FAILURE(status) || (*strsrch->getCollator()) != (*m_en_us_)) { + errln("Error setting rule based collator"); + delete strsrch; + if (tailored != NULL) { + delete tailored; + } + } + if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) { + errln("Error searching collator test"); + } + delete strsrch; + if (tailored != NULL) { + delete tailored; + } +} + +void StringSearchTest::TestPattern() +{ + + UChar temp[512]; + u_unescape(PATTERN[0].text, temp, 512); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(PATTERN[0].pattern, temp, 512); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + m_en_us_->setStrength(getECollationStrength(PATTERN[0].strength)); + UErrorCode status = U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); + if (strsrch != NULL) { + delete strsrch; + } + return; + } + if (strsrch->getPattern() != pattern) { + errln("Error setting pattern"); + } + if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) { + m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); + if (strsrch != NULL) { + delete strsrch; 
+ } + return; + } + + u_unescape(PATTERN[1].pattern, temp, 512); + pattern.setTo(temp, u_strlen(temp)); + strsrch->setPattern(pattern, status); + if (pattern != strsrch->getPattern()) { + errln("Error setting pattern"); + m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); + if (strsrch != NULL) { + delete strsrch; + } + return; + } + strsrch->reset(); + if (U_FAILURE(status)) { + errln("Error setting pattern %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &PATTERN[1])) { + m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); + if (strsrch != NULL) { + delete strsrch; + } + return; + } + + u_unescape(PATTERN[0].pattern, temp, 512); + pattern.setTo(temp, u_strlen(temp)); + strsrch->setPattern(pattern, status); + if (pattern != strsrch->getPattern()) { + errln("Error setting pattern"); + m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); + if (strsrch != NULL) { + delete strsrch; + } + return; + } + strsrch->reset(); + if (U_FAILURE(status)) { + errln("Error setting pattern %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) { + m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); + if (strsrch != NULL) { + delete strsrch; + } + return; + } + /* enormous pattern size to see if this crashes */ + for (int templength = 0; templength != 512; templength ++) { + temp[templength] = 0x61; + } + temp[511] = 0; + pattern.setTo(temp, 511); + strsrch->setPattern(pattern, status); + if (U_FAILURE(status)) { + errln("Error setting pattern with size 512, %s", u_errorName(status)); + } + m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); + if (strsrch != NULL) { + delete strsrch; + } +} + +void StringSearchTest::TestText() +{ + UChar temp[128]; + u_unescape(TEXT[0].text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(TEXT[0].pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + UErrorCode status = 
U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + return; + } + if (text != strsrch->getText()) { + errln("Error setting text"); + } + if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) { + delete strsrch; + return; + } + + u_unescape(TEXT[1].text, temp, 128); + text.setTo(temp, u_strlen(temp)); + strsrch->setText(text, status); + if (text != strsrch->getText()) { + errln("Error setting text"); + delete strsrch; + return; + } + if (U_FAILURE(status)) { + errln("Error setting text %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &TEXT[1])) { + delete strsrch; + return; + } + + u_unescape(TEXT[0].text, temp, 128); + text.setTo(temp, u_strlen(temp)); + strsrch->setText(text, status); + if (text != strsrch->getText()) { + errln("Error setting text"); + delete strsrch; + return; + } + if (U_FAILURE(status)) { + errln("Error setting pattern %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) { + errln("Error searching within set text"); + } + delete strsrch; +} + +void StringSearchTest::TestCompositeBoundaries() +{ + int count = 0; + while (COMPOSITEBOUNDARIES[count].text != NULL) { + logln("composite %d", count); + if (!assertEqual(&COMPOSITEBOUNDARIES[count])) { + errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestGetSetOffset() +{ + UErrorCode status = U_ZERO_ERROR; + UnicodeString pattern("1234567890123456"); + UnicodeString text("12345678901234567890123456789012"); + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, + NULL, status); + /* testing out of bounds error */ + strsrch->setOffset(-1, status); + if (U_SUCCESS(status)) { + errln("Error expecting set offset error"); + } + strsrch->setOffset(128, status); + if (U_SUCCESS(status)) { + errln("Error expecting set offset error"); + } + int index = 0; + 
while (BASIC[index].text != NULL) { + UErrorCode status = U_ZERO_ERROR; + SearchData search = BASIC[index ++]; + UChar temp[128]; + + u_unescape(search.text, temp, 128); + text.setTo(temp, u_strlen(temp)); + u_unescape(search.pattern, temp, 128); + pattern.setTo(temp, u_strlen(temp)); + strsrch->setText(text, status); + strsrch->setPattern(pattern, status); + + int count = 0; + UTextOffset matchindex = search.offset[count]; + while (U_SUCCESS(status) && matchindex >= 0) { + int32_t matchlength = search.size[count]; + strsrch->next(status); + if (matchindex != strsrch->getMatchedStart() || + matchlength != strsrch->getMatchedLength()) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error match found at %d %d", + strsrch->getMatchedStart(), + strsrch->getMatchedLength()); + return; + } + matchindex = search.offset[count + 1] == -1 ? -1 : + search.offset[count + 2]; + if (search.offset[count + 1] != -1) { + strsrch->setOffset(search.offset[count + 1] + 1, status); + if (strsrch->getOffset() != search.offset[count + 1] + 1) { + errln("Error setting offset\n"); + return; + } + } + + count += 2; + } + strsrch->next(status); + if (strsrch->getMatchedStart() != USEARCH_DONE) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error match found at %d %d", + strsrch->getMatchedStart(), + strsrch->getMatchedLength()); + return; + } + } + delete strsrch; +} + +void StringSearchTest::TestGetSetAttribute() +{ + UErrorCode status = U_ZERO_ERROR; + UnicodeString pattern("pattern"); + UnicodeString text("text"); + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + if (U_FAILURE(status)) { + errln("Error opening search %s", u_errorName(status)); + return; + } + + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_DEFAULT, status); + if 
(U_FAILURE(status) || + strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { + errln("Error setting overlap to the default"); + } + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) { + errln("Error setting overlap true"); + } + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { + errln("Error setting overlap false"); + } + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ATTRIBUTE_VALUE_COUNT, + status); + if (U_SUCCESS(status)) { + errln("Error setting overlap to illegal value"); + } + status = U_ZERO_ERROR; + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) { + errln("Error setting canonical match to the default"); + } + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_ON) { + errln("Error setting canonical match true"); + } + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_OFF, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) { + errln("Error setting canonical match false"); + } + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, + USEARCH_ATTRIBUTE_VALUE_COUNT, status); + if (U_SUCCESS(status)) { + errln("Error setting canonical match to illegal value"); + } + status = U_ZERO_ERROR; + strsrch->setAttribute(USEARCH_ATTRIBUTE_COUNT, USEARCH_DEFAULT, status); + if (U_SUCCESS(status)) { + errln("Error setting illegal attribute success"); + } + + delete strsrch; +} + +void StringSearchTest::TestGetMatch() +{ + UChar temp[128]; + SearchData search = MATCH[0]; + u_unescape(search.text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(search.pattern, temp, 128); + 
UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + UErrorCode status = U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + if (strsrch != NULL) { + delete strsrch; + } + return; + } + + int count = 0; + UTextOffset matchindex = search.offset[count]; + UnicodeString matchtext; + while (U_SUCCESS(status) && matchindex >= 0) { + int32_t matchlength = search.size[count]; + strsrch->next(status); + if (matchindex != strsrch->getMatchedStart() || + matchlength != strsrch->getMatchedLength()) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error match found at %d %d", strsrch->getMatchedStart(), + strsrch->getMatchedLength()); + return; + } + count ++; + + status = U_ZERO_ERROR; + strsrch->getMatchedText(matchtext); + if (matchtext.length() != matchlength || U_FAILURE(status)){ + errln("Error getting match text"); + } + matchindex = search.offset[count]; + } + status = U_ZERO_ERROR; + strsrch->next(status); + if (strsrch->getMatchedStart() != USEARCH_DONE || + strsrch->getMatchedLength() != 0) { + errln("Error end of match not found"); + } + status = U_ZERO_ERROR; + strsrch->getMatchedText(matchtext); + if (matchtext.length() != 0) { + errln("Error getting null matches"); + } + delete strsrch; +} + +void StringSearchTest::TestSetMatch() +{ + int count = 0; + while (MATCH[count].text != NULL) { + SearchData search = MATCH[count]; + UChar temp[128]; + UErrorCode status = U_ZERO_ERROR; + u_unescape(search.text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(search.pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, + NULL, status); + if (U_FAILURE(status)) { + errln("Error 
opening string search %s", u_errorName(status)); + if (strsrch != NULL) { + delete strsrch; + } + return; + } + + int size = 0; + while (search.offset[size] != -1) { + size ++; + } + + if (strsrch->first(status) != search.offset[0] || U_FAILURE(status)) { + errln("Error getting first match"); + } + if (strsrch->last(status) != search.offset[size -1] || + U_FAILURE(status)) { + errln("Error getting last match"); + } + + int index = 0; + while (index < size) { + if (index + 2 < size) { + if (strsrch->following(search.offset[index + 2] - 1, status) + != search.offset[index + 2] || U_FAILURE(status)) { + errln("Error getting following match at index %d", + search.offset[index + 2] - 1); + } + } + if (index + 1 < size) { + if (strsrch->preceding(search.offset[index + 1] + + search.size[index + 1] + 1, + status) != search.offset[index + 1] || + U_FAILURE(status)) { + errln("Error getting preceeding match at index %d", + search.offset[index + 1] + 1); + } + } + index += 2; + } + status = U_ZERO_ERROR; + if (strsrch->following(text.length(), status) != USEARCH_DONE) { + errln("Error expecting out of bounds match"); + } + if (strsrch->preceding(0, status) != USEARCH_DONE) { + errln("Error expecting out of bounds match"); + } + count ++; + delete strsrch; + } +} + +void StringSearchTest::TestReset() +{ + UErrorCode status = U_ZERO_ERROR; + UnicodeString text("text text text"); + UnicodeString pattern("pattern"); + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + if (strsrch != NULL) { + delete strsrch; + } + return; + } + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + strsrch->setOffset(10, status); + if (U_FAILURE(status)) { + errln("Error setting attributes and offsets"); + } + else { + strsrch->reset(); + if (strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF 
|| + strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF || + strsrch->getOffset() != 0 || strsrch->getMatchedLength() != 0 || + strsrch->getMatchedStart() != USEARCH_DONE) { + errln("Error resetting string search"); + } + } + delete strsrch; +} + +void StringSearchTest::TestSupplementary() +{ + int count = 0; + while (SUPPLEMENTARY[count].text != NULL) { + if (!assertEqual(&SUPPLEMENTARY[count])) { + errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestContraction() +{ + UChar temp[128]; + UErrorCode status = U_ZERO_ERROR; + + u_unescape(CONTRACTIONRULE, temp, 128); + UnicodeString rules; + rules.setTo(temp, u_strlen(temp)); + RuleBasedCollator *collator = new RuleBasedCollator(rules, + getECollationStrength(UCOL_TERTIARY), Normalizer::DECOMP, status); + if (U_FAILURE(status)) { + errln("Error opening collator %s", u_errorName(status)); + } + UnicodeString text("text"); + UnicodeString pattern("pattern"); + StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL, + status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + } + + int count = 0; + while (CONTRACTION[count].text != NULL) { + u_unescape(CONTRACTION[count].text, temp, 128); + text.setTo(temp, u_strlen(temp)); + u_unescape(CONTRACTION[count].pattern, temp, 128); + pattern.setTo(temp, u_strlen(temp)); + strsrch->setText(text, status); + strsrch->setPattern(pattern, status); + if (!assertEqualWithStringSearch(strsrch, &CONTRACTION[count])) { + errln("Error at test number %d", count); + } + count ++; + } + delete strsrch; + delete collator; +} + +void StringSearchTest::TestIgnorable() +{ + UChar temp[128]; + u_unescape(IGNORABLERULE, temp, 128); + UnicodeString rules; + rules.setTo(temp, u_strlen(temp)); + UErrorCode status = U_ZERO_ERROR; + int count = 0; + RuleBasedCollator *collator = new RuleBasedCollator(rules, + getECollationStrength(IGNORABLE[count].strength), + Normalizer::DECOMP, status); + 
if (U_FAILURE(status)) { + errln("Error opening collator %s", u_errorName(status)); + return; + } + UnicodeString pattern("pattern"); + UnicodeString text("text"); + StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL, + status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + delete collator; + return; + } + + while (IGNORABLE[count].text != NULL) { + u_unescape(IGNORABLE[count].text, temp, 128); + text.setTo(temp, u_strlen(temp)); + u_unescape(IGNORABLE[count].pattern, temp, 128); + pattern.setTo(temp, u_strlen(temp)); + strsrch->setText(text, status); + strsrch->setPattern(pattern, status); + if (!assertEqualWithStringSearch(strsrch, &IGNORABLE[count])) { + errln("Error at test number %d", count); + } + count ++; + } + delete strsrch; + delete collator; +} + +void StringSearchTest::TestCanonical() +{ + int count = 0; + while (BASICCANONICAL[count].text != NULL) { + if (!assertCanonicalEqual(&BASICCANONICAL[count])) { + errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestNormCanonical() +{ + UErrorCode status = U_ZERO_ERROR; + m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); + int count = 0; + while (NORMCANONICAL[count].text != NULL) { + if (!assertCanonicalEqual(&NORMCANONICAL[count])) { + errln("Error at test number %d", count); + } + count ++; + } + m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); +} + +void StringSearchTest::TestStrengthCanonical() +{ + int count = 0; + while (STRENGTHCANONICAL[count].text != NULL) { + if (!assertCanonicalEqual(&STRENGTHCANONICAL[count])) { + errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestBreakIteratorCanonical() +{ + UErrorCode status = U_ZERO_ERROR; + int count = 0; + + while (count < 4) { + UChar temp[128]; + const SearchData *search = &(BREAKITERATORCANONICAL[count]); + + u_unescape(search->text, temp, 128); + UnicodeString text; + 
text.setTo(temp, u_strlen(temp)); + u_unescape(search->pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + RuleBasedCollator *collator = getCollator(search->collator); + collator->setStrength(getECollationStrength(search->strength)); + + BreakIterator *breaker = getBreakIterator(search->breaker); + StringSearch *strsrch = new StringSearch(pattern, text, collator, + breaker, status); + if (U_FAILURE(status)) { + errln("Error creating string search data"); + return; + } + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (U_FAILURE(status) || + strsrch->getBreakIterator() != breaker) { + errln("Error setting break iterator"); + delete strsrch; + return; + } + if (!assertEqualWithStringSearch(strsrch, search)) { + collator->setStrength(getECollationStrength(UCOL_TERTIARY)); + delete strsrch; + return; + } + search = &(BREAKITERATOR[count + 1]); + breaker = getBreakIterator(search->breaker); + breaker->setText(strsrch->getText()); + strsrch->setBreakIterator(breaker, status); + if (U_FAILURE(status) || strsrch->getBreakIterator() != breaker) { + errln("Error setting break iterator"); + delete strsrch; + return; + } + strsrch->reset(); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (!assertEqualWithStringSearch(strsrch, search)) { + errln("Error at test number %d", count); + return; + } + delete strsrch; + count += 2; + } + count = 0; + while (BREAKITERATORCANONICAL[count].text != NULL) { + if (!assertEqual(&BREAKITERATORCANONICAL[count])) { + errln("Error at test number %d", count); + return; + } + count ++; + } +} + +void StringSearchTest::TestVariableCanonical() +{ + int count = 0; + UErrorCode status = U_ZERO_ERROR; + m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); + if (U_FAILURE(status)) { + errln("Error setting collation alternate attribute %s", + u_errorName(status)); + } + while (VARIABLE[count].text != NULL) { + logln("variable %d", count); + if 
(!assertCanonicalEqual(&VARIABLE[count])) { + errln("Error at test number %d", count); + } + count ++; + } + m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, + status); +} + +void StringSearchTest::TestOverlapCanonical() +{ + int count = 0; + while (OVERLAPCANONICAL[count].text != NULL) { + if (!assertEqualWithAttribute(&OVERLAPCANONICAL[count], USEARCH_ON, + USEARCH_ON)) { + errln("Error at overlap test number %d", count); + } + count ++; + } + count = 0; + while (NONOVERLAP[count].text != NULL) { + if (!assertCanonicalEqual(&NONOVERLAPCANONICAL[count])) { + errln("Error at non overlap test number %d", count); + } + count ++; + } + + count = 0; + while (count < 1) { + UChar temp[128]; + const SearchData *search = &(OVERLAPCANONICAL[count]); + UErrorCode status = U_ZERO_ERROR; + + u_unescape(search->text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(search->pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + RuleBasedCollator *collator = getCollator(search->collator); + StringSearch *strsrch = new StringSearch(pattern, text, collator, + NULL, status); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) { + errln("Error setting overlap option"); + } + if (!assertEqualWithStringSearch(strsrch, search)) { + delete strsrch; + return; + } + search = &(NONOVERLAPCANONICAL[count]); + strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status); + if (U_FAILURE(status) || + strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { + errln("Error setting overlap option"); + } + strsrch->reset(); + if (!assertEqualWithStringSearch(strsrch, search)) { + delete strsrch; + errln("Error at test number %d", count); + } + + count ++; + delete strsrch; + } +} + +void StringSearchTest::TestCollatorCanonical() +{ + /* test collator 
that thinks "o" and "p" are the same thing */ + UChar temp[128]; + u_unescape(COLLATORCANONICAL[0].text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(COLLATORCANONICAL[0].pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + UErrorCode status = U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, + NULL, status); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) { + delete strsrch; + return; + } + + u_unescape(TESTCOLLATORRULE, temp, 128); + UnicodeString rules; + rules.setTo(temp, u_strlen(temp)); + RuleBasedCollator *tailored = new RuleBasedCollator(rules, + getECollationStrength(COLLATORCANONICAL[1].strength), + Normalizer::DECOMP, status); + + if (U_FAILURE(status)) { + errln("Error opening rule based collator %s", u_errorName(status)); + } + + strsrch->setCollator(tailored, status); + if (U_FAILURE(status) || *(strsrch->getCollator()) != *tailored) { + errln("Error setting rule based collator"); + } + strsrch->reset(); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[1])) { + delete strsrch; + if (tailored != NULL) { + delete tailored; + } + } + + strsrch->setCollator(m_en_us_, status); + strsrch->reset(); + if (U_FAILURE(status) || *(strsrch->getCollator()) != *m_en_us_) { + errln("Error setting rule based collator"); + } + if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) { + } + delete strsrch; + if (tailored != NULL) { + delete tailored; + } +} + +void StringSearchTest::TestPatternCanonical() +{ + + UChar temp[128]; + + u_unescape(PATTERNCANONICAL[0].text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + u_unescape(PATTERNCANONICAL[0].pattern, temp, 128); 
+ UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + m_en_us_->setStrength( + getECollationStrength(PATTERNCANONICAL[0].strength)); + + UErrorCode status = U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + goto ENDTESTPATTERN; + } + if (pattern != strsrch->getPattern()) { + errln("Error setting pattern"); + } + if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) { + goto ENDTESTPATTERN; + } + + u_unescape(PATTERNCANONICAL[1].pattern, temp, 128); + pattern.setTo(temp, u_strlen(temp)); + strsrch->setPattern(pattern, status); + if (pattern != strsrch->getPattern()) { + errln("Error setting pattern"); + goto ENDTESTPATTERN; + } + strsrch->reset(); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (U_FAILURE(status)) { + errln("Error setting pattern %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[1])) { + goto ENDTESTPATTERN; + } + + u_unescape(PATTERNCANONICAL[0].pattern, temp, 128); + pattern.setTo(temp, u_strlen(temp)); + strsrch->setPattern(pattern, status); + if (pattern != strsrch->getPattern()) { + errln("Error setting pattern"); + goto ENDTESTPATTERN; + } + strsrch->reset(); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + if (U_FAILURE(status)) { + errln("Error setting pattern %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) { + goto ENDTESTPATTERN; + } +ENDTESTPATTERN: + m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); + if (strsrch != NULL) { + delete strsrch; + } +} + +void StringSearchTest::TestTextCanonical() +{ + UChar temp[128]; + u_unescape(TEXTCANONICAL[0].text, temp, 128); + UnicodeString text; + text.setTo(temp, u_strlen(temp)); + 
u_unescape(TEXTCANONICAL[0].pattern, temp, 128); + UnicodeString pattern; + pattern.setTo(temp, u_strlen(temp)); + + UErrorCode status = U_ZERO_ERROR; + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + status); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + goto ENDTESTPATTERN; + } + if (text != strsrch->getText()) { + errln("Error setting text"); + } + if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) { + goto ENDTESTPATTERN; + } + + u_unescape(TEXTCANONICAL[1].text, temp, 128); + text.setTo(temp, u_strlen(temp)); + strsrch->setText(text, status); + if (text != strsrch->getText()) { + errln("Error setting text"); + goto ENDTESTPATTERN; + } + if (U_FAILURE(status)) { + errln("Error setting text %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[1])) { + goto ENDTESTPATTERN; + } + + u_unescape(TEXTCANONICAL[0].text, temp, 128); + text.setTo(temp, u_strlen(temp)); + strsrch->setText(text, status); + if (text != strsrch->getText()) { + errln("Error setting text"); + goto ENDTESTPATTERN; + } + if (U_FAILURE(status)) { + errln("Error setting pattern %s", u_errorName(status)); + } + if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) { + goto ENDTESTPATTERN; + } +ENDTESTPATTERN: + if (strsrch != NULL) { + delete strsrch; + } +} + +void StringSearchTest::TestCompositeBoundariesCanonical() +{ + int count = 0; + while (COMPOSITEBOUNDARIESCANONICAL[count].text != NULL) { + logln("composite %d", count); + if (!assertCanonicalEqual(&COMPOSITEBOUNDARIESCANONICAL[count])) { + errln("Error at test number %d", count); + } + count ++; + } +} + +void StringSearchTest::TestGetSetOffsetCanonical() +{ + + UErrorCode status = U_ZERO_ERROR; + UnicodeString text("text"); + UnicodeString pattern("pattern"); + StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, + 
status); + strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); + /* testing out of bounds error */ + strsrch->setOffset(-1, status); + if (U_SUCCESS(status)) { + errln("Error expecting set offset error"); + } + strsrch->setOffset(128, status); + if (U_SUCCESS(status)) { + errln("Error expecting set offset error"); + } + int index = 0; + UChar temp[128]; + while (BASICCANONICAL[index].text != NULL) { + SearchData search = BASICCANONICAL[index ++]; + if (BASICCANONICAL[index].text == NULL) { + /* skip the last one */ + break; + } + + u_unescape(search.text, temp, 128); + text.setTo(temp, u_strlen(temp)); + u_unescape(search.pattern, temp, 128); + pattern.setTo(temp, u_strlen(temp)); + + UErrorCode status = U_ZERO_ERROR; + strsrch->setText(text, status); + + strsrch->setPattern(pattern, status); + + int count = 0; + UTextOffset matchindex = search.offset[count]; + while (U_SUCCESS(status) && matchindex >= 0) { + int32_t matchlength = search.size[count]; + strsrch->next(status); + if (matchindex != strsrch->getMatchedStart() || + matchlength != strsrch->getMatchedLength()) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error match found at %d %d", + strsrch->getMatchedStart(), + strsrch->getMatchedLength()); + return; + } + matchindex = search.offset[count + 1] == -1 ? 
-1 : + search.offset[count + 2]; + if (search.offset[count + 1] != -1) { + strsrch->setOffset(search.offset[count + 1] + 1, status); + if (strsrch->getOffset() != search.offset[count + 1] + 1) { + errln("Error setting offset"); + return; + } + } + + count += 2; + } + strsrch->next(status); + if (strsrch->getMatchedStart() != USEARCH_DONE) { + char *str = toCharString(strsrch->getText()); + errln("Text: %s", str); + str = toCharString(strsrch->getPattern()); + errln("Pattern: %s", str); + errln("Error match found at %d %d", strsrch->getMatchedStart(), + strsrch->getMatchedLength()); + return; + } + } + delete strsrch; +}
+
+/**
+ * Feeds every entry of SUPPLEMENTARYCANONICAL (terminated by an entry whose
+ * text is NULL) to assertCanonicalEqual and reports the index of each entry
+ * for which it returns false.
+ */
+void StringSearchTest::TestSupplementaryCanonical()
+{
+    for (int testno = 0; SUPPLEMENTARYCANONICAL[testno].text != NULL;
+         testno ++) {
+        if (!assertCanonicalEqual(&SUPPLEMENTARYCANONICAL[testno])) {
+            errln("Error at test number %d", testno);
+        }
+    }
+}
+
+/**
+ * Runs the CONTRACTIONCANONICAL data (terminated by an entry whose text is
+ * NULL) through a StringSearch that has USEARCH_CANONICAL_MATCH switched on
+ * and uses a tertiary-strength collator built from CONTRACTIONRULE.
+ * The test stops early, releasing whatever it allocated, when either the
+ * collator or the string search cannot be set up.
+ */
+void StringSearchTest::TestContractionCanonical()
+{
+    UChar temp[128];
+
+    u_unescape(CONTRACTIONRULE, temp, 128);
+    UnicodeString rules;
+    rules.setTo(temp, u_strlen(temp));
+
+    UErrorCode status = U_ZERO_ERROR;
+    RuleBasedCollator *collator = new RuleBasedCollator(rules,
+        getECollationStrength(UCOL_TERTIARY), Normalizer::DECOMP, status);
+    if (collator == NULL || U_FAILURE(status)) {
+        /* the search object below cannot be built without a collator */
+        errln("Error opening collator %s", u_errorName(status));
+        delete collator;
+        return;
+    }
+    UnicodeString text("text");
+    UnicodeString pattern("pattern");
+    StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL,
+                                             status);
+    if (strsrch != NULL) {
+        strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
+    }
+    if (strsrch == NULL || U_FAILURE(status)) {
+        /* do not run the data cases against a broken search object */
+        errln("Error opening string search %s", u_errorName(status));
+        delete strsrch;
+        delete collator;
+        return;
+    }
+
+    int count = 0;
+    while (CONTRACTIONCANONICAL[count].text != NULL) {
+        /* temp is reused: first for the text, then for the pattern */
+        u_unescape(CONTRACTIONCANONICAL[count].text, temp, 128);
+        text.setTo(temp, u_strlen(temp));
+        u_unescape(CONTRACTIONCANONICAL[count].pattern, temp, 128);
+        pattern.setTo(temp, u_strlen(temp));
+        strsrch->setText(text, status);
+        strsrch->setPattern(pattern, status);
+        if (!assertEqualWithStringSearch(strsrch,
+                                         &CONTRACTIONCANONICAL[count])) {
+            errln("Error at test number %d", count);
+        }
+        count ++;
+    }
+    /* same release order as before: the search first, then its collator */
+    delete strsrch;
+    delete collator;
+}
+
+
+
diff --git a/icu4c/source/test/intltest/srchtest.h b/icu4c/source/test/intltest/srchtest.h
new file mode 100644
index 0000000000..ad4f742913
--- /dev/null
+++ b/icu4c/source/test/intltest/srchtest.h
@@ -0,0 +1,91 @@
+/****************************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2001, International Business Machines Corporation and others
+ * All Rights Reserved.
+ ***************************************************************************/
+
+#ifndef SRCHTEST_H
+#define SRCHTEST_H
+
+#include "unicode/utypes.h"
+#include "unicode/tblcoll.h"
+#include "unicode/brkiter.h"
+#include "intltest.h"
+#include "unicode/usearch.h"
+
+struct SearchData;
+typedef struct SearchData SearchData;
+
+/**
+ * IntlTest suite for StringSearch.  The test data is described by
+ * SearchData, which is only forward-declared in this header.
+ */
+class StringSearchTest: public IntlTest
+{
+public:
+    StringSearchTest();
+    virtual ~StringSearchTest();
+
+    void runIndexedTest(int32_t index, UBool exec, const char* &name,
+                        char* par = NULL);
+
+private:
+    /* collator and break iterator members of the suite */
+    RuleBasedCollator *m_en_us_;
+    RuleBasedCollator *m_fr_fr_;
+    RuleBasedCollator *m_de_;
+    RuleBasedCollator *m_es_;
+    BreakIterator     *m_en_wordbreaker_;
+    BreakIterator     *m_en_characterbreaker_;
+
+    /* lookup, conversion and assertion helpers */
+    RuleBasedCollator * getCollator(const char *collator);
+    BreakIterator     * getBreakIterator(const char *breaker);
+    char              * toCharString(const UnicodeString &text);
+    Collator::ECollationStrength getECollationStrength(
+                                   const UCollationStrength &strength) const;
+    UBool assertEqualWithStringSearch( StringSearch *strsrch,
+                                       const SearchData *search);
+    UBool assertEqual(const SearchData *search);
+    UBool assertCanonicalEqual(const SearchData *search);
+    UBool assertEqualWithAttribute(const SearchData *search,
+                                   USearchAttributeValue canonical,
+                                   USearchAttributeValue overlap);
+
+    /* test cases */
+    void TestOpenClose();
+    void TestInitialization();
+    void TestBasic();
+    void TestNormExact();
+    void TestStrength();
+    void TestBreakIterator();
+    void TestVariable();
+    void TestOverlap();
+    void TestCollator();
+    void TestPattern();
+    void TestText();
+    void TestCompositeBoundaries();
+    void TestGetSetOffset();
+    void TestGetSetAttribute();
+    void TestGetMatch();
+    void TestSetMatch();
+    void TestReset();
+    void TestSupplementary();
+    void TestContraction();
+    void TestIgnorable();
+    void TestCanonical();
+    void TestNormCanonical();
+    void TestStrengthCanonical();
+    void TestBreakIteratorCanonical();
+    void TestVariableCanonical();
+    void TestOverlapCanonical();
+    void TestCollatorCanonical();
+    void TestPatternCanonical();
+    void TestTextCanonical();
+    void TestCompositeBoundariesCanonical();
+    void TestGetSetOffsetCanonical();
+    void TestSupplementaryCanonical();
+    void TestContractionCanonical();
+};
+
+#endif /* SRCHTEST_H */
diff --git a/icu4c/source/test/intltest/tscoll.cpp b/icu4c/source/test/intltest/tscoll.cpp index f3ed4df3ca..5fbb66169f 100644 --- a/icu4c/source/test/intltest/tscoll.cpp +++ b/icu4c/source/test/intltest/tscoll.cpp @@ -37,6 +37,7 @@ #include "tstnorm.h" #include "normconf.h" #include "thcoll.h" +#include "srchtest.h" #include "lcukocol.h" @@ -273,7 +274,7 @@ void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &n } break; - case 18: //all + case 18: //all name = "LotusCollationTest"; name = "LotusCollationKoreanTest"; @@ -284,6 +285,16 @@ void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &n } break; + case 19: + name = "StringSearchTest"; + if (exec) { + logln("StringSearchTest---"); + + StringSearchTest test; + callTest( test, par ); + } + break; + default: name = ""; break; } }