2000-01-15 02:00:06 +00:00
|
|
|
/********************************************************************
|
2001-04-18 19:31:05 +00:00
|
|
|
* COPYRIGHT:
|
2001-03-21 19:46:49 +00:00
|
|
|
* Copyright (c) 1997-2001, International Business Machines Corporation and
|
2000-01-15 02:00:06 +00:00
|
|
|
* others. All Rights Reserved.
|
|
|
|
********************************************************************/
|
|
|
|
/********************************************************************************
|
1999-08-16 21:50:52 +00:00
|
|
|
*
|
|
|
|
* File CITERTST.C
|
|
|
|
*
|
|
|
|
* Modification History:
|
2001-04-18 19:31:05 +00:00
|
|
|
* Date Name Description
|
2001-02-23 23:41:16 +00:00
|
|
|
* Madhu Katragadda Ported for C API
|
|
|
|
* 02/19/01 synwee Modified test case for new collation iterator
|
2000-08-14 23:35:57 +00:00
|
|
|
*********************************************************************************/
|
1999-08-16 21:50:52 +00:00
|
|
|
/*
|
|
|
|
* Collation Iterator tests.
|
|
|
|
* (Let me reiterate my position...)
|
|
|
|
*/
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/ucol.h"
|
|
|
|
#include "unicode/uloc.h"
|
2001-02-23 23:41:16 +00:00
|
|
|
#include "unicode/uchar.h"
|
1999-12-28 23:57:50 +00:00
|
|
|
#include "unicode/ustring.h"
|
2001-02-23 23:41:16 +00:00
|
|
|
#include "cmemory.h"
|
2001-04-03 00:32:05 +00:00
|
|
|
#include "cintltst.h"
|
|
|
|
#include "citertst.h"
|
|
|
|
#include "ccolltst.h"
|
2001-05-14 18:58:36 +00:00
|
|
|
#include "filestrm.h"
|
|
|
|
#include "cstring.h"
|
|
|
|
#include "ucol_imp.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
/* Registers one test function under "tscoll/citertst/<function name>". */
#define ADD_CITER_TEST(fn) addTest(root, &fn, "tscoll/citertst/" #fn)

/**
 * Adds every collation element iterator test of this file to the test tree.
 * Tests are registered in the order listed below.
 *
 * @param root root of the test tree the tests are added to
 */
void addCollIterTest(TestNode** root)
{
    ADD_CITER_TEST(TestPrevious);
    ADD_CITER_TEST(TestOffset);
    ADD_CITER_TEST(TestSetText);
    ADD_CITER_TEST(TestMaxExpansion);
    ADD_CITER_TEST(TestUnicodeChar);
    ADD_CITER_TEST(TestNormalizedUnicodeChar);
    ADD_CITER_TEST(TestNormalization);
    ADD_CITER_TEST(TestBug672);
    ADD_CITER_TEST(TestBug672Normalize);
    ADD_CITER_TEST(TestSmallBuffer);
    ADD_CITER_TEST(TestCEs);
    ADD_CITER_TEST(TestDiscontiguos);
    ADD_CITER_TEST(TestCEBufferOverflow);
}

#undef ADD_CITER_TEST
|
|
|
|
|
2001-04-17 21:53:30 +00:00
|
|
|
/* The locales we support */
|
2001-04-07 00:20:32 +00:00
|
|
|
|
2001-05-04 00:02:24 +00:00
|
|
|
/* Used by TestBug672() and TestBug672Normalize(): each of them opens one
   collator per entry and indexes this array with 0..2. */
static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
|
2001-04-07 00:20:32 +00:00
|
|
|
|
|
|
|
static void TestBug672() {
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UChar pattern[20];
|
|
|
|
UChar text[50];
|
|
|
|
int i;
|
|
|
|
int result[3][3];
|
|
|
|
|
|
|
|
u_uastrcpy(pattern, "resume");
|
|
|
|
u_uastrcpy(text, "Time to resume updating my resume.");
|
|
|
|
|
|
|
|
for (i = 0; i < 3; ++ i) {
|
|
|
|
UCollator *coll = ucol_open(LOCALES[i], &status);
|
|
|
|
UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
|
|
|
|
&status);
|
|
|
|
UCollationElements *titer = ucol_openElements(coll, text, -1,
|
|
|
|
&status);
|
|
|
|
if (U_FAILURE(status)) {
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
|
2001-04-07 00:20:32 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-04-07 00:20:32 +00:00
|
|
|
log_verbose("locale tested %s\n", LOCALES[i]);
|
|
|
|
|
2001-04-18 19:31:05 +00:00
|
|
|
while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
|
2001-04-07 00:20:32 +00:00
|
|
|
U_SUCCESS(status)) {
|
|
|
|
}
|
|
|
|
if (U_FAILURE(status)) {
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: reversing collation iterator :%s\n",
|
2001-04-07 00:20:32 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
ucol_reset(pitr);
|
|
|
|
|
|
|
|
ucol_setOffset(titer, u_strlen(pattern), &status);
|
|
|
|
if (U_FAILURE(status)) {
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: setting offset in collator :%s\n",
|
2001-04-07 00:20:32 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
result[i][0] = ucol_getOffset(titer);
|
|
|
|
log_verbose("Text iterator set to offset %d\n", result[i][0]);
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-04-07 00:20:32 +00:00
|
|
|
/* Use previous() */
|
|
|
|
ucol_previous(titer, &status);
|
|
|
|
result[i][1] = ucol_getOffset(titer);
|
|
|
|
log_verbose("Current offset %d after previous\n", result[i][1]);
|
|
|
|
|
|
|
|
/* Add one to index */
|
|
|
|
log_verbose("Adding one to current offset...\n");
|
|
|
|
ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: setting offset in collator :%s\n",
|
2001-04-07 00:20:32 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
result[i][2] = ucol_getOffset(titer);
|
|
|
|
log_verbose("Current offset in text = %d\n", result[i][2]);
|
2001-05-01 17:14:49 +00:00
|
|
|
ucol_closeElements(pitr);
|
|
|
|
ucol_closeElements(titer);
|
2001-04-07 00:20:32 +00:00
|
|
|
ucol_close(coll);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (uprv_memcmp(result[0], result[1], 3) != 0 ||
|
|
|
|
uprv_memcmp(result[1], result[2], 3) != 0) {
|
|
|
|
log_err("ERROR: Different locales have different offsets at the same character\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2001-04-28 00:25:47 +00:00
|
|
|
|
2001-04-30 20:51:22 +00:00
|
|
|
/* Running this test with normalization enabled showed up a bug in the incremental
|
|
|
|
normalization code. */
|
2001-04-28 00:25:47 +00:00
|
|
|
static void TestBug672Normalize() {
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UChar pattern[20];
|
|
|
|
UChar text[50];
|
|
|
|
int i;
|
|
|
|
int result[3][3];
|
|
|
|
|
|
|
|
u_uastrcpy(pattern, "resume");
|
|
|
|
u_uastrcpy(text, "Time to resume updating my resume.");
|
|
|
|
|
|
|
|
for (i = 0; i < 3; ++ i) {
|
|
|
|
UCollator *coll = ucol_open(LOCALES[i], &status);
|
|
|
|
UCollationElements *pitr = NULL;
|
|
|
|
UCollationElements *titer = NULL;
|
|
|
|
|
|
|
|
ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
|
|
|
|
|
|
|
pitr = ucol_openElements(coll, pattern, -1, &status);
|
|
|
|
titer = ucol_openElements(coll, text, -1, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
log_verbose("locale tested %s\n", LOCALES[i]);
|
|
|
|
|
|
|
|
while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
|
|
|
|
U_SUCCESS(status)) {
|
|
|
|
}
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("ERROR: reversing collation iterator :%s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
ucol_reset(pitr);
|
|
|
|
|
|
|
|
ucol_setOffset(titer, u_strlen(pattern), &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("ERROR: setting offset in collator :%s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
result[i][0] = ucol_getOffset(titer);
|
|
|
|
log_verbose("Text iterator set to offset %d\n", result[i][0]);
|
|
|
|
|
|
|
|
/* Use previous() */
|
|
|
|
ucol_previous(titer, &status);
|
|
|
|
result[i][1] = ucol_getOffset(titer);
|
|
|
|
log_verbose("Current offset %d after previous\n", result[i][1]);
|
|
|
|
|
|
|
|
/* Add one to index */
|
|
|
|
log_verbose("Adding one to current offset...\n");
|
|
|
|
ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("ERROR: setting offset in collator :%s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
result[i][2] = ucol_getOffset(titer);
|
|
|
|
log_verbose("Current offset in text = %d\n", result[i][2]);
|
2001-05-01 17:14:49 +00:00
|
|
|
ucol_closeElements(pitr);
|
|
|
|
ucol_closeElements(titer);
|
2001-04-28 00:25:47 +00:00
|
|
|
ucol_close(coll);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (uprv_memcmp(result[0], result[1], 3) != 0 ||
|
|
|
|
uprv_memcmp(result[1], result[2], 3) != 0) {
|
|
|
|
log_err("ERROR: Different locales have different offsets at the same character\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2001-02-23 23:41:16 +00:00
|
|
|
/**
|
|
|
|
* Test for CollationElementIterator previous and next for the whole set of
|
|
|
|
* unicode characters.
|
|
|
|
*/
|
|
|
|
static void TestUnicodeChar()
|
|
|
|
{
|
2001-03-16 18:21:49 +00:00
|
|
|
UChar source[0x100];
|
2001-02-23 23:41:16 +00:00
|
|
|
UCollator *en_us;
|
|
|
|
UCollationElements *iter;
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UChar codepoint;
|
|
|
|
|
|
|
|
UChar *test;
|
|
|
|
en_us = ucol_open("en_US", &status);
|
2001-04-26 01:14:28 +00:00
|
|
|
if (U_FAILURE(status)){
|
|
|
|
log_err("ERROR: in creation of collation data using ucol_open()\n %s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-02-23 23:41:16 +00:00
|
|
|
for (codepoint = 1; codepoint < 0xFFFE;)
|
|
|
|
{
|
|
|
|
test = source;
|
|
|
|
|
2001-04-18 19:31:05 +00:00
|
|
|
while (codepoint % 0xFF != 0)
|
2001-02-23 23:41:16 +00:00
|
|
|
{
|
|
|
|
if (u_isdefined(codepoint))
|
|
|
|
*(test ++) = codepoint;
|
|
|
|
codepoint ++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (u_isdefined(codepoint))
|
|
|
|
*(test ++) = codepoint;
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-02-23 23:41:16 +00:00
|
|
|
if (codepoint != 0xFFFF)
|
|
|
|
codepoint ++;
|
|
|
|
|
2001-04-18 19:31:05 +00:00
|
|
|
*test = 0;
|
2001-02-23 23:41:16 +00:00
|
|
|
iter=ucol_openElements(en_us, source, u_strlen(source), &status);
|
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
2001-02-23 23:41:16 +00:00
|
|
|
myErrorName(status));
|
2001-04-26 01:14:28 +00:00
|
|
|
ucol_close(en_us);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* A basic test to see if it's working at all */
|
2001-05-17 01:07:10 +00:00
|
|
|
log_verbose("codepoint testing %x\n", codepoint);
|
2001-04-26 01:14:28 +00:00
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
|
|
|
|
/* null termination test */
|
|
|
|
iter=ucol_openElements(en_us, source, -1, &status);
|
|
|
|
if(U_FAILURE(status)){
|
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
ucol_close(en_us);
|
2001-02-23 23:41:16 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* A basic test to see if it's working at all */
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
}
|
|
|
|
|
|
|
|
ucol_close(en_us);
|
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2001-04-17 17:27:37 +00:00
|
|
|
/**
|
|
|
|
* Test for CollationElementIterator previous and next for the whole set of
|
|
|
|
* unicode characters with normalization on.
|
|
|
|
*/
|
|
|
|
static void TestNormalizedUnicodeChar()
|
|
|
|
{
|
|
|
|
UChar source[0x100];
|
|
|
|
UCollator *th_th;
|
|
|
|
UCollationElements *iter;
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UChar codepoint;
|
|
|
|
|
|
|
|
UChar *test;
|
|
|
|
/* thai should have normalization on */
|
|
|
|
th_th = ucol_open("th_TH", &status);
|
2001-04-26 01:14:28 +00:00
|
|
|
if (U_FAILURE(status)){
|
|
|
|
log_err("ERROR: in creation of thai collation using ucol_open()\n %s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
2001-05-14 18:58:36 +00:00
|
|
|
|
2001-04-17 17:27:37 +00:00
|
|
|
for (codepoint = 1; codepoint < 0xFFFE;)
|
|
|
|
{
|
|
|
|
test = source;
|
|
|
|
|
2001-04-18 19:31:05 +00:00
|
|
|
while (codepoint % 0xFF != 0)
|
2001-04-17 17:27:37 +00:00
|
|
|
{
|
|
|
|
if (u_isdefined(codepoint))
|
|
|
|
*(test ++) = codepoint;
|
|
|
|
codepoint ++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (u_isdefined(codepoint))
|
|
|
|
*(test ++) = codepoint;
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-04-17 17:27:37 +00:00
|
|
|
if (codepoint != 0xFFFF)
|
|
|
|
codepoint ++;
|
|
|
|
|
2001-04-18 19:31:05 +00:00
|
|
|
*test = 0;
|
2001-04-17 17:27:37 +00:00
|
|
|
iter=ucol_openElements(th_th, source, u_strlen(source), &status);
|
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
2001-04-17 17:27:37 +00:00
|
|
|
myErrorName(status));
|
|
|
|
ucol_close(th_th);
|
|
|
|
return;
|
|
|
|
}
|
2001-05-14 18:58:36 +00:00
|
|
|
|
2001-04-17 17:27:37 +00:00
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
2001-04-26 01:14:28 +00:00
|
|
|
|
|
|
|
iter=ucol_openElements(th_th, source, -1, &status);
|
|
|
|
if(U_FAILURE(status)){
|
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
ucol_close(th_th);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
2001-04-17 17:27:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ucol_close(th_th);
|
|
|
|
}
|
|
|
|
|
2001-04-26 01:14:28 +00:00
|
|
|
/**
|
|
|
|
* Test the incremental normalization
|
|
|
|
*/
|
|
|
|
static void TestNormalization()
|
|
|
|
{
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
const char *str =
|
|
|
|
"&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
|
|
|
|
UCollator *coll;
|
|
|
|
UChar rule[50];
|
|
|
|
int rulelen = u_unescape(str, rule, 50);
|
|
|
|
int count = 0;
|
|
|
|
const char *testdata[] =
|
2001-05-14 18:58:36 +00:00
|
|
|
{"\\u1ED9", "o\\u0323\\u0302",
|
|
|
|
"\\u0300\\u0315", "\\u0315\\u0300",
|
2001-04-26 01:14:28 +00:00
|
|
|
"A\\u0300\\u0315B", "A\\u0315\\u0300B",
|
|
|
|
"A\\u0316\\u0315B", "A\\u0315\\u0316B",
|
|
|
|
"\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
|
|
|
|
"A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
|
|
|
|
"\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
|
2001-05-21 23:41:44 +00:00
|
|
|
int32_t srclen;
|
2001-05-14 18:58:36 +00:00
|
|
|
UChar source[10];
|
|
|
|
UCollationElements *iter;
|
2001-05-01 17:14:49 +00:00
|
|
|
|
2001-04-26 01:14:28 +00:00
|
|
|
coll = ucol_openRules(rule, rulelen, UNORM_NFD, UCOL_TERTIARY, &status);
|
|
|
|
ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
|
|
|
|
if (U_FAILURE(status)){
|
|
|
|
log_err("ERROR: in creation of collator using ucol_openRules()\n %s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2001-05-14 18:58:36 +00:00
|
|
|
srclen = u_unescape(testdata[0], source, 10);
|
|
|
|
iter = ucol_openElements(coll, source, srclen - 1, &status);
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
|
|
|
|
srclen = u_unescape(testdata[1], source, 10);
|
|
|
|
iter = ucol_openElements(coll, source, srclen - 1, &status);
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
|
2001-04-26 01:14:28 +00:00
|
|
|
while (count < 12) {
|
|
|
|
srclen = u_unescape(testdata[count], source, 10);
|
|
|
|
iter = ucol_openElements(coll, source, srclen - 1, &status);
|
|
|
|
|
|
|
|
if (U_FAILURE(status)){
|
|
|
|
log_err("ERROR: in creation of collator element iterator\n %s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
|
|
|
|
iter = ucol_openElements(coll, source, -1, &status);
|
|
|
|
|
|
|
|
if (U_FAILURE(status)){
|
|
|
|
log_err("ERROR: in creation of collator element iterator\n %s\n",
|
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
count ++;
|
|
|
|
}
|
|
|
|
ucol_close(coll);
|
|
|
|
}
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
/**
|
|
|
|
* Test for CollationElementIterator.previous()
|
|
|
|
*
|
|
|
|
* @bug 4108758 - Make sure it works with contracting characters
|
2001-04-18 19:31:05 +00:00
|
|
|
*
|
1999-08-16 21:50:52 +00:00
|
|
|
*/
|
2000-11-21 04:05:39 +00:00
|
|
|
static void TestPrevious()
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2001-03-09 18:16:37 +00:00
|
|
|
UCollator *coll=NULL;
|
1999-08-16 21:50:52 +00:00
|
|
|
UChar rule[50];
|
|
|
|
UChar *source;
|
|
|
|
UCollator *c1, *c2, *c3;
|
|
|
|
UCollationElements *iter;
|
1999-10-07 00:07:53 +00:00
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
2001-04-18 19:31:05 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
test1=(UChar*)malloc(sizeof(UChar) * 50);
|
|
|
|
test2=(UChar*)malloc(sizeof(UChar) * 50);
|
|
|
|
u_uastrcpy(test1, "What subset of all possible test cases?");
|
|
|
|
u_uastrcpy(test2, "has the highest probability of detecting");
|
2001-04-12 00:07:00 +00:00
|
|
|
coll = ucol_open("en_US", &status);
|
2000-08-28 21:43:03 +00:00
|
|
|
|
2001-03-09 18:16:37 +00:00
|
|
|
iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
|
2001-05-01 17:14:49 +00:00
|
|
|
log_verbose("English locale testing back and forth\n");
|
1999-10-18 22:48:32 +00:00
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
2001-03-09 18:16:37 +00:00
|
|
|
ucol_close(coll);
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* A basic test to see if it's working at all */
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
2001-03-09 18:16:37 +00:00
|
|
|
ucol_close(coll);
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
/* Test with a contracting character sequence */
|
2001-03-03 04:09:09 +00:00
|
|
|
u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
|
1999-08-16 21:50:52 +00:00
|
|
|
c1 = ucol_openRules(rule, u_strlen(rule), UCOL_NO_NORMALIZATION, UCOL_DEFAULT_STRENGTH, &status);
|
2001-02-26 20:02:00 +00:00
|
|
|
|
2001-05-01 17:14:49 +00:00
|
|
|
log_verbose("Contraction rule testing back and forth with no normalization\n");
|
|
|
|
|
1999-10-18 22:48:32 +00:00
|
|
|
if (c1 == NULL || U_FAILURE(status))
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
source=(UChar*)malloc(sizeof(UChar) * 20);
|
|
|
|
u_uastrcpy(source, "abchdcba");
|
|
|
|
iter=ucol_openElements(c1, source, u_strlen(source), &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(c1);
|
2001-04-26 01:14:28 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
/* Test with an expanding character sequence */
|
2001-03-03 04:09:09 +00:00
|
|
|
u_uastrcpy(rule, "&a < b < c/abd < d");
|
1999-08-16 21:50:52 +00:00
|
|
|
c2 = ucol_openRules(rule, u_strlen(rule), UCOL_NO_NORMALIZATION, UCOL_DEFAULT_STRENGTH, &status);
|
2001-05-01 17:14:49 +00:00
|
|
|
log_verbose("Expansion rule testing back and forth with no normalization\n");
|
1999-10-18 22:48:32 +00:00
|
|
|
if (c2 == NULL || U_FAILURE(status))
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
u_uastrcpy(source, "abcd");
|
|
|
|
iter=ucol_openElements(c2, source, u_strlen(source), &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(c2);
|
|
|
|
/* Now try both */
|
2001-03-03 04:09:09 +00:00
|
|
|
u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
|
1999-08-16 21:50:52 +00:00
|
|
|
c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT_NORMALIZATION, UCOL_DEFAULT_STRENGTH, &status);
|
2001-05-01 17:14:49 +00:00
|
|
|
log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
|
|
|
|
|
1999-10-18 22:48:32 +00:00
|
|
|
if (c3 == NULL || U_FAILURE(status))
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
u_uastrcpy(source, "abcdbchdc");
|
|
|
|
iter=ucol_openElements(c3, source, u_strlen(source), &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(c3);
|
2001-03-09 18:16:37 +00:00
|
|
|
source[0] = 0x0e41;
|
|
|
|
source[1] = 0x0e02;
|
|
|
|
source[2] = 0x0e41;
|
|
|
|
source[3] = 0x0e02;
|
|
|
|
source[4] = 0x0e27;
|
|
|
|
source[5] = 0x61;
|
|
|
|
source[6] = 0x62;
|
|
|
|
source[7] = 0x63;
|
|
|
|
source[8] = 0;
|
|
|
|
|
|
|
|
coll = ucol_open("th_TH", &status);
|
2001-05-01 17:14:49 +00:00
|
|
|
log_verbose("Thai locale testing back and forth with normalization\n");
|
2001-03-09 18:16:37 +00:00
|
|
|
iter=ucol_openElements(coll, source, u_strlen(source), &status);
|
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
2001-03-09 18:16:37 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(coll);
|
|
|
|
|
2001-03-09 23:10:06 +00:00
|
|
|
/* prev test */
|
|
|
|
source[0] = 0x0061;
|
|
|
|
source[1] = 0x30CF;
|
|
|
|
source[2] = 0x3099;
|
|
|
|
source[3] = 0x30FC;
|
|
|
|
source[4] = 0;
|
|
|
|
|
|
|
|
coll = ucol_open("ja_JP", &status);
|
2001-05-01 17:14:49 +00:00
|
|
|
log_verbose("Japanese locale testing back and forth with normalization\n");
|
2001-03-09 23:10:06 +00:00
|
|
|
iter=ucol_openElements(coll, source, u_strlen(source), &status);
|
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
2001-03-09 23:10:06 +00:00
|
|
|
myErrorName(status));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
backAndForth(iter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(coll);
|
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
free(source);
|
|
|
|
free(test1);
|
|
|
|
free(test2);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Test for getOffset() and setOffset()
|
|
|
|
*/
|
2000-11-21 04:05:39 +00:00
|
|
|
static void TestOffset()
|
2001-04-18 19:31:05 +00:00
|
|
|
{
|
1999-10-07 00:07:53 +00:00
|
|
|
UErrorCode status= U_ZERO_ERROR;
|
2001-02-28 22:23:00 +00:00
|
|
|
UCollator *en_us=NULL;
|
1999-08-16 21:50:52 +00:00
|
|
|
UCollationElements *iter, *pristine;
|
|
|
|
int32_t offset;
|
|
|
|
int32_t *orders;
|
|
|
|
int32_t orderLength=0;
|
|
|
|
test1=(UChar*)malloc(sizeof(UChar) * 50);
|
|
|
|
test2=(UChar*)malloc(sizeof(UChar) * 50);
|
|
|
|
u_uastrcpy(test1, "What subset of all possible test cases?");
|
|
|
|
u_uastrcpy(test2, "has the highest probability of detecting");
|
|
|
|
en_us = ucol_open("en_US", &status);
|
|
|
|
log_verbose("Testing getOffset and setOffset for CollationElements\n");
|
|
|
|
iter=ucol_openElements(en_us, test1, u_strlen(test1), &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
2001-02-28 22:23:00 +00:00
|
|
|
ucol_close(en_us);
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* Run all the way through the iterator, then get the offset */
|
|
|
|
|
|
|
|
orders = getOrders(iter, &orderLength);
|
|
|
|
|
|
|
|
offset = ucol_getOffset(iter);
|
|
|
|
|
|
|
|
if (offset != u_strlen(test1))
|
|
|
|
{
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("offset at end != length %d vs %d\n", offset,
|
1999-08-16 21:50:52 +00:00
|
|
|
u_strlen(test1) );
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now set the offset back to the beginning and see if it works */
|
|
|
|
pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
2001-02-28 22:23:00 +00:00
|
|
|
ucol_close(en_us);
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
1999-10-07 00:07:53 +00:00
|
|
|
status = U_ZERO_ERROR;
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
ucol_setOffset(iter, 0, &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if (U_FAILURE(status))
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
log_err("setOffset failed. %s\n", myErrorName(status));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
assertEqual(iter, pristine);
|
|
|
|
}
|
2000-08-28 21:43:03 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
ucol_closeElements(pristine);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
free(orders);
|
|
|
|
ucol_close(en_us);
|
|
|
|
free(test1);
|
|
|
|
free(test2);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Test for setText()
|
|
|
|
*/
|
2000-11-21 04:05:39 +00:00
|
|
|
static void TestSetText()
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
int32_t c,i;
|
1999-10-07 00:07:53 +00:00
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
2001-02-28 22:23:00 +00:00
|
|
|
UCollator *en_us=NULL;
|
1999-08-16 21:50:52 +00:00
|
|
|
UCollationElements *iter1, *iter2;
|
|
|
|
test1=(UChar*)malloc(sizeof(UChar) * 50);
|
|
|
|
test2=(UChar*)malloc(sizeof(UChar) * 50);
|
|
|
|
u_uastrcpy(test1, "What subset of all possible test cases?");
|
|
|
|
u_uastrcpy(test2, "has the highest probability of detecting");
|
|
|
|
en_us = ucol_open("en_US", &status);
|
|
|
|
log_verbose("testing setText for Collation elements\n");
|
|
|
|
iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
2001-02-28 22:23:00 +00:00
|
|
|
ucol_close(en_us);
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if(U_FAILURE(status)){
|
2001-04-18 19:31:05 +00:00
|
|
|
log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
|
1999-08-16 21:50:52 +00:00
|
|
|
myErrorName(status));
|
2001-02-28 22:23:00 +00:00
|
|
|
ucol_close(en_us);
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
2001-04-18 19:31:05 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
/* Run through the second iterator just to exercise it */
|
|
|
|
c = ucol_next(iter2, &status);
|
|
|
|
i = 0;
|
|
|
|
|
2001-02-23 23:41:16 +00:00
|
|
|
while ( ++i < 10 && (c != UCOL_NULLORDER))
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
1999-10-18 22:48:32 +00:00
|
|
|
if (U_FAILURE(status))
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
log_err("iter2->next() returned an error. %s\n", myErrorName(status));
|
|
|
|
ucol_closeElements(iter2);
|
|
|
|
ucol_closeElements(iter1);
|
2001-02-28 22:23:00 +00:00
|
|
|
ucol_close(en_us);
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
c = ucol_next(iter2, &status);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now set it to point to the same string as the first iterator */
|
|
|
|
ucol_setText(iter2, test1, u_strlen(test1), &status);
|
1999-10-18 22:48:32 +00:00
|
|
|
if (U_FAILURE(status))
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
assertEqual(iter1, iter2);
|
|
|
|
}
|
2001-04-18 19:31:05 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
ucol_closeElements(iter2);
|
|
|
|
ucol_closeElements(iter1);
|
2000-11-21 04:05:39 +00:00
|
|
|
ucol_close(en_us);
|
1999-08-16 21:50:52 +00:00
|
|
|
free(test1);
|
|
|
|
free(test2);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2000-11-21 04:05:39 +00:00
|
|
|
/**
 * Collects the collation orders of the iterator in forward direction via
 * getOrders(), resets the iterator, walks it with ucol_previous() and checks
 * that the same orders come back in reverse.  Orders equal to 0 are allowed
 * to appear on only one of the two sides.  Mismatches are reported with
 * log_err().
 *
 * The order array obtained from getOrders() is freed on every exit path.
 *
 * @param iter iterator to exercise; it is reset by this function
 */
static void backAndForth(UCollationElements *iter)
{
    /* Run through the iterator forwards and stick it into an array */
    int32_t index, o;
    UErrorCode status = U_ZERO_ERROR;
    int32_t orderLength = 0;
    int32_t *orders;
    orders= getOrders(iter, &orderLength);

    /* Now go through it backwards and make sure we get the same values */
    index = orderLength;
    ucol_reset(iter);

    /* synwee : changed */
    while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
    {
        if (index <= 0)
        {
            /* All forward orders are consumed: never read before the start
               of the array.  A 0 order may still be skipped; anything else
               has no forward counterpart. */
            if (o == 0)
                continue;
            log_err("Mismatch at index : 0x%x\n", index);
            free(orders);
            return;
        }

        if (o != orders[-- index])
        {
            if (o == 0)
                index ++;   /* extra 0 going backwards: re-check this slot */
            else
            {
                /* skip 0 orders that only the forward pass produced */
                while (index > 0 && orders[-- index] == 0)
                {
                }
                if (o != orders[index])
                {
                    log_err("Mismatch at index : 0x%x\n", index);
                    free(orders);
                    return;
                }
            }
        }
    }

    /* leading 0 orders of the forward pass need no backward counterpart */
    while (index != 0 && orders[index - 1] == 0) {
        index --;
    }

    if (index != 0)
    {
        log_err("Didn't get back to beginning - index is %d\n", index);

        ucol_reset(iter);
        log_err("\nnext: ");
        if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER)
        {
            log_err("Error at %x\n", o);
        }
        log_err("\nprev: ");
        if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
        {
            log_err("Error at %x\n", o);
        }
        log_verbose("\n");
    }

    free(orders);
}
|
2001-03-09 18:16:37 +00:00
|
|
|
|
2000-06-22 19:04:32 +00:00
|
|
|
/** @bug 4108762
|
|
|
|
* Test for getMaxExpansion()
|
|
|
|
*/
|
2000-11-21 04:05:39 +00:00
|
|
|
static void TestMaxExpansion()
|
2000-06-22 19:04:32 +00:00
|
|
|
{
|
2001-04-18 19:31:05 +00:00
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
2001-03-27 00:34:10 +00:00
|
|
|
UCollator *coll ;/*= ucol_open("en_US", &status);*/
|
|
|
|
UChar ch = 0;
|
|
|
|
UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-03-27 00:34:10 +00:00
|
|
|
UChar rule[256];
|
|
|
|
u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
|
2001-04-18 19:31:05 +00:00
|
|
|
coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT_NORMALIZATION,
|
2001-03-27 00:34:10 +00:00
|
|
|
UCOL_DEFAULT_STRENGTH, &status);
|
|
|
|
iter = ucol_openElements(coll, &ch, 1, &status);
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-03-27 00:34:10 +00:00
|
|
|
while (ch < 0xFFFF && U_SUCCESS(status)) {
|
|
|
|
int count = 1;
|
|
|
|
uint32_t order;
|
|
|
|
ch++;
|
|
|
|
ucol_setText(iter, &ch, 1, &status);
|
|
|
|
order = ucol_previous(iter, &status);
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-03-27 00:34:10 +00:00
|
|
|
/* thai management */
|
|
|
|
if (order == 0)
|
|
|
|
order = ucol_previous(iter, &status);
|
2001-04-18 19:31:05 +00:00
|
|
|
|
|
|
|
while (U_SUCCESS(status) &&
|
2001-03-27 00:34:10 +00:00
|
|
|
ucol_previous(iter, &status) != UCOL_NULLORDER) {
|
2001-04-18 19:31:05 +00:00
|
|
|
count ++;
|
2001-03-27 00:34:10 +00:00
|
|
|
}
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-03-27 00:34:10 +00:00
|
|
|
if (U_FAILURE(status) && ucol_getMaxExpansion(iter, order) < count) {
|
|
|
|
log_err("Failure at codepoint %d, maximum expansion count < %d\n",
|
|
|
|
ch, count);
|
|
|
|
}
|
2000-06-22 19:04:32 +00:00
|
|
|
}
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-03-27 00:34:10 +00:00
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(coll);
|
2000-06-22 19:04:32 +00:00
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Return an integer array containing all of the collation orders
|
|
|
|
* returned by calls to next on the specified iterator
|
|
|
|
*/
|
2000-11-21 04:05:39 +00:00
|
|
|
static int32_t* getOrders(UCollationElements *iter, int32_t *orderLength)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
UErrorCode status;
|
|
|
|
int32_t order;
|
|
|
|
int32_t maxSize = 100;
|
|
|
|
int32_t size = 0;
|
|
|
|
int32_t *temp;
|
|
|
|
int32_t *orders =(int32_t*)malloc(sizeof(int32_t) * maxSize);
|
1999-10-07 00:07:53 +00:00
|
|
|
status= U_ZERO_ERROR;
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-08-28 21:43:03 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
|
|
|
|
{
|
|
|
|
if (size == maxSize)
|
|
|
|
{
|
|
|
|
maxSize *= 2;
|
|
|
|
temp = (int32_t*)malloc(sizeof(int32_t) * maxSize);
|
|
|
|
|
|
|
|
memcpy(temp, orders, size * sizeof(int32_t));
|
|
|
|
free(orders);
|
|
|
|
orders = temp;
|
2001-04-18 19:31:05 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
2000-08-28 21:43:03 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
orders[size++] = order;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (maxSize > size)
|
|
|
|
{
|
2001-04-12 00:07:00 +00:00
|
|
|
if (size == 0) {
|
|
|
|
size = 1;
|
|
|
|
temp = (int32_t*)malloc(sizeof(int32_t) * size);
|
|
|
|
temp[0] = 0;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
temp = (int32_t*)malloc(sizeof(int32_t) * size);
|
|
|
|
memcpy(temp, orders, size * sizeof(int32_t));
|
|
|
|
}
|
2000-08-28 21:43:03 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
free(orders);
|
|
|
|
orders = temp;
|
|
|
|
}
|
2001-03-03 04:09:09 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
*orderLength = size;
|
|
|
|
return orders;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2000-11-21 04:05:39 +00:00
|
|
|
/**
 * Steps two iterators forward in parallel and logs an error at the first
 * position where the orders they return differ. Stops after the first
 * mismatch or once UCOL_NULLORDER has been returned by both.
 */
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
{
    UErrorCode status    = U_ZERO_ERROR;
    int32_t    iteration = 0;

    for (;;) {
        int32_t left  = ucol_next(i1, &status);
        int32_t right = ucol_next(i2, &status);

        if (left != right) {
            log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", iteration, left, right);
            return;
        }

        iteration++;

        /* both iterators are exhausted at the same time */
        if (left == UCOL_NULLORDER) {
            return;
        }
    }
}
|
|
|
|
|
2001-04-18 19:31:05 +00:00
|
|
|
/**
|
2001-04-17 17:27:37 +00:00
|
|
|
* Testing iterators with extremely small buffers
|
|
|
|
*/
|
|
|
|
static void TestSmallBuffer()
|
|
|
|
{
|
2001-04-18 19:31:05 +00:00
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
2001-04-17 17:27:37 +00:00
|
|
|
UCollator *coll;
|
2001-04-19 21:49:37 +00:00
|
|
|
UCollationElements *testiter,
|
|
|
|
*iter;
|
2001-04-25 21:11:53 +00:00
|
|
|
int32_t count = 0;
|
|
|
|
int32_t *testorders,
|
2001-04-19 21:49:37 +00:00
|
|
|
*orders;
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-04-19 21:49:37 +00:00
|
|
|
UChar teststr[500];
|
|
|
|
UChar str[] = {0x300, 0x31A, 0};
|
2001-04-18 19:31:05 +00:00
|
|
|
/*
|
2001-04-17 17:27:37 +00:00
|
|
|
creating a long string of decomposable characters,
|
|
|
|
since by default the writable buffer is of size 256
|
|
|
|
*/
|
|
|
|
while (count < 500) {
|
|
|
|
if ((count & 1) == 0) {
|
2001-04-19 21:49:37 +00:00
|
|
|
teststr[count ++] = 0x300;
|
2001-04-17 17:27:37 +00:00
|
|
|
}
|
|
|
|
else {
|
2001-04-19 21:49:37 +00:00
|
|
|
teststr[count ++] = 0x31A;
|
2001-04-17 17:27:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
coll = ucol_open("th_TH", &status);
|
2001-04-19 21:49:37 +00:00
|
|
|
testiter = ucol_openElements(coll, teststr, 500, &status);
|
|
|
|
iter = ucol_openElements(coll, str, 2, &status);
|
|
|
|
|
|
|
|
orders = getOrders(iter, &count);
|
|
|
|
if (count != 2) {
|
|
|
|
log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
|
|
|
|
}
|
2001-04-17 17:27:37 +00:00
|
|
|
|
2001-04-18 19:31:05 +00:00
|
|
|
/*
|
2001-04-17 17:27:37 +00:00
|
|
|
this will rearrange the string data to 250 characters of 0x300 first then
|
|
|
|
250 characters of 0x031A
|
|
|
|
*/
|
2001-04-19 21:49:37 +00:00
|
|
|
testorders = getOrders(testiter, &count);
|
2001-04-17 17:27:37 +00:00
|
|
|
|
|
|
|
if (count != 500) {
|
|
|
|
log_err("Error decomposition does not give the right sized collation elements\n");
|
|
|
|
}
|
2001-03-09 18:16:37 +00:00
|
|
|
|
2001-04-17 17:27:37 +00:00
|
|
|
while (count != 0) {
|
|
|
|
/* UCA collation element for 0x0F76 */
|
2001-04-19 21:49:37 +00:00
|
|
|
if ((count > 250 && testorders[-- count] != orders[1]) ||
|
|
|
|
(count <= 250 && testorders[-- count] != orders[0])) {
|
2001-04-17 17:27:37 +00:00
|
|
|
log_err("Error decomposition does not give the right collation element at %d count\n", count);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2001-04-18 19:31:05 +00:00
|
|
|
|
2001-04-19 21:49:37 +00:00
|
|
|
free(testorders);
|
2001-04-17 17:27:37 +00:00
|
|
|
free(orders);
|
2001-04-19 21:49:37 +00:00
|
|
|
ucol_closeElements(testiter);
|
2001-04-17 17:27:37 +00:00
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(coll);
|
|
|
|
}
|
2001-05-14 18:58:36 +00:00
|
|
|
|
|
|
|
/**
 * Converts one hexadecimal digit to its numeric value.
 * @param hex character '0'-'9', 'a'-'f' or 'A'-'F'
 * @return value 0 to 15 of the digit; 0 for any other character
 */
static int32_t hex2num(char hex) {
    if (hex >= '0' && hex <= '9') {
        return hex - '0';
    }
    if (hex >= 'a' && hex <= 'f') {
        return 10 + (hex - 'a');
    }
    if (hex >= 'A' && hex <= 'F') {
        return 10 + (hex - 'A');
    }
    /* not a hexadecimal digit */
    return 0;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Getting codepoints from a string
|
|
|
|
* @param str character string contain codepoints seperated by space and ended
|
|
|
|
* by a semicolon
|
|
|
|
* @param codepoints array for storage, assuming size > 5
|
|
|
|
* @return position at the end of the codepoint section
|
|
|
|
*/
|
|
|
|
static char * getCodePoints(char *str, UChar *codepoints) {
|
|
|
|
char *pStartCP = str;
|
|
|
|
char *pEndCP = str + 4;
|
|
|
|
|
|
|
|
*codepoints = (UChar)((hex2num(*pStartCP) << 12) |
|
|
|
|
(hex2num(*(pStartCP + 1)) << 8) |
|
|
|
|
(hex2num(*(pStartCP + 2)) << 4) |
|
|
|
|
(hex2num(*(pStartCP + 3))));
|
|
|
|
codepoints ++;
|
|
|
|
while (*pEndCP != ';') {
|
|
|
|
pStartCP = pEndCP + 1;
|
|
|
|
*codepoints = (UChar)((hex2num(*pStartCP) << 12) |
|
|
|
|
(hex2num(*(pStartCP + 1)) << 8) |
|
|
|
|
(hex2num(*(pStartCP + 2)) << 4) |
|
|
|
|
(hex2num(*(pStartCP + 3))));
|
|
|
|
codepoints ++;
|
|
|
|
pEndCP = pStartCP + 4;
|
|
|
|
}
|
|
|
|
*codepoints = 0;
|
|
|
|
return pEndCP + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Sniplets of code from genuca
|
|
|
|
*/
|
2001-05-21 23:41:44 +00:00
|
|
|
static int32_t
|
|
|
|
readElement(char **from, char *to, char separator, UErrorCode *status)
|
|
|
|
{
|
2001-05-14 18:58:36 +00:00
|
|
|
if (U_SUCCESS(*status)) {
|
|
|
|
char buffer[1024];
|
|
|
|
int32_t i = 0;
|
|
|
|
while (**from != separator) {
|
|
|
|
if (**from != ' ') {
|
|
|
|
*(buffer+i++) = **from;
|
|
|
|
}
|
|
|
|
(*from)++;
|
|
|
|
}
|
|
|
|
(*from)++;
|
|
|
|
*(buffer + i) = 0;
|
|
|
|
strcpy(to, buffer);
|
|
|
|
return i/2;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Sniplets of code from genuca
|
|
|
|
*/
|
2001-05-21 23:41:44 +00:00
|
|
|
static uint32_t
|
|
|
|
getSingleCEValue(char *primary, char *secondary, char *tertiary,
|
|
|
|
UErrorCode *status)
|
|
|
|
{
|
2001-05-14 18:58:36 +00:00
|
|
|
if (U_SUCCESS(*status)) {
|
|
|
|
uint32_t value = 0;
|
|
|
|
char primsave = '\0';
|
|
|
|
char secsave = '\0';
|
|
|
|
char tersave = '\0';
|
|
|
|
char *primend = primary+4;
|
|
|
|
char *secend = secondary+2;
|
|
|
|
char *terend = tertiary+2;
|
|
|
|
uint32_t primvalue;
|
|
|
|
uint32_t secvalue;
|
|
|
|
uint32_t tervalue;
|
|
|
|
|
2001-05-21 23:41:44 +00:00
|
|
|
if (uprv_strlen(primary) > 4) {
|
2001-05-14 18:58:36 +00:00
|
|
|
primsave = *primend;
|
|
|
|
*primend = '\0';
|
|
|
|
}
|
|
|
|
|
2001-05-21 23:41:44 +00:00
|
|
|
if (uprv_strlen(secondary) > 2) {
|
2001-05-14 18:58:36 +00:00
|
|
|
secsave = *secend;
|
|
|
|
*secend = '\0';
|
|
|
|
}
|
|
|
|
|
2001-05-21 23:41:44 +00:00
|
|
|
if (uprv_strlen(tertiary) > 2) {
|
2001-05-14 18:58:36 +00:00
|
|
|
tersave = *terend;
|
|
|
|
*terend = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
primvalue = (*primary!='\0')?strtoul(primary, &primend, 16):0;
|
|
|
|
secvalue = (*secondary!='\0')?strtoul(secondary, &secend, 16):0;
|
|
|
|
tervalue = (*tertiary!='\0')?strtoul(tertiary, &terend, 16):0;
|
|
|
|
if(primvalue <= 0xFF) {
|
|
|
|
primvalue <<= 8;
|
|
|
|
}
|
|
|
|
|
|
|
|
value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
|
|
|
|
| ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
|
|
|
|
| (tervalue & UCOL_TERTIARYORDERMASK);
|
|
|
|
|
|
|
|
if(primsave!='\0') {
|
|
|
|
*primend = primsave;
|
|
|
|
}
|
|
|
|
if(secsave!='\0') {
|
|
|
|
*secend = secsave;
|
|
|
|
}
|
|
|
|
if(tersave!='\0') {
|
|
|
|
*terend = tersave;
|
|
|
|
}
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Getting collation elements generated from a string
|
|
|
|
* @param str character string contain collation elements contained in [] and
|
|
|
|
* seperated by space
|
|
|
|
* @param ce array for storage, assuming size > 20
|
|
|
|
* @param status error status
|
|
|
|
* @return position at the end of the codepoint section
|
|
|
|
*/
|
|
|
|
static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
|
|
|
|
char *pStartCP = uprv_strchr(str, '[');
|
|
|
|
int count = 0;
|
|
|
|
char *pEndCP;
|
|
|
|
char primary[100];
|
|
|
|
char secondary[100];
|
|
|
|
char tertiary[100];
|
|
|
|
|
|
|
|
while (*pStartCP == '[') {
|
|
|
|
uint32_t primarycount = 0;
|
|
|
|
uint32_t secondarycount = 0;
|
|
|
|
uint32_t tertiarycount = 0;
|
|
|
|
uint32_t CEi = 1;
|
|
|
|
pEndCP = strchr(pStartCP, ']');
|
|
|
|
if(pEndCP == NULL) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
pStartCP ++;
|
|
|
|
|
|
|
|
primarycount = readElement(&pStartCP, primary, ',', status);
|
|
|
|
secondarycount = readElement(&pStartCP, secondary, ',', status);
|
|
|
|
tertiarycount = readElement(&pStartCP, tertiary, ']', status);
|
|
|
|
|
|
|
|
/* I want to get the CEs entered right here, including continuation */
|
|
|
|
ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
|
|
|
|
if (U_FAILURE(*status)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (2 * CEi < primarycount || CEi < secondarycount ||
|
|
|
|
CEi < tertiarycount) {
|
|
|
|
uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
|
|
|
|
if (2 * CEi < primarycount) {
|
|
|
|
value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
|
|
|
|
value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (2 * CEi + 1 < primarycount) {
|
|
|
|
value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
|
|
|
|
value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (CEi < secondarycount) {
|
|
|
|
value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
|
|
|
|
value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (CEi < tertiarycount) {
|
|
|
|
value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
|
|
|
|
value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
|
|
|
|
}
|
|
|
|
|
|
|
|
CEi ++;
|
|
|
|
ces[count ++] = value;
|
|
|
|
}
|
|
|
|
|
|
|
|
pStartCP = pEndCP + 1;
|
|
|
|
}
|
|
|
|
ces[count] = 0;
|
|
|
|
return pStartCP;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Testing the CEs returned by the iterator
|
|
|
|
*/
|
|
|
|
static void TestCEs() {
|
2001-05-14 20:53:58 +00:00
|
|
|
char dir[150];
|
2001-05-14 18:58:36 +00:00
|
|
|
FileStream *file = NULL;
|
|
|
|
char line[300];
|
|
|
|
char *pDir = dir;
|
|
|
|
char *str;
|
|
|
|
UChar codepoints[5];
|
|
|
|
uint32_t ces[20];
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UCollator *coll = ucol_open("", &status);
|
|
|
|
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("Error in opening root collator\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2001-05-14 20:53:58 +00:00
|
|
|
uprv_strcpy(pDir, getenv("ICU_DATA"));
|
2001-05-14 18:58:36 +00:00
|
|
|
pDir += uprv_strlen(pDir);
|
2001-05-17 01:07:10 +00:00
|
|
|
if (*(pDir - 1) != U_FILE_SEP_CHAR) {
|
2001-05-15 21:51:52 +00:00
|
|
|
*pDir = U_FILE_SEP_CHAR;
|
|
|
|
pDir ++;
|
2001-05-14 18:58:36 +00:00
|
|
|
}
|
|
|
|
|
2001-05-14 20:53:58 +00:00
|
|
|
/* dirty : because some platforms might not return the full path */
|
2001-05-14 18:58:36 +00:00
|
|
|
#ifdef XP_MAC
|
2001-05-14 20:53:58 +00:00
|
|
|
uprv_strcpy(pDir, "..:..:data:unidata:FractionalUCA.txt");
|
2001-05-14 18:58:36 +00:00
|
|
|
#elif defined(WIN32) || defined(OS2)
|
2001-05-14 20:53:58 +00:00
|
|
|
uprv_strcpy(pDir, "..\\..\\data\\unidata\\FractionalUCA.txt");
|
2001-05-14 18:58:36 +00:00
|
|
|
#else
|
2001-05-14 20:53:58 +00:00
|
|
|
uprv_strcpy(pDir, "../../data/unidata/FractionalUCA.txt");
|
2001-05-14 18:58:36 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
file = T_FileStream_open(dir, "r");
|
|
|
|
|
|
|
|
if (file == NULL) {
|
|
|
|
log_err("*** unable to open input FractionalUCA.txt file ***\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
|
|
|
|
int count = 0;
|
|
|
|
UCollationElements *iter;
|
|
|
|
/* skip this line if it is empty or a comment or is a return value
|
|
|
|
or start of some variable section */
|
|
|
|
if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
|
|
|
|
line[0] == '[') {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
str = getCodePoints(line, codepoints);
|
|
|
|
getCEs(str, ces, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("Error in parsing collation elements in FractionalUCA.txt\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
iter = ucol_openElements(coll, codepoints, -1, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("Error in opening collation elements\n");
|
|
|
|
break;
|
|
|
|
}
|
2001-05-21 23:41:44 +00:00
|
|
|
for (;;) {
|
2001-05-14 18:58:36 +00:00
|
|
|
uint32_t ce = (uint32_t)ucol_next(iter, &status);
|
|
|
|
if (ce == 0xFFFFFFFF) {
|
|
|
|
ce = 0;
|
|
|
|
}
|
|
|
|
if (ce != ces[count] || U_FAILURE(status)) {
|
|
|
|
log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (ces[count] == 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
count ++;
|
|
|
|
}
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
}
|
|
|
|
|
|
|
|
T_FileStream_close(file);
|
|
|
|
ucol_close(coll);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Testing the discontigous contractions
|
|
|
|
*/
|
|
|
|
static void TestDiscontiguos() {
|
|
|
|
const char *rulestr =
|
|
|
|
"&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
|
|
|
|
UChar rule[50];
|
|
|
|
int rulelen = u_unescape(rulestr, rule, 50);
|
|
|
|
const char *src[] = {
|
|
|
|
"ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
|
|
|
|
/* base character blocked */
|
|
|
|
"XD\\u0300", "XD\\u0300\\u0315",
|
|
|
|
/* non blocking combining character */
|
|
|
|
"X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
|
|
|
|
/* blocking combining character */
|
|
|
|
"X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
|
|
|
|
/* contraction prefix */
|
|
|
|
"ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
|
|
|
|
"X\\u0300\\u031A\\u0315",
|
|
|
|
/* ends not with a contraction character */
|
|
|
|
"X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
|
|
|
|
"X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
|
|
|
|
};
|
|
|
|
const char *tgt[] = {
|
|
|
|
/* non blocking combining character */
|
|
|
|
"A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
|
|
|
|
/* base character blocked */
|
|
|
|
"X D \\u0300", "X D \\u0300\\u0315",
|
|
|
|
/* non blocking combining character */
|
|
|
|
"X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
|
|
|
|
/* blocking combining character */
|
|
|
|
"X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
|
|
|
|
/* contraction prefix */
|
|
|
|
"AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
|
|
|
|
"X\\u0300 \\u031A \\u0315",
|
|
|
|
/* ends not with a contraction character */
|
|
|
|
"X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
|
|
|
|
"X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
|
|
|
|
};
|
|
|
|
int size = 20;
|
|
|
|
UCollator *coll;
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
int count = 0;
|
|
|
|
UCollationElements *iter;
|
|
|
|
UCollationElements *resultiter;
|
|
|
|
|
2001-05-21 23:41:44 +00:00
|
|
|
coll = ucol_openRules(rule, rulelen, UCOL_NO_NORMALIZATION,
|
2001-05-14 18:58:36 +00:00
|
|
|
UCOL_DEFAULT_STRENGTH, &status);
|
|
|
|
iter = ucol_openElements(coll, rule, 1, &status);
|
|
|
|
resultiter = ucol_openElements(coll, rule, 1, &status);
|
|
|
|
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("Error opening collation rules\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (count < size) {
|
|
|
|
UChar str[20];
|
|
|
|
UChar tstr[20];
|
2001-05-21 23:41:44 +00:00
|
|
|
int strLen = u_unescape(src[count], str, 20);
|
2001-05-14 18:58:36 +00:00
|
|
|
UChar *s;
|
|
|
|
|
2001-05-21 23:41:44 +00:00
|
|
|
ucol_setText(iter, str, strLen, &status);
|
2001-05-14 18:58:36 +00:00
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("Error opening collation iterator\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
u_unescape(tgt[count], tstr, 20);
|
|
|
|
s = tstr;
|
2001-05-17 01:07:10 +00:00
|
|
|
|
|
|
|
log_verbose("count %d\n", count);
|
2001-05-14 18:58:36 +00:00
|
|
|
|
2001-05-21 23:41:44 +00:00
|
|
|
for (;;) {
|
2001-05-14 18:58:36 +00:00
|
|
|
uint32_t ce;
|
|
|
|
UChar *e = u_strchr(s, ' ');
|
|
|
|
if (e == 0) {
|
|
|
|
e = u_strchr(s, 0);
|
|
|
|
}
|
|
|
|
ucol_setText(resultiter, s, e - s, &status);
|
|
|
|
ce = ucol_next(resultiter, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("Error manipulating collation iterator\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
while (ce != UCOL_NULLORDER) {
|
|
|
|
if (ce != (uint32_t)ucol_next(iter, &status) ||
|
|
|
|
U_FAILURE(status)) {
|
|
|
|
log_err("Discontiguos contraction test mismatch\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
ce = ucol_next(resultiter, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("Error getting next collation element\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s = e + 1;
|
|
|
|
if (*e == 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ucol_reset(iter);
|
|
|
|
backAndForth(iter);
|
|
|
|
count ++;
|
|
|
|
}
|
|
|
|
ucol_closeElements(resultiter);
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(coll);
|
|
|
|
}
|
|
|
|
|
2001-05-17 21:35:14 +00:00
|
|
|
static void TestCEBufferOverflow()
|
|
|
|
{
|
|
|
|
UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
UChar rule[10];
|
|
|
|
UCollator *coll;
|
|
|
|
UCollationElements *iter;
|
|
|
|
|
|
|
|
u_uastrcpy(rule, "&z < AB");
|
|
|
|
coll = ucol_openRules(rule, u_strlen(rule), UCOL_NO_NORMALIZATION,
|
|
|
|
UCOL_DEFAULT_STRENGTH, &status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
|
|
log_err("Rule based collator not created for testing ce buffer overflow\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 0xE0E0 is a private character hence deemed unsafe by the heuristic
|
|
|
|
test. this will cause an overflow in getPrev */
|
|
|
|
str[0] = 'A';
|
|
|
|
uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
|
|
|
|
str[UCOL_EXPAND_CE_BUFFER_SIZE] = 'B';
|
|
|
|
iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
|
|
|
|
&status);
|
|
|
|
if (ucol_previous(iter, &status) != UCOL_NULLORDER ||
|
|
|
|
status != U_BUFFER_OVERFLOW_ERROR) {
|
|
|
|
log_err("CE buffer expected to overflow with long string of private characters\n");
|
|
|
|
}
|
|
|
|
ucol_closeElements(iter);
|
|
|
|
ucol_close(coll);
|
|
|
|
}
|