2005-05-22 04:58:36 +00:00
|
|
|
/********************************************************************
|
|
|
|
* COPYRIGHT:
|
|
|
|
* Copyright (c) 2005, International Business Machines Corporation and
|
|
|
|
* others. All Rights Reserved.
|
|
|
|
********************************************************************/
|
|
|
|
/************************************************************************
|
|
|
|
* Tests for the UText and UTextIterator text abstraction classes
|
|
|
|
*
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <unicode/utext.h>
|
2005-05-31 03:43:53 +00:00
|
|
|
#include <unicode/utf8.h>
|
|
|
|
#include <unicode/ustring.h>
|
2005-05-22 04:58:36 +00:00
|
|
|
#include "utxttest.h"
|
|
|
|
|
|
|
|
// Failure flag shared by the TEST_ macros in this file. The macros set it to
// TRUE when a check fails; the test functions read it to stop early.
UBool gFailed = FALSE;

// Check a condition. When x does not hold, report the source location through
// errln() and set gFailed. The expansion is a braced block, so a semicolon
// after the invocation is optional.
#define TEST_ASSERT(x) \
    { \
        if (!(x)) { \
            errln("Test failure in file %s at line %d\n", __FILE__, __LINE__); \
            gFailed = TRUE; \
        } \
    }
|
|
|
|
|
2005-05-31 03:43:53 +00:00
|
|
|
|
|
|
|
// Check an ICU error code. When status holds a failure, report the source
// location and the error name through errln() and set gFailed. The expansion
// is a braced block, so a semicolon after the invocation is optional.
#define TEST_SUCCESS(status) \
    { \
        if (U_FAILURE(status)) { \
            errln("Test failure in file %s at line %d. Error = \"%s\"\n", \
                  __FILE__, __LINE__, u_errorName(status)); \
            gFailed = TRUE; \
        } \
    }
|
|
|
|
|
2005-05-22 04:58:36 +00:00
|
|
|
// The test object has no setup to do in its constructor.
UTextTest::UTextTest() {}
|
|
|
|
|
|
|
|
// The destructor has nothing to release.
UTextTest::~UTextTest() {}
|
|
|
|
|
|
|
|
void
|
|
|
|
UTextTest::runIndexedTest(int32_t index, UBool exec,
|
|
|
|
const char* &name, char* /*par*/) {
|
|
|
|
switch (index) {
|
|
|
|
case 0: name = "TextTest";
|
|
|
|
if(exec) TextTest(); break;
|
|
|
|
default: name = ""; break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void UTextTest::TextTest() {
|
|
|
|
TestString("abcd\\U00010001xyz");
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// mapping between native indexes and code points.
|
|
|
|
// native indexes could be utf-8, utf-16, utf32, or some code page.
|
|
|
|
// The general purpose UText test function takes an array of these as
|
|
|
|
// expected contents of the text being accessed.
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
|
|
void UTextTest::TestString(const UnicodeString &s) {
|
|
|
|
int i;
|
|
|
|
int j;
|
|
|
|
UChar32 c;
|
|
|
|
int cpCount = 0;
|
2005-05-31 03:43:53 +00:00
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
2005-05-22 04:58:36 +00:00
|
|
|
|
|
|
|
UnicodeString sa = s.unescape();
|
|
|
|
|
|
|
|
//
|
|
|
|
// Build up the mapping between code points and UTF-16 code unit indexes.
|
|
|
|
//
|
2005-06-09 16:33:04 +00:00
|
|
|
m *cpMap = new m[sa.length() + 1];
|
2005-05-22 04:58:36 +00:00
|
|
|
j = 0;
|
|
|
|
for (i=0; i<sa.length(); i=sa.moveIndex32(i, 1)) {
|
|
|
|
c = sa.char32At(i);
|
|
|
|
cpMap[j].nativeIdx = i;
|
|
|
|
cpMap[j].cp = c;
|
|
|
|
j++;
|
|
|
|
cpCount++;
|
|
|
|
}
|
2005-05-31 03:43:53 +00:00
|
|
|
cpMap[j].nativeIdx = i; // position following the last char in utf-16 string.
|
2005-05-22 04:58:36 +00:00
|
|
|
|
|
|
|
|
|
|
|
// UChar * test, null term
|
|
|
|
|
|
|
|
// UChar * test, with length
|
|
|
|
|
|
|
|
// const UChar * test, null term
|
|
|
|
|
|
|
|
|
|
|
|
// const UChar * test, length
|
|
|
|
|
|
|
|
// UnicodeString test
|
2005-05-31 03:43:53 +00:00
|
|
|
UText *ut;
|
|
|
|
ut = utext_openUnicodeString(NULL, &sa, &status);
|
|
|
|
TEST_SUCCESS(status);
|
|
|
|
TestAccess(sa, ut, cpCount, cpMap);
|
|
|
|
utext_close(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
|
2005-05-31 03:43:53 +00:00
|
|
|
//
|
2005-05-22 04:58:36 +00:00
|
|
|
// UTF-8 test
|
2005-05-31 03:43:53 +00:00
|
|
|
//
|
|
|
|
|
|
|
|
// Convert the test string from UnicodeString to (char *) in utf-8 format
|
|
|
|
int u8Len = sa.extract(0, sa.length(), NULL, 0, "utf-8");
|
|
|
|
char *u8String = new char[u8Len + 1];
|
|
|
|
sa.extract(0, sa.length(), u8String, u8Len+1, "utf-8");
|
|
|
|
|
|
|
|
// Build up the map of code point indices in the utf-8 string
|
|
|
|
m * u8Map = new m[sa.length() + 1];
|
|
|
|
i = 0; // native utf-8 index
|
|
|
|
for (j=0; j<cpCount ; j++) { // code point number
|
|
|
|
u8Map[j].nativeIdx = i;
|
|
|
|
U8_NEXT(u8String, i, u8Len, c)
|
|
|
|
u8Map[j].cp = c;
|
|
|
|
}
|
|
|
|
u8Map[cpCount].nativeIdx = u8Len; // position following the last char in utf-8 string.
|
|
|
|
|
|
|
|
// Do the test itself
|
|
|
|
status = U_ZERO_ERROR;
|
|
|
|
ut = utext_openUTF8(NULL, (uint8_t *)u8String, -1, &status);
|
|
|
|
TEST_SUCCESS(status);
|
|
|
|
TestAccess(sa, ut, cpCount, u8Map);
|
|
|
|
utext_close(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
|
|
|
|
// UTF-32 test
|
|
|
|
|
|
|
|
// Code Page test
|
|
|
|
|
|
|
|
// Replaceable test
|
|
|
|
|
2005-06-09 16:33:04 +00:00
|
|
|
delete []cpMap;
|
|
|
|
delete []u8Map;
|
|
|
|
delete []u8String;
|
2005-05-22 04:58:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-05-31 03:43:53 +00:00
|
|
|
void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Check the length from the UText
|
|
|
|
//
|
|
|
|
int expectedLen = cpMap[cpCount].nativeIdx;
|
|
|
|
int utlen = ut->length(ut);
|
|
|
|
TEST_ASSERT(expectedLen == utlen);
|
|
|
|
|
2005-05-22 04:58:36 +00:00
|
|
|
//
|
|
|
|
// Iterate forwards, verify that we get the correct code points
|
|
|
|
// at the correct native offsets.
|
|
|
|
//
|
|
|
|
int i = 0;
|
|
|
|
int index;
|
2005-05-31 18:27:39 +00:00
|
|
|
int expectedIndex = 0;
|
|
|
|
int foundIndex = 0;
|
2005-05-22 04:58:36 +00:00
|
|
|
UChar32 expectedC;
|
|
|
|
UChar32 foundC;
|
|
|
|
int32_t len;
|
|
|
|
|
|
|
|
for (i=0; i<cpCount; i++) {
|
|
|
|
expectedIndex = cpMap[i].nativeIdx;
|
2005-06-13 05:12:32 +00:00
|
|
|
foundIndex = utext_getIndex(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(expectedIndex == foundIndex);
|
|
|
|
expectedC = cpMap[i].cp;
|
2005-06-13 05:12:32 +00:00
|
|
|
foundC = utext_next32(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(expectedC == foundC);
|
|
|
|
if (gFailed) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2005-06-13 05:12:32 +00:00
|
|
|
foundC = utext_next32(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(foundC == U_SENTINEL);
|
|
|
|
|
2005-06-13 05:12:32 +00:00
|
|
|
// Repeat above, using macros
|
|
|
|
utext_setIndex(ut, 0);
|
|
|
|
for (i=0; i<cpCount; i++) {
|
|
|
|
expectedIndex = cpMap[i].nativeIdx;
|
|
|
|
foundIndex = utext_getIndex(ut);
|
|
|
|
TEST_ASSERT(expectedIndex == foundIndex);
|
|
|
|
expectedC = cpMap[i].cp;
|
|
|
|
foundC = UTEXT_NEXT32(ut);
|
|
|
|
TEST_ASSERT(expectedC == foundC);
|
|
|
|
if (gFailed) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
foundC = utext_next32(ut);
|
|
|
|
TEST_ASSERT(foundC == U_SENTINEL);
|
|
|
|
|
2005-05-22 04:58:36 +00:00
|
|
|
//
|
|
|
|
// Forward iteration (above) should have left index at the
|
|
|
|
// end of the input, which should == length().
|
|
|
|
//
|
2005-06-13 05:12:32 +00:00
|
|
|
len = utext_length(ut);
|
|
|
|
foundIndex = utext_getIndex(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(len == foundIndex);
|
|
|
|
|
|
|
|
//
|
|
|
|
// Iterate backwards over entire test string
|
|
|
|
//
|
2005-06-13 05:12:32 +00:00
|
|
|
len = utext_getIndex(ut);
|
|
|
|
utext_setIndex(ut, len);
|
2005-05-22 04:58:36 +00:00
|
|
|
for (i=cpCount-1; i>=0; i--) {
|
|
|
|
expectedC = cpMap[i].cp;
|
|
|
|
expectedIndex = cpMap[i].nativeIdx;
|
2005-06-13 05:12:32 +00:00
|
|
|
foundC = utext_previous32(ut);
|
|
|
|
foundIndex = utext_getIndex(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(expectedIndex == foundIndex);
|
|
|
|
TEST_ASSERT(expectedC == foundC);
|
|
|
|
if (gFailed) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Backwards iteration, above, should have left our iterator
|
|
|
|
// position at zero, and continued backwards iterationshould fail.
|
|
|
|
//
|
2005-06-13 05:12:32 +00:00
|
|
|
foundIndex = utext_getIndex(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(foundIndex == 0);
|
|
|
|
|
2005-06-13 05:12:32 +00:00
|
|
|
foundC = utext_previous32(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(foundC == U_SENTINEL);
|
2005-06-13 05:12:32 +00:00
|
|
|
foundIndex = utext_getIndex(ut);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(foundIndex == 0);
|
|
|
|
if (gFailed) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
2005-05-31 03:43:53 +00:00
|
|
|
// next32From(), prevous32From(), Iterate in a somewhat random order.
|
2005-05-22 04:58:36 +00:00
|
|
|
//
|
|
|
|
int cpIndex = 0;
|
|
|
|
for (i=0; i<cpCount; i++) {
|
|
|
|
cpIndex = (cpIndex + 9973) % cpCount;
|
|
|
|
index = cpMap[cpIndex].nativeIdx;
|
|
|
|
expectedC = cpMap[cpIndex].cp;
|
2005-06-13 05:12:32 +00:00
|
|
|
foundC = utext_next32From(ut, index);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(expectedC == foundC);
|
|
|
|
TEST_ASSERT(expectedIndex == foundIndex);
|
|
|
|
if (gFailed) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cpIndex = 0;
|
|
|
|
for (i=0; i<cpCount; i++) {
|
|
|
|
cpIndex = (cpIndex + 9973) % cpCount;
|
2005-05-31 03:43:53 +00:00
|
|
|
index = cpMap[cpIndex+1].nativeIdx;
|
2005-05-22 04:58:36 +00:00
|
|
|
expectedC = cpMap[cpIndex].cp;
|
2005-06-13 05:12:32 +00:00
|
|
|
foundC = utext_previous32From(ut, index);
|
2005-05-22 04:58:36 +00:00
|
|
|
TEST_ASSERT(expectedC == foundC);
|
|
|
|
TEST_ASSERT(expectedIndex == foundIndex);
|
|
|
|
if (gFailed) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-05-23 05:31:17 +00:00
|
|
|
//
|
|
|
|
// moveIndex(int32_t delta);
|
|
|
|
//
|
2005-05-31 03:43:53 +00:00
|
|
|
|
|
|
|
// Walk through frontwards, incrementing by one
|
2005-06-13 05:12:32 +00:00
|
|
|
utext_setIndex(ut, 0);
|
2005-05-31 03:43:53 +00:00
|
|
|
for (i=1; i<=cpCount; i++) {
|
2005-06-13 05:12:32 +00:00
|
|
|
utext_moveIndex(ut, 1);
|
|
|
|
index = utext_getIndex(ut);
|
2005-05-31 03:43:53 +00:00
|
|
|
expectedIndex = cpMap[i].nativeIdx;
|
|
|
|
TEST_ASSERT(expectedIndex == index);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Walk through frontwards, incrementing by two
|
2005-06-13 05:12:32 +00:00
|
|
|
utext_setIndex(ut, 0);
|
2005-05-23 05:31:17 +00:00
|
|
|
for (i=2; i<cpCount; i+=2) {
|
2005-06-13 05:12:32 +00:00
|
|
|
utext_moveIndex(ut, 2);
|
|
|
|
index = utext_getIndex(ut);
|
2005-05-23 05:31:17 +00:00
|
|
|
expectedIndex = cpMap[i].nativeIdx;
|
|
|
|
TEST_ASSERT(expectedIndex == index);
|
|
|
|
}
|
|
|
|
|
2005-05-31 03:43:53 +00:00
|
|
|
// walk through the string backwards, decrementing by one.
|
|
|
|
i = cpMap[cpCount].nativeIdx;
|
2005-06-13 05:12:32 +00:00
|
|
|
utext_setIndex(ut, i);
|
2005-05-31 03:43:53 +00:00
|
|
|
for (i=cpCount; i>=0; i--) {
|
|
|
|
expectedIndex = cpMap[i].nativeIdx;
|
2005-06-13 05:12:32 +00:00
|
|
|
index = utext_getIndex(ut);
|
2005-05-31 03:43:53 +00:00
|
|
|
TEST_ASSERT(expectedIndex == index);
|
2005-06-13 05:12:32 +00:00
|
|
|
utext_moveIndex(ut, -1);
|
2005-05-31 03:43:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// walk through backwards, decrementing by three
|
|
|
|
i = cpMap[cpCount].nativeIdx;
|
2005-06-13 05:12:32 +00:00
|
|
|
utext_setIndex(ut, i);
|
2005-05-31 03:43:53 +00:00
|
|
|
for (i=cpCount; i>=0; i-=3) {
|
2005-05-23 05:31:17 +00:00
|
|
|
expectedIndex = cpMap[i].nativeIdx;
|
2005-06-13 05:12:32 +00:00
|
|
|
index = utext_getIndex(ut);
|
2005-05-23 05:31:17 +00:00
|
|
|
TEST_ASSERT(expectedIndex == index);
|
2005-06-13 05:12:32 +00:00
|
|
|
utext_moveIndex(ut, -3);
|
2005-05-23 05:31:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-05-31 03:43:53 +00:00
|
|
|
//
|
|
|
|
// Extract
|
|
|
|
//
|
|
|
|
int bufSize = us.length() + 10;
|
|
|
|
UChar *buf = new UChar[bufSize];
|
|
|
|
status = U_ZERO_ERROR;
|
|
|
|
expectedLen = us.length();
|
2005-06-13 05:12:32 +00:00
|
|
|
len = utext_extract(ut, 0, utlen, buf, bufSize, &status);
|
2005-05-31 03:43:53 +00:00
|
|
|
TEST_SUCCESS(status);
|
|
|
|
TEST_ASSERT(len == expectedLen);
|
|
|
|
int compareResult = us.compare(buf, -1);
|
|
|
|
TEST_ASSERT(compareResult == 0);
|
|
|
|
|
|
|
|
status = U_ZERO_ERROR;
|
2005-06-13 05:12:32 +00:00
|
|
|
len = utext_extract(ut, 0, utlen, NULL, 0, &status);
|
2005-05-31 03:43:53 +00:00
|
|
|
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR)
|
|
|
|
TEST_ASSERT(len == expectedLen);
|
|
|
|
|
|
|
|
status = U_ZERO_ERROR;
|
|
|
|
u_memset(buf, 0x5555, bufSize);
|
2005-06-13 05:12:32 +00:00
|
|
|
len = utext_extract(ut, 0, utlen, buf, 1, &status);
|
2005-05-31 03:43:53 +00:00
|
|
|
if (us.length() == 0) {
|
|
|
|
TEST_SUCCESS(status);
|
|
|
|
TEST_ASSERT(buf[0] == 0);
|
|
|
|
} else {
|
|
|
|
TEST_ASSERT(buf[0] == us.charAt(0));
|
|
|
|
TEST_ASSERT(buf[1] == 0x5555);
|
|
|
|
if (us.length() == 1) {
|
|
|
|
TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
|
|
|
|
} else {
|
|
|
|
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
|
|
|
|
}
|
|
|
|
}
|
2005-05-23 05:31:17 +00:00
|
|
|
|
2005-05-31 03:43:53 +00:00
|
|
|
delete buf;
|
2005-05-23 05:31:17 +00:00
|
|
|
|
2005-05-22 04:58:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|