ICU-4669 Add tests for UText CharacterIterator provider, and fix errors uncovered by them.

X-SVN-Rev: 20016
This commit is contained in:
Andy Heninger 2006-08-10 05:28:32 +00:00
parent c6898528d5
commit 4ce961aa13
2 changed files with 36 additions and 26 deletions

View File

@ -785,7 +785,7 @@ shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {
} }
// //
// Relocate any pointers in the target that refer to the UText itself. // Relocate any pointers in the target that refer to the UText itself
// to point to the cloned copy rather than the original source. // to point to the cloned copy rather than the original source.
// //
adjustPointer(dest, &dest->context, src); adjustPointer(dest, &dest->context, src);
@ -931,7 +931,7 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) {
while (ut->c<ix && s8[ut->c]!=0) { while (ut->c<ix && s8[ut->c]!=0) {
ut->c++; ut->c++;
} }
// TODO: check for null terminated string length > 32 bits. // TODO: support for null terminated string length > 32 bits.
if (s8[ut->c] == 0) { if (s8[ut->c] == 0) {
// We just found the actual length of the string. // We just found the actual length of the string.
// Trim the requested index back to that. // Trim the requested index back to that.
@ -1536,14 +1536,15 @@ utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
// First do a generic shallow clone. Does everything needed for the UText struct itself. // First do a generic shallow clone. Does everything needed for the UText struct itself.
dest = shallowTextClone(dest, src, status); dest = shallowTextClone(dest, src, status);
// TODO: fix up pointers in the structs in extra.
// For deep clones, make a copy of the string. // For deep clones, make a copy of the string.
// The copied storage is owned by the newly created clone. // The copied storage is owned by the newly created clone.
// A non-NULL pointer in UText.p is the signal to the close() function to delete
// it.
// //
// TODO: what to do about that non-const native length? // TODO: There is an issue with using utext_nativeLength().
// That function is non-const in cases where the input was NUL terminated
// and the length has not yet been determined.
// This function (clone()) is const.
// There is potentially a thread safety issue lurking here.
//
if (deep && U_SUCCESS(*status)) { if (deep && U_SUCCESS(*status)) {
int32_t len = (int32_t)utext_nativeLength((UText *)src); int32_t len = (int32_t)utext_nativeLength((UText *)src);
char *copyStr = (char *)uprv_malloc(len+1); char *copyStr = (char *)uprv_malloc(len+1);
@ -1696,8 +1697,6 @@ repTextAccess(UText *ut, int64_t index, UBool forward) {
int32_t index32 = pinIndex(index, length); int32_t index32 = pinIndex(index, length);
U_ASSERT(index<=INT32_MAX); U_ASSERT(index<=INT32_MAX);
// TODO: check if requested location is in chunk already.
/* /*
* Compute start/limit boundaries around index, for a segment of text * Compute start/limit boundaries around index, for a segment of text
@ -1927,7 +1926,7 @@ repTextCopy(UText *ut,
int32_t limit32 = pinIndex(limit, length); int32_t limit32 = pinIndex(limit, length);
int32_t destIndex32 = pinIndex(destIndex, length); int32_t destIndex32 = pinIndex(destIndex, length);
// TODO: snap everything to code point boundaries. // TODO: snap input parameters to code point boundaries.
if(move) { if(move) {
// move: copy to destIndex, then replace original with nothing // move: copy to destIndex, then replace original with nothing
@ -2200,7 +2199,7 @@ unistrTextCopy(UText *ut,
// Iteration position to end of the newly inserted text. // Iteration position to end of the newly inserted text.
ut->chunkOffset = destIndex32+limit32-start32; ut->chunkOffset = destIndex32+limit32-start32;
if (move && destIndex32>start32) { //TODO: backwards? check. if (move && destIndex32>start32) {
ut->chunkOffset = destIndex32; ut->chunkOffset = destIndex32;
} }
@ -2393,7 +2392,7 @@ ucstrTextAccess(UText *ut, int64_t index, UBool forward) {
if (chunkLimit == INT32_MAX) { if (chunkLimit == INT32_MAX) {
// Scanned to the limit of a 32 bit length. // Scanned to the limit of a 32 bit length.
// Forcibly trim the overlength string back so length fits in int32 // Forcibly trim the overlength string back so length fits in int32
// TODO: add support for longer strings. // TODO: add support for 64 bit strings.
ut->a = chunkLimit; ut->a = chunkLimit;
ut->chunkLength = chunkLimit; ut->chunkLength = chunkLimit;
ut->nativeIndexingLimit = chunkLimit; ut->nativeIndexingLimit = chunkLimit;
@ -2642,9 +2641,9 @@ charIterTextAccess(UText *ut, int64_t index, UBool forward) {
ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart); ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart);
} }
ut->nativeIndexingLimit = ut->chunkLength; ut->nativeIndexingLimit = ut->chunkLength;
ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;
U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize); U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize);
} }
ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;
UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0); UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0);
return success; return success;
} }
@ -2655,15 +2654,9 @@ charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status
return NULL; return NULL;
} }
// For deep clones, make a copy of the string.
// The copied storage is owned by the newly created clone.
// A non-NULL pointer in UText.p is the signal to the close() function to delete
// it.
//
if (deep) { if (deep) {
// TODO // There is no CharacterIterator API for cloning the underlying text storage.
U_ASSERT(FALSE); *status = U_UNSUPPORTED_ERROR;
return NULL; return NULL;
} else { } else {
CharacterIterator *srcCI =(CharacterIterator *)src->context; CharacterIterator *srcCI =(CharacterIterator *)src->context;
@ -2711,9 +2704,8 @@ charIterTextExtract(UText *ut,
} }
srci += len; srci += len;
} }
if (desti<destCapacity) {
dest[desti] = 0; u_terminateUChars(dest, destCapacity, desti, status);
}
return desti; return desti;
} }
U_CDECL_END U_CDECL_END

View File

@ -16,6 +16,7 @@
#include <unicode/utext.h> #include <unicode/utext.h>
#include <unicode/utf8.h> #include <unicode/utf8.h>
#include <unicode/ustring.h> #include <unicode/ustring.h>
#include <unicode/uchriter.h>
#include "utxttest.h" #include "utxttest.h"
static UBool gFailed = FALSE; static UBool gFailed = FALSE;
@ -225,6 +226,18 @@ void UTextTest::TestString(const UnicodeString &s) {
TestCMR(sa, ut, cpCount, cpMap, cpMap); TestCMR(sa, ut, cpCount, cpMap, cpMap);
utext_close(ut); utext_close(ut);
// Character Iterator Tests
status = U_ZERO_ERROR;
const UChar *cbuf = sa.getBuffer();
CharacterIterator *ci = new UCharCharacterIterator(cbuf, saLen, status);
TEST_SUCCESS(status);
ut = utext_openCharacterIterator(NULL, ci, &status);
TEST_SUCCESS(status);
TestAccess(sa, ut, cpCount, cpMap);
utext_close(ut);
delete ci;
// Fragmented UnicodeString (Chunk size of one) // Fragmented UnicodeString (Chunk size of one)
// //
status = U_ZERO_ERROR; status = U_ZERO_ERROR;
@ -768,7 +781,12 @@ void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *c
TEST_SUCCESS(status); TEST_SUCCESS(status);
TEST_ASSERT(buf[0] == 0); TEST_ASSERT(buf[0] == 0);
} else { } else {
TEST_ASSERT(buf[0] == us.charAt(0)); // Buf len == 1, extracting a single 16 bit value.
// If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
// or whether the lead surrogate of the pair is extracted.
// It's a buffer overflow error in either case.
TEST_ASSERT(buf[0] == us.charAt(0) ||
buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0)));
TEST_ASSERT(buf[1] == 0x5555); TEST_ASSERT(buf[1] == 0x5555);
if (us.length() == 1) { if (us.length() == 1) {
TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);