ICU-12410 test & fix Edits

X-SVN-Rev: 39660
This commit is contained in:
Markus Scherer 2017-02-10 04:44:37 +00:00
parent 91cb8bf24e
commit 4207d7fc26
7 changed files with 312 additions and 49 deletions

View File

@ -40,7 +40,7 @@ Edits::~Edits() {
}
void Edits::reset() {
length = 0;
length = delta = 0;
}
void Edits::addUnchanged(int32_t unchangedLength) {
@ -95,7 +95,8 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
}
int32_t newDelta = newLength - oldLength;
if (newDelta != 0) {
if (newDelta > 0 ? newDelta > (INT32_MAX - delta) : newDelta < (INT32_MIN - delta)) {
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
(newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
// Integer overflow or underflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return;
@ -193,7 +194,7 @@ UBool Edits::hasChanges() const {
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0),
onlyChanges(oc), coarse(crs),
onlyChanges_(oc), coarse(crs),
changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {}
@ -203,7 +204,7 @@ int32_t Edits::Iterator::readLength(int32_t head) {
} else if (head < LENGTH_IN_2TRAIL) {
U_ASSERT(index < length);
U_ASSERT(array[index] >= 0x8000);
return array[index++];
return array[index++] & 0x7fff;
} else {
U_ASSERT((index + 2) <= length);
U_ASSERT(array[index] >= 0x8000);
@ -225,12 +226,13 @@ void Edits::Iterator::updateIndexes() {
}
UBool Edits::Iterator::noNext() {
// Empty span beyond the string.
// No change beyond the string.
changed = FALSE;
oldLength_ = newLength_ = 0;
return FALSE;
}
UBool Edits::Iterator::next(UErrorCode &errorCode) {
UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
@ -308,12 +310,12 @@ UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
if (i < srcIndex) {
// Reset the iterator to the start.
index = remaining = srcIndex = replIndex = destIndex = 0;
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return TRUE;
}
while (next(errorCode)) {
while (next(FALSE, errorCode)) {
if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return TRUE;

View File

@ -106,7 +106,7 @@ public:
* @return TRUE if there is another edit
* @draft ICU 59
*/
UBool next(UErrorCode &errorCode);
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
/**
* Finds the edit that contains the source index.
@ -169,11 +169,12 @@ public:
int32_t readLength(int32_t head);
void updateIndexes();
UBool noNext();
UBool next(UBool onlyChanges, UErrorCode &errorCode);
const uint16_t *array;
int32_t index, length;
int32_t remaining;
UBool onlyChanges, coarse;
UBool onlyChanges_, coarse;
UBool changed;
int32_t oldLength_, newLength_;

View File

@ -19,6 +19,7 @@
*/
#include "unicode/std_string.h"
#include "unicode/edits.h"
#include "unicode/uchar.h"
#include "unicode/ures.h"
#include "unicode/uloc.h"
@ -31,10 +32,50 @@
#include "unicode/tstdtmod.h"
#include "cmemory.h"
struct EditChange {
UBool change;
int32_t oldLength, newLength;
};
class StringCaseTest: public IntlTest {
public:
StringCaseTest();
virtual ~StringCaseTest();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
void TestCaseConversion();
void TestCasingImpl(const UnicodeString &input,
const UnicodeString &output,
int32_t whichCase,
void *iter, const char *localeID, uint32_t options);
void TestCasing();
void TestFullCaseFoldingIterator();
void TestGreekUpper();
void TestLongUpper();
void TestMalformedUTF8();
void TestBufferOverflow();
void TestEdits();
private:
void assertGreekUpper(const char *s, const char *expected);
void checkEditsIter(
const char *name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
EditChange expected[], int32_t expLength, UBool withUnchanged,
UErrorCode &errorCode);
Locale GREEK_LOCALE_;
};
StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
StringCaseTest::~StringCaseTest() {}
extern IntlTest *createStringCaseTest() {
return new StringCaseTest();
}
void
StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if(exec) {
@ -50,6 +91,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestLongUpper);
TESTCASE_AUTO(TestMalformedUTF8);
TESTCASE_AUTO(TestBufferOverflow);
TESTCASE_AUTO(TestEdits);
TESTCASE_AUTO_END;
}
@ -848,3 +890,118 @@ void StringCaseTest::TestBufferOverflow() {
errorCode.reset();
#endif // U_HAVE_STD_STRING
}
void StringCaseTest::checkEditsIter(
const char *name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
EditChange expected[], int32_t expLength, UBool withUnchanged,
UErrorCode &errorCode) {
assertFalse(name, ei2.findSourceIndex(-1, errorCode));
char msg[100];
int32_t expSrcIndex = 0;
int32_t expDestIndex = 0;
int32_t expReplIndex = 0;
for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
sprintf(msg, "%s %d", name, (int)expIndex);
if (withUnchanged || expected[expIndex].change) {
assertTrue(msg, ei1.next(errorCode));
assertEquals(msg, expected[expIndex].change, ei1.hasChange());
assertEquals(msg, expected[expIndex].oldLength, ei1.oldLength());
assertEquals(msg, expected[expIndex].newLength, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
}
if (expected[expIndex].oldLength > 0) {
assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
assertEquals(msg, expected[expIndex].change, ei2.hasChange());
assertEquals(msg, expected[expIndex].oldLength, ei2.oldLength());
assertEquals(msg, expected[expIndex].newLength, ei2.newLength());
assertEquals(msg, expSrcIndex, ei2.sourceIndex());
assertEquals(msg, expDestIndex, ei2.destinationIndex());
assertEquals(msg, expReplIndex, ei2.replacementIndex());
if (!withUnchanged) {
// For some iterators, move past the current range
// so that findSourceIndex() has to look before the current index.
ei2.next(errorCode);
ei2.next(errorCode);
}
}
expSrcIndex += expected[expIndex].oldLength;
expDestIndex += expected[expIndex].newLength;
if (expected[expIndex].change) {
expReplIndex += expected[expIndex].newLength;
}
}
sprintf(msg, "%s end", name);
assertFalse(msg, ei1.next(errorCode));
assertFalse(msg, ei1.hasChange());
assertEquals(msg, 0, ei1.oldLength());
assertEquals(msg, 0, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));
}
void StringCaseTest::TestEdits() {
IcuTestErrorCode errorCode(*this, "TestEdits");
Edits edits;
assertFalse("new Edits", edits.hasChanges());
assertEquals("new Edits", 0, edits.lengthDelta());
edits.addUnchanged(1); // multiple unchanged ranges are combined
edits.addUnchanged(10000); // too long, and they are split
edits.addReplace(0, 0);
edits.addUnchanged(2);
assertFalse("unchanged 10003", edits.hasChanges());
assertEquals("unchanged 10003", 0, edits.lengthDelta());
edits.addReplace(1, 1); // multiple short equal-length edits are compressed
edits.addUnchanged(0);
edits.addReplace(1, 1);
edits.addReplace(1, 1);
edits.addReplace(0, 10);
edits.addReplace(100, 0);
edits.addReplace(3000, 4000); // variable-length encoding
edits.addReplace(100000, 100000);
assertTrue("some edits", edits.hasChanges());
assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
UErrorCode outErrorCode = U_ZERO_ERROR;
assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
EditChange coarseExpectedChanges[] = {
{ FALSE, 10003, 10003 },
{ TRUE, 103103, 104013 }
};
checkEditsIter("coarse",
edits.getCoarseIterator(), edits.getCoarseIterator(),
coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
checkEditsIter("coarse changes",
edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
EditChange fineExpectedChanges[] = {
{ FALSE, 10003, 10003 },
{ TRUE, 1, 1 },
{ TRUE, 1, 1 },
{ TRUE, 1, 1 },
{ TRUE, 0, 10 },
{ TRUE, 100, 0 },
{ TRUE, 3000, 4000 },
{ TRUE, 100000, 100000 }
};
checkEditsIter("fine",
edits.getFineIterator(), edits.getFineIterator(),
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
checkEditsIter("fine changes",
edits.getFineChangesIterator(), edits.getFineChangesIterator(),
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
edits.reset();
assertFalse("reset", edits.hasChanges());
assertEquals("reset", 0, edits.lengthDelta());
Edits::Iterator ei = edits.getCoarseChangesIterator();
assertFalse("reset then iterator", ei.next(errorCode));
}

View File

@ -30,11 +30,13 @@ using namespace std;
UnicodeStringTest::~UnicodeStringTest() {}
extern IntlTest *createStringCaseTest();
void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
{
if (exec) logln("TestSuite UnicodeStringTest: ");
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO_CLASS(StringCaseTest);
TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
TESTCASE_AUTO(TestBasicManipulation);
TESTCASE_AUTO(TestCompare);
TESTCASE_AUTO(TestExtract);

View File

@ -94,30 +94,4 @@ public:
void TestMoveSwap();
};
class StringCaseTest: public IntlTest {
public:
StringCaseTest();
virtual ~StringCaseTest();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
void TestCaseConversion();
void TestCasingImpl(const UnicodeString &input,
const UnicodeString &output,
int32_t whichCase,
void *iter, const char *localeID, uint32_t options);
void TestCasing();
void TestFullCaseFoldingIterator();
void TestGreekUpper();
void TestLongUpper();
void TestMalformedUTF8();
void TestBufferOverflow();
private:
void assertGreekUpper(const char *s, const char *expected);
Locale GREEK_LOCALE_;
};
#endif

View File

@ -52,7 +52,7 @@ public final class Edits {
* @provisional This API might change or be removed in a future release.
*/
public void reset() {
length = 0;
length = delta = 0;
}
private void setLastUnit(int last) {
@ -125,8 +125,8 @@ public final class Edits {
}
int newDelta = newLength - oldLength;
if (newDelta != 0) {
if (newDelta > 0 ? newDelta > (Integer.MAX_VALUE - delta) :
newDelta < (Integer.MIN_VALUE - delta)) {
if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
(newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
// Integer overflow or underflow.
throw new IndexOutOfBoundsException();
}
@ -226,7 +226,7 @@ public final class Edits {
private int index;
private final int length;
private int remaining;
private final boolean onlyChanges, coarse;
private final boolean onlyChanges_, coarse;
private boolean changed;
private int oldLength_, newLength_;
@ -235,7 +235,7 @@ public final class Edits {
private Iterator(char[] a, int len, boolean oc, boolean crs) {
array = a;
length = len;
onlyChanges = oc;
onlyChanges_ = oc;
coarse = crs;
}
@ -245,7 +245,7 @@ public final class Edits {
} else if (head < LENGTH_IN_2TRAIL) {
assert(index < length);
assert(array[index] >= 0x8000);
return array[index++];
return array[index++] & 0x7fff;
} else {
assert((index + 2) <= length);
assert(array[index] >= 0x8000);
@ -267,7 +267,8 @@ public final class Edits {
}
private boolean noNext() {
// Empty span beyond the string.
// No change beyond the string.
changed = false;
oldLength_ = newLength_ = 0;
return false;
}
@ -279,6 +280,10 @@ public final class Edits {
* @provisional This API might change or be removed in a future release.
*/
public boolean next() {
return next(onlyChanges_);
}
private boolean next(boolean onlyChanges) {
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
updateIndexes();
@ -357,10 +362,10 @@ public final class Edits {
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
* <p>The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
* The iterator state after this search is undefined
* <p>The iterator state after this search is undefined
* if the source index is out of bounds for the source string.
*
* @param i source index
@ -372,12 +377,12 @@ public final class Edits {
if (i < 0) { return false; }
if (i < srcIndex) {
// Reset the iterator to the start.
index = remaining = srcIndex = replIndex = destIndex = 0;
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return true;
}
while (next()) {
while (next(false)) {
if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return true;

View File

@ -24,6 +24,7 @@ import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Edits;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale;
@ -708,6 +709,127 @@ public final class UCharacterCaseTest extends TestFmwk
assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ");
}
private static final class EditChange {
private boolean change;
private int oldLength, newLength;
EditChange(boolean change, int oldLength, int newLength) {
this.change = change;
this.oldLength = oldLength;
this.newLength = newLength;
}
}
private static void checkEditsIter(
String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators
EditChange[] expected, boolean withUnchanged) {
assertFalse(name, ei2.findSourceIndex(-1));
int expSrcIndex = 0;
int expDestIndex = 0;
int expReplIndex = 0;
for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
String msg = name + ' ' + expIndex;
if (withUnchanged || expected[expIndex].change) {
assertTrue(msg, ei1.next());
assertEquals(msg, expected[expIndex].change, ei1.hasChange());
assertEquals(msg, expected[expIndex].oldLength, ei1.oldLength());
assertEquals(msg, expected[expIndex].newLength, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
}
if (expected[expIndex].oldLength > 0) {
assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
assertEquals(msg, expected[expIndex].change, ei2.hasChange());
assertEquals(msg, expected[expIndex].oldLength, ei2.oldLength());
assertEquals(msg, expected[expIndex].newLength, ei2.newLength());
assertEquals(msg, expSrcIndex, ei2.sourceIndex());
assertEquals(msg, expDestIndex, ei2.destinationIndex());
assertEquals(msg, expReplIndex, ei2.replacementIndex());
if (!withUnchanged) {
// For some iterators, move past the current range
// so that findSourceIndex() has to look before the current index.
ei2.next();
ei2.next();
}
}
expSrcIndex += expected[expIndex].oldLength;
expDestIndex += expected[expIndex].newLength;
if (expected[expIndex].change) {
expReplIndex += expected[expIndex].newLength;
}
}
String msg = name + " end";
assertFalse(msg, ei1.next());
assertFalse(msg, ei1.hasChange());
assertEquals(msg, 0, ei1.oldLength());
assertEquals(msg, 0, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
assertFalse(name, ei2.findSourceIndex(expSrcIndex));
}
@Test
public void TestEdits() {
Edits edits = new Edits();
assertFalse("new Edits", edits.hasChanges());
assertEquals("new Edits", 0, edits.lengthDelta());
edits.addUnchanged(1); // multiple unchanged ranges are combined
edits.addUnchanged(10000); // too long, and they are split
edits.addReplace(0, 0);
edits.addUnchanged(2);
assertFalse("unchanged 10003", edits.hasChanges());
assertEquals("unchanged 10003", 0, edits.lengthDelta());
edits.addReplace(1, 1); // multiple short equal-length edits are compressed
edits.addUnchanged(0);
edits.addReplace(1, 1);
edits.addReplace(1, 1);
edits.addReplace(0, 10);
edits.addReplace(100, 0);
edits.addReplace(3000, 4000); // variable-length encoding
edits.addReplace(100000, 100000);
assertTrue("some edits", edits.hasChanges());
assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
EditChange[] coarseExpectedChanges = new EditChange[] {
new EditChange(false, 10003, 10003),
new EditChange(true, 103103, 104013)
};
checkEditsIter("coarse",
edits.getCoarseIterator(), edits.getCoarseIterator(),
coarseExpectedChanges, true);
checkEditsIter("coarse changes",
edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
coarseExpectedChanges, false);
EditChange[] fineExpectedChanges = new EditChange[] {
new EditChange(false, 10003, 10003),
new EditChange(true, 1, 1),
new EditChange(true, 1, 1),
new EditChange(true, 1, 1),
new EditChange(true, 0, 10),
new EditChange(true, 100, 0),
new EditChange(true, 3000, 4000),
new EditChange(true, 100000, 100000)
};
checkEditsIter("fine",
edits.getFineIterator(), edits.getFineIterator(),
fineExpectedChanges, true);
checkEditsIter("fine changes",
edits.getFineChangesIterator(), edits.getFineChangesIterator(),
fineExpectedChanges, false);
edits.reset();
assertFalse("reset", edits.hasChanges());
assertEquals("reset", 0, edits.lengthDelta());
Edits.Iterator ei = edits.getCoarseChangesIterator();
assertFalse("reset then iterator", ei.next());
}
// private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
@ -945,7 +1067,7 @@ public final class UCharacterCaseTest extends TestFmwk
// private methods -------------------------------------------------------
/**
* Converting the hex numbers represented betwee n ';' to Unicode strings
* Converting the hex numbers represented between ';' to Unicode strings
* @param str string to break up into Unicode strings
* @return array of Unicode strings ending with a null
*/