ICU-2902 regexp code review changes.

X-SVN-Rev: 12066
This commit is contained in:
Andy Heninger 2003-05-23 01:33:52 +00:00
parent 9c02edfe2d
commit 862039beb5
2 changed files with 55 additions and 9 deletions

View File

@ -699,10 +699,18 @@ int32_t RegexMatcher::split(const UnicodeString &input,
int i;
int32_t numCaptureGroups = fPattern->fGroupMap->size();
for (i=0; ; i++) {
if (i==destCapacity-1) {
// There is only one output string left.
// Fill it with whatever is left from the input, then exit the loop.
dest[i].setTo(input, nextOutputStringStart, inputLen-nextOutputStringStart);
if (i>=destCapacity-1) {
// There is at most one output string left.
// Fill the last output string with whatever is left from the input, then exit the loop.
// ( i will be == destCapacity if we filled the output array while processing
// capture groups of the delimiter expression, in which case we will discard the
// last capture group saved in favor of the unprocessed remainder of the
// input string.)
i = destCapacity-1;
int32_t remainingLength = inputLen-nextOutputStringStart;
if (remainingLength > 0) {
dest[i].setTo(input, nextOutputStringStart, remainingLength);
}
break;
}
if (find()) {
@ -728,11 +736,6 @@ int32_t RegexMatcher::split(const UnicodeString &input,
break;
}
if (i==destCapacity-1) {
// We've filled up the last output string with capture group data.
// Give back the last string, to be used for the remainder of the input.
i--;
}
}
else
{

View File

@ -1018,11 +1018,13 @@ void RegexTest::API_Pattern() {
REGEX_ASSERT(fields[2]=="the"); // left over from previous test
fields[1] = "*";
status = U_ZERO_ERROR;
n = pat1->split("Now is the time", fields, 1, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(n==1);
REGEX_ASSERT(fields[0]=="Now is the time");
REGEX_ASSERT(fields[1]=="*");
status = U_ZERO_ERROR;
n = pat1->split(" Now is the time ", fields, 10, status);
REGEX_CHECK_STATUS;
@ -1051,6 +1053,7 @@ void RegexTest::API_Pattern() {
pat1 = RegexPattern::compile("<(\\w*)>", pe, status);
REGEX_CHECK_STATUS;
status = U_ZERO_ERROR;
n = pat1->split("<a>Now is <b>the time<c>", fields, 10, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(n==6);
@ -1061,6 +1064,7 @@ void RegexTest::API_Pattern() {
REGEX_ASSERT(fields[4]=="the time");
REGEX_ASSERT(fields[5]=="c");
REGEX_ASSERT(fields[6]=="");
REGEX_ASSERT(status==U_ZERO_ERROR);
n = pat1->split(" <a>Now is <b>the time<c>", fields, 10, status);
REGEX_CHECK_STATUS;
@ -1073,6 +1077,44 @@ void RegexTest::API_Pattern() {
REGEX_ASSERT(fields[5]=="c");
REGEX_ASSERT(fields[6]=="");
status = U_ZERO_ERROR;
fields[6] = "foo";
n = pat1->split(" <a>Now is <b>the time<c>", fields, 6, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(n==6);
REGEX_ASSERT(fields[0]==" ");
REGEX_ASSERT(fields[1]=="a");
REGEX_ASSERT(fields[2]=="Now is ");
REGEX_ASSERT(fields[3]=="b");
REGEX_ASSERT(fields[4]=="the time");
REGEX_ASSERT(fields[5]=="c");
REGEX_ASSERT(fields[6]=="foo");
status = U_ZERO_ERROR;
fields[5] = "foo";
n = pat1->split(" <a>Now is <b>the time<c>", fields, 5, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(n==5);
REGEX_ASSERT(fields[0]==" ");
REGEX_ASSERT(fields[1]=="a");
REGEX_ASSERT(fields[2]=="Now is ");
REGEX_ASSERT(fields[3]=="b");
REGEX_ASSERT(fields[4]=="the time<c>");
REGEX_ASSERT(fields[5]=="foo");
status = U_ZERO_ERROR;
fields[5] = "foo";
n = pat1->split(" <a>Now is <b>the time", fields, 5, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(n==5);
REGEX_ASSERT(fields[0]==" ");
REGEX_ASSERT(fields[1]=="a");
REGEX_ASSERT(fields[2]=="Now is ");
REGEX_ASSERT(fields[3]=="b");
REGEX_ASSERT(fields[4]=="the time");
REGEX_ASSERT(fields[5]=="foo");
status = U_ZERO_ERROR;
n = pat1->split(" <a>Now is <b>the time<c>", fields, 4, status);
REGEX_CHECK_STATUS;
REGEX_ASSERT(n==4);
@ -1080,6 +1122,7 @@ void RegexTest::API_Pattern() {
REGEX_ASSERT(fields[1]=="a");
REGEX_ASSERT(fields[2]=="Now is ");
REGEX_ASSERT(fields[3]=="the time<c>");
status = U_ZERO_ERROR;
delete pat1;
pat1 = RegexPattern::compile("([-,])", pe, status);