diff --git a/icu4c/source/common/putil.c b/icu4c/source/common/putil.c index c9065ddf77..d7bb084b5c 100644 --- a/icu4c/source/common/putil.c +++ b/icu4c/source/common/putil.c @@ -2665,7 +2665,8 @@ _uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = { "U_BRK_UNDEFINED_VARIABLE", "U_BRK_INIT_ERROR", "U_BRK_RULE_EMPTY_SET", - "U_BRK_UNRECOGNIZED_OPTION" + "U_BRK_UNRECOGNIZED_OPTION", + "U_BRK_MALFORMED_RULE_TAG" }; static const char * const diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp index 272f4a2654..832099cc9a 100644 --- a/icu4c/source/common/rbbiscan.cpp +++ b/icu4c/source/common/rbbiscan.cpp @@ -463,6 +463,10 @@ UBool RBBIRuleScanner::doParseActions(EParseAction action) fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); break; + case doTagExpectedError: + error(U_BRK_MALFORMED_RULE_TAG); + returnVal = FALSE; + break; case doOptionStart: // Scanning a !!option. At the start of string. diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index 608d4f8373..d2040eb1a1 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -634,6 +634,7 @@ typedef enum UErrorCode { U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. 
*/ + U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */ U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */ /* diff --git a/icu4c/source/test/intltest/rbbiapts.cpp b/icu4c/source/test/intltest/rbbiapts.cpp index 29536b7ffd..4750896f84 100644 --- a/icu4c/source/test/intltest/rbbiapts.cpp +++ b/icu4c/source/test/intltest/rbbiapts.cpp @@ -27,6 +27,11 @@ */ +#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ +errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} + +#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ +errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}} void RBBIAPITest::TestCloneEquals() { @@ -598,6 +603,13 @@ void RBBIAPITest::TestRuleStatus() { errln("FAIL: incorrect tag value %d at position %d", tag, pos); break; } + + // Check that we get the same tag values from getRuleStatusVec() + int32_t vec[10]; + int t = bi->getRuleStatusVec(vec, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(t==1); + TEST_ASSERT(vec[0] == tag); } } delete bi; @@ -649,6 +661,87 @@ void RBBIAPITest::TestRuleStatus() { } +// +// TestRuleStatusVec +// Test the vector form of break rule status. 
+// +void RBBIAPITest::TestRuleStatusVec() { + UnicodeString rulesString = "[A-N]{100}; \n" + "[a-w]{200}; \n" + "[\\p{L}]{300}; \n" + "[\\p{N}]{400}; \n" + "[0-5]{500}; \n" + "!.*;\n"; + UnicodeString testString1 = "Aapz5?"; + int32_t statusVals[10]; + int32_t numStatuses; + int32_t pos; + + UErrorCode status=U_ZERO_ERROR; + UParseError parseError; + + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); + TEST_ASSERT_SUCCESS(status); + if (U_SUCCESS(status)) { + bi->setText(testString1); + + // A + pos = bi->next(); + TEST_ASSERT(pos==1); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 100); + TEST_ASSERT(statusVals[1] == 300); + + // a + pos = bi->next(); + TEST_ASSERT(pos==2); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 200); + TEST_ASSERT(statusVals[1] == 300); + + // p + pos = bi->next(); + TEST_ASSERT(pos==3); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 200); + TEST_ASSERT(statusVals[1] == 300); + + // z + pos = bi->next(); + TEST_ASSERT(pos==4); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 1); + TEST_ASSERT(statusVals[0] == 300); + + // 5 + pos = bi->next(); + TEST_ASSERT(pos==5); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 400); + TEST_ASSERT(statusVals[1] == 500); + + // ? 
+ pos = bi->next(); + TEST_ASSERT(pos==6); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 1); + TEST_ASSERT(statusVals[0] == 0); + + } + delete bi; + +} + // // Bug 2190 Regression test. Builder crash on rule consisting of only a // $variable reference @@ -853,10 +946,11 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, case 7: name = "TestBuilder"; if (exec) TestBuilder(); break; case 8: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; case 9: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; - case 10: name = "TestBug2190"; if (exec) TestBug2190(); break; - case 11: name = "TestRegistration"; if (exec) TestRegistration(); break; - case 12: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; - case 13: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; + case 10: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; + case 11: name = "TestBug2190"; if (exec) TestBug2190(); break; + case 12: name = "TestRegistration"; if (exec) TestRegistration(); break; + case 13: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; + case 14: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; default: name = ""; break; // needed to end loop } diff --git a/icu4c/source/test/intltest/rbbiapts.h b/icu4c/source/test/intltest/rbbiapts.h index 8a6f319c7e..2481eec51e 100644 --- a/icu4c/source/test/intltest/rbbiapts.h +++ b/icu4c/source/test/intltest/rbbiapts.h @@ -72,6 +72,7 @@ public: * Tests word break status returns. */ void TestRuleStatus(); + void TestRuleStatusVec(); void TestBug2190();