diff --git a/icu4c/source/i18n/unicode/uniset.h b/icu4c/source/i18n/unicode/uniset.h index e0a102653a..9ffe53909e 100644 --- a/icu4c/source/i18n/unicode/uniset.h +++ b/icu4c/source/i18n/unicode/uniset.h @@ -287,7 +287,16 @@ class U_I18N_API UnicodeSet : public UnicodeFilter { * "}". Example: "{var}". */ static const UChar VARIABLE_REF_CLOSE; - + + // More special characters... + static const UChar SET_OPEN; + static const UChar SET_CLOSE; + static const UChar HYPHEN; + static const UChar COMPLEMENT; + static const UChar COLON; + static const UChar BACKSLASH; + static const UChar INTERSECTION; + //---------------------------------------------------------------- // Debugging and testing //---------------------------------------------------------------- diff --git a/icu4c/source/i18n/uniset.cpp b/icu4c/source/i18n/uniset.cpp index 60d48bc28c..f880f59156 100644 --- a/icu4c/source/i18n/uniset.cpp +++ b/icu4c/source/i18n/uniset.cpp @@ -34,13 +34,22 @@ const UnicodeString UnicodeSet::CATEGORY_CLOSE = UNICODE_STRING(":]", 2); * Delimiter char beginning a variable reference: * "{". Example: "{var}". */ -const UChar UnicodeSet::VARIABLE_REF_OPEN = '{'; +const UChar UnicodeSet::VARIABLE_REF_OPEN = 0x007B /*{*/; /** * Delimiter char ending a variable reference: * "}". Example: "{var}". */ -const UChar UnicodeSet::VARIABLE_REF_CLOSE = '}'; +const UChar UnicodeSet::VARIABLE_REF_CLOSE = 0x007D /*}*/; + +// Define UChar constants using hex for EBCDIC compatibility +const UChar UnicodeSet::SET_OPEN = 0x005B; /*[*/ +const UChar UnicodeSet::SET_CLOSE = 0x005D; /*]*/ +const UChar UnicodeSet::HYPHEN = 0x002D; /*-*/ +const UChar UnicodeSet::COMPLEMENT = 0x005E; /*^*/ +const UChar UnicodeSet::COLON = 0x003A; /*:*/ +const UChar UnicodeSet::BACKSLASH = 0x005C; /*\*/ +const UChar UnicodeSet::INTERSECTION = 0x0026; /*&*/ //---------------------------------------------------------------- // Debugging and testing @@ -197,7 +206,7 @@ void UnicodeSet::applyPattern(const UnicodeString& pattern, * will produce another set that is equal to this one. */ UnicodeString& UnicodeSet::toPattern(UnicodeString& result) const { - result.remove().append((UChar)'['); + result.remove().append(SET_OPEN); // iterate through the ranges in the UnicodeSet for (int32_t i=0; i= pattern.length()) { status = U_ILLEGAL_ARGUMENT_ERROR; return pairsBuf; @@ -613,10 +622,10 @@ UnicodeString& UnicodeSet::parse(UnicodeString& pairsBuf /*result*/, * subpattern, either a normal pattern or a category pattern. We * recognize these here and set nestedPairs accordingly. */ - else if (!isLiteral && c == '[') { + else if (!isLiteral && c == SET_OPEN) { // Handle "[:...:]", representing a character category UChar d = charAfter(pattern, i); - if (d == ':') { + if (d == COLON) { i += 2; int32_t j = pattern.indexOf(CATEGORY_CLOSE, i); if (j < 0) { @@ -666,10 +675,10 @@ UnicodeString& UnicodeSet::parse(UnicodeString& pairsBuf /*result*/, lastChar = -1; } switch (lastOp) { - case '-': + case HYPHEN: doDifference(pairsBuf, *nestedPairs); break; - case '&': + case INTERSECTION: doIntersection(pairsBuf, *nestedPairs); break; case 0: @@ -677,13 +686,13 @@ UnicodeString& UnicodeSet::parse(UnicodeString& pairsBuf /*result*/, break; } lastOp = 0; - } else if (!isLiteral && c == ']') { + } else if (!isLiteral && c == SET_CLOSE) { // Final closing delimiter. This is the only way we leave this // loop if the pattern is well-formed. break; - } else if (lastOp == 0 && !isLiteral && (c == '-' || c == '&')) { + } else if (lastOp == 0 && !isLiteral && (c == HYPHEN || c == INTERSECTION)) { lastOp = c; - } else if (lastOp == '-') { + } else if (lastOp == HYPHEN) { addPair(pairsBuf, (UChar)lastChar, c); lastOp = 0; lastChar = -1; @@ -702,10 +711,10 @@ UnicodeString& UnicodeSet::parse(UnicodeString& pairsBuf /*result*/, } // Handle unprocessed stuff preceding the closing ']' - if (lastOp == '-') { + if (lastOp == HYPHEN) { // Trailing '-' is treated as literal addPair(pairsBuf, lastOp, lastOp); - } else if (lastOp == '&') { + } else if (lastOp == INTERSECTION) { // throw new IllegalArgumentException("Unquoted trailing " + lastOp); status = U_ILLEGAL_ARGUMENT_ERROR; return pairsBuf; @@ -1072,7 +1081,7 @@ UnicodeString& UnicodeSet::getCategoryPairs(UnicodeString& result, // TO DO: Allocate cat on the heap only if needed. UnicodeString cat(catName); bool_t invert = (catName.length() > 1 && - catName.charAt(0) == '^'); + catName.charAt(0) == COMPLEMENT); if (invert) { cat.remove(0, 1); }