From d96f819545195b72222225c3f66fa5ab99d02bdc Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Fri, 6 Sep 2002 23:37:16 +0000 Subject: [PATCH] ICU-1130 added parsePattern for use by Name-Any X-SVN-Rev: 9850 --- icu4c/source/common/util.cpp | 109 +++++++++++++++++++++++++++++++++++ icu4c/source/common/util.h | 42 ++++++++++++++ 2 files changed, 151 insertions(+) diff --git a/icu4c/source/common/util.cpp b/icu4c/source/common/util.cpp index 4de5f5f1a3..d41e70d895 100644 --- a/icu4c/source/common/util.cpp +++ b/icu4c/source/common/util.cpp @@ -144,6 +144,49 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos, return p; } +/** + * Skip over whitespace in a Replaceable. Whitespace is defined by + * uprv_isRuleWhiteSpace(). Skipping may be done in the forward or + * reverse direction. In either case, the leftmost index will be + * inclusive, and the rightmost index will be exclusive. That is, + * given a range defined as [start, limit), the call + * skipWhitespace(text, start, limit) will advance start past leading + * whitespace, whereas the call skipWhitespace(text, limit, start), + * will back up limit past trailing whitespace. + * @param text the text to be analyzed + * @param pos either the start or limit of a range of 'text', to skip + * leading or trailing whitespace, respectively + * @param stop either the limit or start of a range of 'text', to skip + * leading or trailing whitespace, respectively + * @return the new start or limit, depending on what was passed in to + * 'pos' + */ +//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. +//?int32_t ICU_Utility::skipWhitespace(const Replaceable& text, +//? int32_t pos, int32_t stop) { +//? UChar32 c; +//? UBool isForward = (stop >= pos); +//? +//? if (!isForward) { +//? --pos; // pos is a limit, so back up by one +//? } +//? +//? while (pos != stop && +//? uprv_isRuleWhiteSpace(c = text.char32At(pos))) { +//? if (isForward) { +//? pos += UTF_CHAR_LENGTH(c); +//? } else { +//? 
pos -= UTF_CHAR_LENGTH(c); +//? } +//? } +//? +//? if (!isForward) { +//? ++pos; // make pos back into a limit +//? } +//? +//? return pos; +//?} + /** * Parse a single non-whitespace character 'ch', optionally * preceded by whitespace. @@ -231,6 +274,80 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_ return pos; }
+/**
+ * Parse a pattern string within the given Replaceable and a parsing
+ * pattern. Characters are matched literally and case-sensitively
+ * except for the following special characters:
+ *
+ * ~ zero or more uprv_isRuleWhiteSpace chars
+ *
+ * If end of pattern is reached with all matches along the way,
+ * the index of the first unparsed character is returned.
+ * Otherwise -1 is returned. Because '~' may match zero characters,
+ * a pattern whose remaining characters are all '~' still succeeds
+ * when the text runs out at 'limit'.
+ * @param pat pattern that controls parsing
+ * @param text text to be parsed, starting at index
+ * @param index offset to first character to parse
+ * @param limit offset after last character to parse
+ * @return index after last parsed character, or -1 on parse failure.
+ */
+int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
+                                  const Replaceable& text,
+                                  int32_t index,
+                                  int32_t limit) {
+    int32_t ipat = 0;
+
+    // empty pattern matches immediately
+    if (ipat == pat.length()) {
+        return index;
+    }
+
+    UChar32 cpat = pat.char32At(ipat);
+
+    while (index < limit) {
+        UChar32 c = text.char32At(index);
+
+        // parse \s*
+        if (cpat == 126 /*~*/) {
+            if (uprv_isRuleWhiteSpace(c)) {
+                index += UTF_CHAR_LENGTH(c);
+                continue;
+            } else {
+                if (++ipat == pat.length()) {
+                    return index; // success; c unparsed
+                }
+                // fall thru; process c again with next cpat
+            }
+        }
+
+        // parse literal
+        else if (c == cpat) {
+            index += UTF_CHAR_LENGTH(c);
+            ipat += UTF_CHAR_LENGTH(cpat);
+            if (ipat == pat.length()) {
+                return index; // success; c parsed
+            }
+            // fall thru; get next cpat
+        }
+
+        // match failure of literal
+        else {
+            return -1;
+        }
+
+        cpat = pat.char32At(ipat);
+    }
+
+    // Text is exhausted but pattern is not. A '~' matches zero or
+    // more whitespace chars, so any run of '~' left in the pattern
+    // still matches here; any other leftover char is a failure.
+    while (ipat < pat.length() && pat.char32At(ipat) == 126 /*~*/) {
+        ++ipat;
+    }
+    return (ipat == pat.length()) ? index : -1;
+}
+
 static const UChar ZERO_X[] = {48, 120, 0}; // "0x" /** diff --git a/icu4c/source/common/util.h b/icu4c/source/common/util.h index 5a29da6902..1031d488d0 100644 --- a/icu4c/source/common/util.h +++ b/icu4c/source/common/util.h @@ -86,6 +86,27 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos, UBool advance = FALSE); + /** + * Skip over whitespace in a Replaceable. Whitespace is defined by + * uprv_isRuleWhiteSpace(). Skipping may be done in the forward or + * reverse direction. In either case, the leftmost index will be + * inclusive, and the rightmost index will be exclusive. That is, + * given a range defined as [start, limit), the call + * skipWhitespace(text, start, limit) will advance start past leading + * whitespace, whereas the call skipWhitespace(text, limit, start), + * will back up limit past trailing whitespace.
+ * @param text the text to be analyzed + * @param pos either the start or limit of a range of 'text', to skip + * leading or trailing whitespace, respectively + * @param stop either the limit or start of a range of 'text', to skip + * leading or trailing whitespace, respectively + * @return the new start or limit, depending on what was passed in to + * 'pos' + */ +//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. +//? static int32_t skipWhitespace(const Replaceable& text, +//? int32_t pos, int32_t stop); + /** * Parse a single non-whitespace character 'ch', optionally * preceded by whitespace. @@ -122,6 +143,27 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit, const UnicodeString& pattern, int32_t* parsedInts); + /** + * Parse a pattern string within the given Replaceable and a parsing + * pattern. Characters are matched literally and case-sensitively + * except for the following special characters: + * + * ~ zero or more uprv_isRuleWhiteSpace chars + * + * If end of pattern is reached with all matches along the way, + * the index of the first unparsed character is returned. + * Otherwise -1 is returned. + * @param pat pattern that controls parsing + * @param text text to be parsed, starting at index + * @param index offset to first character to parse + * @param limit offset after last character to parse + * @return index after last parsed character, or -1 on parse failure. + */ + static int32_t parsePattern(const UnicodeString& pat, + const Replaceable& text, + int32_t index, + int32_t limit); + /** * Parse an integer at pos, either of the form \d+ or of the form * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,