ICU-1130 added parsePattern for use by Name-Any

X-SVN-Rev: 9850
2002-09-06 23:37:16 +00:00 · 2002-09-06 23:37:16 +00:00 · d96f819545
commit d96f819545
parent 85d23479b6
2 changed files with 151 additions and 0 deletions
--- a/icu4c/source/common/util.cpp
+++ b/icu4c/source/common/util.cpp
@ -144,6 +144,49 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
    return p;
 }

+/**
+ * Skip over whitespace in a Replaceable.  Whitespace is defined by
+ * uprv_isRuleWhiteSpace().  Skipping may be done in the forward or
+ * reverse direction.  In either case, the leftmost index will be
+ * inclusive, and the rightmost index will be exclusive.  That is,
+ * given a range defined as [start, limit), the call
+ * skipWhitespace(text, start, limit) will advance start past leading
+ * whitespace, whereas the call skipWhitespace(text, limit, start),
+ * will back up limit past trailing whitespace.
+ * @param text the text to be analyzed
+ * @param pos either the start or limit of a range of 'text', to skip
+ * leading or trailing whitespace, respectively
+ * @param stop either the limit or start of a range of 'text', to skip
+ * leading or trailing whitespace, respectively
+ * @return the new start or limit, depending on what was passed in to
+ * 'pos'
+ */
+//?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
+//?int32_t ICU_Utility::skipWhitespace(const Replaceable& text,
+//?                                    int32_t pos, int32_t stop) {
+//?    UChar32 c;
+//?    UBool isForward = (stop >= pos);
+//?
+//?    if (!isForward) {
+//?        --pos; // pos is a limit, so back up by one
+//?    }
+//?    
+//?    while (pos != stop &&
+//?           uprv_isRuleWhiteSpace(c = text.char32At(pos))) {
+//?        if (isForward) {
+//?            pos += UTF_CHAR_LENGTH(c);
+//?        } else {
+//?            pos -= UTF_CHAR_LENGTH(c);
+//?        }
+//?    }
+//?
+//?    if (!isForward) {
+//?        ++pos; // make pos back into a limit
+//?    }
+//?
+//?    return pos;
+//?}
+
 /**
 * Parse a single non-whitespace character 'ch', optionally
 * preceded by whitespace.
@ -231,6 +274,72 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_
    return pos;
 }

+/**
+ * Match the text of a Replaceable, starting at 'index', against a
+ * parsing pattern.  Characters are matched literally and
+ * case-sensitively except for the following special character:
+ *
+ * ~  zero or more uprv_isRuleWhiteSpace chars
+ *
+ * If the end of the pattern is reached with all matches along the way,
+ * the index of the first unparsed character is returned.  Otherwise,
+ * including when 'limit' is reached before 'pat' is used up, -1 is returned.
+ * @param pat pattern that controls parsing; if empty, 'index' is returned
+ * @param text text to be parsed, starting at index
+ * @param index offset to first character to parse
+ * @param limit offset after last character to parse
+ * @return index after last parsed character, or -1 on parse failure.
+ */
+int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
+                                  const Replaceable& text,
+                                  int32_t index,
+                                  int32_t limit) {
+    int32_t ipat = 0; // current offset into pat
+
+    // empty pattern matches immediately, consuming no text
+    if (ipat == pat.length()) {
+        return index;
+    }
+
+    UChar32 cpat = pat.char32At(ipat); // current pattern code point
+
+    while (index < limit) {
+        UChar32 c = text.char32At(index);
+
+        // parse \s* : a '~' in the pattern consumes a run of rule white space
+        if (cpat == 126 /*~*/) {
+            if (uprv_isRuleWhiteSpace(c)) {
+                index += UTF_CHAR_LENGTH(c);
+                continue; // stay on the same '~' and look at the next c
+            } else {
+                if (++ipat == pat.length()) {
+                    return index; // success; c unparsed
+                }
+                // leave the if-chain; c is re-read and tested against next cpat
+            }
+        }
+
+        // parse literal: the text and pattern code points must be equal
+        else if (c == cpat) {
+            index += UTF_CHAR_LENGTH(c);
+            ipat += UTF_CHAR_LENGTH(cpat);
+            if (ipat == pat.length()) {
+                return index; // success; c parsed
+            }
+            // fall thru; get next cpat
+        }
+
+        // match failure of literal
+        else {
+            return -1;
+        }
+
+        cpat = pat.char32At(ipat); // ipat < pat.length() here; load next cpat
+    }
+
+    return -1; // text ended before end of pat (even if only a '~' remained)
+}
+
 static const UChar ZERO_X[] = {48, 120, 0}; // "0x"

 /**
--- a/icu4c/source/common/util.h
+++ b/icu4c/source/common/util.h
@ -86,6 +86,27 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s
    static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
                                  UBool advance = FALSE);

+    /**
+     * Skip over whitespace in a Replaceable.  Whitespace is defined by
+     * uprv_isRuleWhiteSpace().  Skipping may be done in the forward or
+     * reverse direction.  In either case, the leftmost index will be
+     * inclusive, and the rightmost index will be exclusive.  That is,
+     * given a range defined as [start, limit), the call
+     * skipWhitespace(text, start, limit) will advance start past leading
+     * whitespace, whereas the call skipWhitespace(text, limit, start),
+     * will back up limit past trailing whitespace.
+     * @param text the text to be analyzed
+     * @param pos either the start or limit of a range of 'text', to skip
+     * leading or trailing whitespace, respectively
+     * @param stop either the limit or start of a range of 'text', to skip
+     * leading or trailing whitespace, respectively
+     * @return the new start or limit, depending on what was passed in to
+     * 'pos'
+     */
+//?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
+//?    static int32_t skipWhitespace(const Replaceable& text,
+//?                                  int32_t pos, int32_t stop);
+
    /**
     * Parse a single non-whitespace character 'ch', optionally
     * preceded by whitespace.
@ -122,6 +143,27 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s
    static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
                                const UnicodeString& pattern, int32_t* parsedInts);
        
+    /**
+     * Match the text of a Replaceable, starting at 'index', against a
+     * parsing pattern.  Characters are matched literally and
+     * case-sensitively except for the following special character:
+     *
+     * ~  zero or more uprv_isRuleWhiteSpace chars
+     *
+     * If the end of the pattern is reached with all matches along the way,
+     * the index of the first unparsed character is returned.  Otherwise,
+     * including when 'limit' is reached before 'pat' is used up, -1 is returned.
+     * @param pat pattern that controls parsing; if empty, 'index' is returned
+     * @param text text to be parsed, starting at index
+     * @param index offset to first character to parse
+     * @param limit offset after last character to parse
+     * @return index after last parsed character, or -1 on parse failure.
+     */
+    static int32_t parsePattern(const UnicodeString& pat,
+                                const Replaceable& text,
+                                int32_t index,
+                                int32_t limit);
+
    /**
     * Parse an integer at pos, either of the form \d+ or of the form
     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,