ICU-8305 doxygen cleanups for regular expression API.

X-SVN-Rev: 29812
This commit is contained in:
Andy Heninger 2011-04-16 01:27:00 +00:00
parent 8148726df2
commit 6d21ef8ca3
2 changed files with 207 additions and 21 deletions

View File

@ -901,8 +901,8 @@ public:
/** /**
* Returns a shallow clone of the entire live input string with the UText current native index * Returns a shallow clone of the entire live input string with the UText current native index
* set to the beginning of the requested group. * set to the beginning of the requested group.
* Note that copying the entire input string may cause significant performance and memory issues. *
* @param dest The UText into which the input should be copied, or NULL to create a new UText * @param dest The UText into which the input should be cloned, or NULL to create a new UText
* @param group_len A reference to receive the length of the desired capture group * @param group_len A reference to receive the length of the desired capture group
* @param status A reference to a UErrorCode to receive any errors. * @param status A reference to a UErrorCode to receive any errors.
* Possible errors are U_REGEX_INVALID_STATE if no match * Possible errors are U_REGEX_INVALID_STATE if no match
@ -915,6 +915,18 @@ public:
virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const; virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
/** /**
* Returns a shallow clone of the entire live input string with the UText current native index
* set to the beginning of the requested group.
*
* @param groupNum The capture group number.
* @param dest The UText into which the input should be cloned, or NULL to create a new UText.
* @param group_len A reference to receive the length of the desired capture group
* @param status A reference to a UErrorCode to receive any errors.
* Possible errors are U_REGEX_INVALID_STATE if no match
* has been attempted or the last match failed and
* U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
* @return dest if non-NULL, a shallow copy of the input text otherwise
*
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const; virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
@ -947,6 +959,10 @@ public:
virtual int32_t start(UErrorCode &status) const; virtual int32_t start(UErrorCode &status) const;
/** /**
* Returns the index in the input string of the start of the text matched
* during the previous match operation.
* @param status a reference to a UErrorCode to receive any errors.
* @return The (native) position in the input string of the start of the last match.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
virtual int64_t start64(UErrorCode &status) const; virtual int64_t start64(UErrorCode &status) const;
@ -968,6 +984,16 @@ public:
virtual int32_t start(int32_t group, UErrorCode &status) const; virtual int32_t start(int32_t group, UErrorCode &status) const;
/** /**
* Returns the index in the input string of the start of the text matched by the
* specified capture group during the previous match operation. Return -1 if
* the capture group exists in the pattern, but was not part of the last match.
*
* @param group the capture group number.
* @param status A reference to a UErrorCode to receive any errors. Possible
* errors are U_REGEX_INVALID_STATE if no match has been
* attempted or the last match failed, and
* U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
* @return the (native) start position of substring matched by the specified group.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
virtual int64_t start64(int32_t group, UErrorCode &status) const; virtual int64_t start64(int32_t group, UErrorCode &status) const;
@ -976,6 +1002,7 @@ public:
/** /**
* Returns the index in the input string of the first character following the * Returns the index in the input string of the first character following the
* text matched during the previous match operation. * text matched during the previous match operation.
*
* @param status A reference to a UErrorCode to receive any errors. Possible * @param status A reference to a UErrorCode to receive any errors. Possible
* errors are U_REGEX_INVALID_STATE if no match has been * errors are U_REGEX_INVALID_STATE if no match has been
* attempted or the last match failed. * attempted or the last match failed.
@ -988,6 +1015,16 @@ public:
virtual int32_t end(UErrorCode &status) const; virtual int32_t end(UErrorCode &status) const;
/** /**
* Returns the index in the input string of the first character following the
* text matched during the previous match operation.
*
* @param status A reference to a UErrorCode to receive any errors. Possible
* errors are U_REGEX_INVALID_STATE if no match has been
* attempted or the last match failed.
* @return the index of the last character matched, plus one.
* The index value returned is a native index, corresponding to
* code units for the underlying encoding type, for example,
* a byte index for UTF-8.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
virtual int64_t end64(UErrorCode &status) const; virtual int64_t end64(UErrorCode &status) const;
@ -996,6 +1033,7 @@ public:
/** /**
* Returns the index in the input string of the character following the * Returns the index in the input string of the character following the
* text matched by the specified capture group during the previous match operation. * text matched by the specified capture group during the previous match operation.
*
* @param group the capture group number * @param group the capture group number
* @param status A reference to a UErrorCode to receive any errors. Possible * @param status A reference to a UErrorCode to receive any errors. Possible
* errors are U_REGEX_INVALID_STATE if no match has been * errors are U_REGEX_INVALID_STATE if no match has been
@ -1012,6 +1050,20 @@ public:
virtual int32_t end(int32_t group, UErrorCode &status) const; virtual int32_t end(int32_t group, UErrorCode &status) const;
/** /**
* Returns the index in the input string of the character following the
* text matched by the specified capture group during the previous match operation.
*
* @param group the capture group number
* @param status A reference to a UErrorCode to receive any errors. Possible
* errors are U_REGEX_INVALID_STATE if no match has been
* attempted or the last match failed and
* U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
* @return the index of the first character following the text
* captured by the specified group during the previous match operation.
* Return -1 if the capture group exists in the pattern but was not part of the match.
* The index value returned is a native index, corresponding to
* code units for the underlying encoding type, for example,
* a byte index for UTF-8.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
virtual int64_t end64(int32_t group, UErrorCode &status) const; virtual int64_t end64(int32_t group, UErrorCode &status) const;
@ -1198,6 +1250,11 @@ public:
virtual int32_t regionStart() const; virtual int32_t regionStart() const;
/** /**
* Reports the start index of this matcher's region. The searches this matcher
* conducts are limited to finding matches within regionStart (inclusive) and
* regionEnd (exclusive).
*
* @return The starting (native) index of this matcher's region.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
virtual int64_t regionStart64() const; virtual int64_t regionStart64() const;
@ -1214,6 +1271,11 @@ public:
virtual int32_t regionEnd() const; virtual int32_t regionEnd() const;
/** /**
* Reports the end (limit) index (exclusive) of this matcher's region. The searches
* this matcher conducts are limited to finding matches within regionStart
* (inclusive) and regionEnd (exclusive).
*
* @return The ending point (native) of this matcher's region.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
virtual int64_t regionEnd64() const; virtual int64_t regionEnd64() const;

View File

@ -452,6 +452,23 @@ uregex_matches(URegularExpression *regexp,
/** /**
* 64bit version of uregex_matches. * 64bit version of uregex_matches.
* Attempts to match the input string against the pattern.
* To succeed, the match must extend to the end of the string,
* or cover the complete match region.
*
* If startIndex >= zero the match operation starts at the specified
* index and must extend to the end of the input string. Any region
* that has been specified is reset.
*
* If startIndex == -1 the match must cover the input region, or the entire
* input string if no region has been set. This directly corresponds to
* Matcher.matches() in Java.
*
* @param regexp The compiled regular expression.
* @param startIndex The input string (native) index at which to begin matching, or -1
* to match the input Region.
* @param status Receives errors detected by this function.
* @return TRUE if there is a match
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT UBool U_EXPORT2 U_DRAFT UBool U_EXPORT2
@ -489,6 +506,26 @@ uregex_lookingAt(URegularExpression *regexp,
/** /**
* 64bit version of uregex_lookingAt. * 64bit version of uregex_lookingAt.
* Attempts to match the input string, starting from the specified index, against the pattern.
* The match may be of any length, and is not required to extend to the end
* of the input string. Contrast with uregex_matches().
*
* <p>If startIndex is >= 0 any input region that was set for this
* URegularExpression is reset before the operation begins.
*
* <p>If the specified starting index == -1 the match begins at the start of the input
* region, or at the start of the full string if no region has been specified.
* This corresponds directly with Matcher.lookingAt() in Java.
*
* <p>If the match succeeds then more information can be obtained via the
* <code>uregex_start()</code>, <code>uregex_end()</code>,
* and <code>uregex_group()</code> functions.</p>
*
* @param regexp The compiled regular expression.
* @param startIndex The input string (native) index at which to begin matching, or
* -1 to match the Input Region
* @param status A reference to a UErrorCode to receive any errors.
* @return TRUE if there is a match.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT UBool U_EXPORT2 U_DRAFT UBool U_EXPORT2
@ -522,6 +559,22 @@ uregex_find(URegularExpression *regexp,
/** /**
* 64bit version of uregex_find. * 64bit version of uregex_find.
* Find the first matching substring of the input string that matches the pattern.
* If startIndex is >= zero the search for a match begins at the specified index,
* and any match region is reset. This corresponds directly with
* Matcher.find(startIndex) in Java.
*
* If startIndex == -1 the search begins at the start of the input region,
* or at the start of the full string if no region has been specified.
*
* If a match is found, <code>uregex_start(), uregex_end()</code>, and
* <code>uregex_group()</code> will provide more information regarding the match.
*
* @param regexp The compiled regular expression.
* @param startIndex The position (native) in the input string to begin the search, or
* -1 to search within the Input Region.
* @param status A reference to a UErrorCode to receive any errors.
* @return TRUE if a match is found.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT UBool U_EXPORT2 U_DRAFT UBool U_EXPORT2
@ -655,6 +708,17 @@ uregex_start(URegularExpression *regexp,
/** /**
* 64bit version of uregex_start. * 64bit version of uregex_start.
* Returns the index in the input string of the start of the text matched by the
* specified capture group during the previous match operation. Return -1 if
* the capture group was not part of the last match.
* Group #0 refers to the complete range of matched text.
* Group #1 refers to the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group number
* @param status A reference to a UErrorCode to receive any errors.
* @return the starting (native) position in the input of the text matched
* by the specified group.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT int64_t U_EXPORT2 U_DRAFT int64_t U_EXPORT2
@ -682,6 +746,16 @@ uregex_end(URegularExpression *regexp,
/** /**
* 64bit version of uregex_end. * 64bit version of uregex_end.
* Returns the index in the input string of the position following the end
* of the text matched by the specified capture group.
* Return -1 if the capture group was not part of the last match.
* Group #0 refers to the complete range of matched text.
* Group #1 refers to the text matched by the first set of capturing parentheses.
*
* @param regexp The compiled regular expression.
* @param groupNum The capture group number
* @param status A reference to a UErrorCode to receive any errors.
* @return the (native) index of the position following the last matched character.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT int64_t U_EXPORT2 U_DRAFT int64_t U_EXPORT2
@ -709,6 +783,16 @@ uregex_reset(URegularExpression *regexp,
/** /**
* 64bit version of uregex_reset. * 64bit version of uregex_reset.
* Reset any saved state from the previous match. Has the effect of
* causing uregex_findNext to begin at the specified index, and causing
* uregex_start(), uregex_end() and uregex_group() to return an error
* indicating that there is no match information available. Clears any
* match region that may have been set.
*
* @param regexp The compiled regular expression.
* @param index The position (native) in the text at which a
* uregex_findNext() should begin searching.
* @param status A reference to a UErrorCode to receive any errors.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT void U_EXPORT2 U_DRAFT void U_EXPORT2
@ -716,7 +800,8 @@ uregex_reset64(URegularExpression *regexp,
int64_t index, int64_t index,
UErrorCode *status); UErrorCode *status);
/** Sets the limits of the matching region for this URegularExpression. /**
* Sets the limits of the matching region for this URegularExpression.
* The region is the part of the input string that will be considered when matching. * The region is the part of the input string that will be considered when matching.
* Invoking this method resets any saved state from the previous match, * Invoking this method resets any saved state from the previous match,
* then sets the region to start at the index specified by the start parameter * then sets the region to start at the index specified by the start parameter
@ -743,6 +828,23 @@ uregex_setRegion(URegularExpression *regexp,
/** /**
* 64bit version of uregex_setRegion. * 64bit version of uregex_setRegion.
* Sets the limits of the matching region for this URegularExpression.
* The region is the part of the input string that will be considered when matching.
* Invoking this method resets any saved state from the previous match,
* then sets the region to start at the index specified by the start parameter
* and end at the index specified by the end parameter.
*
* Depending on the transparency and anchoring being used (see useTransparentBounds
* and useAnchoringBounds), certain constructs such as anchors may behave differently
* at or around the boundaries of the region.
*
* The function will fail if start is greater than limit, or if either index
* is less than zero or greater than the length of the string being matched.
*
* @param regexp The compiled regular expression.
* @param regionStart The (native) index to begin searches at.
* @param regionLimit The (native) index to end searches at (exclusive).
* @param status A pointer to a UErrorCode to receive any errors.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT void U_EXPORT2 U_DRAFT void U_EXPORT2
@ -752,8 +854,17 @@ uregex_setRegion64(URegularExpression *regexp,
UErrorCode *status); UErrorCode *status);
/** /**
* Variation on uregex_setRegion to set the region without resetting the start index * Set the matching region and the starting index for subsequent matches
* without resetting the position for subsequent matches. * in a single operation.
* This is useful because the usual function for setting the starting
* index, uregex_reset(), also resets any region limits.
*
* @param regexp The compiled regular expression.
* @param regionStart The (native) index to begin searches at.
* @param regionLimit The (native) index to end searches at (exclusive).
* @param startIndex The index in the input text at which the next
* match operation should begin.
* @param status A pointer to a UErrorCode to receive any errors.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT void U_EXPORT2 U_DRAFT void U_EXPORT2
@ -778,6 +889,12 @@ uregex_regionStart(const URegularExpression *regexp,
/** /**
* 64bit version of uregex_regionStart. * 64bit version of uregex_regionStart.
* Reports the start index of the matching region. Any matches found are limited to
* the region bounded by regionStart (inclusive) and regionEnd (exclusive).
*
* @param regexp The compiled regular expression.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The starting (native) index of this matcher's region.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT int64_t U_EXPORT2 U_DRAFT int64_t U_EXPORT2
@ -800,6 +917,13 @@ uregex_regionEnd(const URegularExpression *regexp,
/** /**
* 64bit version of uregex_regionEnd. * 64bit version of uregex_regionEnd.
* Reports the end index (exclusive) of the matching region for this URegularExpression.
* Any matches found are limited to the region bounded by regionStart (inclusive)
* and regionEnd (exclusive).
*
* @param regexp The compiled regular expression.
* @param status A pointer to a UErrorCode to receive any errors.
* @return The ending point (native) of this matcher's region.
* @draft ICU 4.6 * @draft ICU 4.6
*/ */
U_DRAFT int64_t U_EXPORT2 U_DRAFT int64_t U_EXPORT2