/*
**********************************************************************
* Copyright (C) 1999-2000 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 03/22/2000 helena Creation.
**********************************************************************
*/
#ifndef SRCHITER_H
#define SRCHITER_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/brkiter.h"
/**
 * SearchIterator is an abstract base class that provides methods
 * to search for a pattern within a text string. Instances of
 * SearchIterator maintain a current position and scan over
 * the target text, returning the indices at which the pattern is matched
 * and the length of each match.
 *
 * SearchIterator is an abstract base class that defines a
 * protocol for text searching. Subclasses provide concrete implementations of
 * various search algorithms. For example, {@link StringSearch}
 * implements language-sensitive pattern matching based on the comparison rules
 * defined in a {@link RuleBasedCollator} object.
 *
 * Internally, SearchIterator scans text using a
 * {@link CharacterIterator}, and is thus able to scan text held
 * by any object implementing that protocol. A StringCharacterIterator
 * is used to scan UnicodeString objects passed to setTarget.
 *
 * SearchIterator provides an API that is similar to that of
 * other text iteration classes such as BreakIterator. Using this
 * class, it is easy to scan through text looking for all occurrences of a
 * given pattern. The following example uses a StringSearch object to
 * find all instances of "fox" in the target string. Any other subclass of
 * SearchIterator can be used in an identical manner.
 *
 * <pre>
 *   UnicodeString target("The quick brown fox jumped over the lazy fox");
 *   UnicodeString pattern("fox");
 *
 *   SearchIterator *iter = new StringSearch(pattern, target);
 *
 *   for (int pos = iter->first(); pos != SearchIterator::DONE; pos = iter->next()) {
 *       printf("Found match at %d pos, length is %d\n", pos, iter->getMatchLength());
 *   }
 *   delete iter;
 * </pre>
 *
 * @see StringSearch
 */
class SearchIterator {
public:
    /**
     * Sentinel index. previous() and next() return it once every valid
     * match has been reported; first() and last() return it when the
     * target text contains no match at all.
     */
    static const int32_t DONE;

    //===================================================================
    // Boilerplate: destruction, copying, comparison, class identity
    //===================================================================

    /** Destructor. */
    virtual ~SearchIterator();

    /**
     * Copy constructor.
     *
     * NOTE(review): no copy-assignment operator is declared next to this
     * constructor, so the compiler-generated one is what callers get.
     * Because the class stores raw pointers, confirm that this is intended.
     */
    SearchIterator(const SearchIterator& other);

    /**
     * Equality operator. Returns TRUE if both SearchIterators are of the
     * same class, have the same behavior, and iterate over the same text.
     */
    virtual UBool operator==(const SearchIterator& that) const;

    /**
     * Inequality operator. Always yields the logical opposite of
     * operator==: FALSE when operator== returns TRUE, and vice versa.
     */
    UBool operator!=(const SearchIterator& that) const;

    /**
     * Returns a newly-constructed SearchIterator with the same behavior,
     * and iterating over the same text, as this one.
     */
    virtual SearchIterator* clone() const = 0;

    /**
     * Returns a polymorphic class ID for this object. Different subclasses
     * return distinct, unequal values.
     * @stable
     */
    virtual UClassID getDynamicClassID() const = 0;

    //===================================================================
    // Searching
    //===================================================================

    /**
     * Finds the first index at which the target text matches the search
     * pattern. Afterwards the iterator's current index (as returned by
     * {@link #getIndex}) is the match position if a match was found,
     * and DONE otherwise.
     *
     * @return The character index of the first match, or DONE if there
     *         are no matches.
     */
    int32_t first();

    /**
     * Finds the first index greater than pos at which the target text
     * matches the search pattern. Afterwards the iterator's current index
     * (as returned by {@link #getIndex}) is the match position if a match
     * was found, and DONE otherwise.
     *
     * @param pos The index after which the search begins.
     * @return The character index of the first match following pos,
     *         or DONE if there are no matches.
     */
    int32_t following(int32_t pos);

    /**
     * Finds the last index in the target text at which it matches the
     * search pattern, and moves the iteration to that position.
     *
     * @return The index of the last match, or DONE if there
     *         are no matches.
     */
    int32_t last();

    /**
     * Finds the first index less than pos at which the target text
     * matches the search pattern. Afterwards the iterator's current index
     * (as returned by {@link #getIndex}) is the match position if a match
     * was found, and DONE otherwise.
     *
     * @param pos The index before which the search begins.
     * @return The character index of the first match preceding pos,
     *         or DONE if there are no matches.
     */
    int32_t preceding(int32_t pos);

    /**
     * Finds the next point at which the text matches the search pattern,
     * starting from the current position.
     *
     * @return The index of the next match after the current position,
     *         or DONE if there are no more matches.
     *
     * @see #first
     */
    int32_t next();

    /**
     * Finds the previous point at which the text matches the search
     * pattern, starting at the current position.
     *
     * @return The index of the previous match before the current position,
     *         or DONE if there are no more matches.
     */
    int32_t previous();

    /**
     * Returns the current index in the text being searched.
     * If the iteration has gone past the end of the text
     * (or past the beginning for a backwards search),
     * {@link #DONE} is returned.
     */
    int32_t getIndex() const;

    //===================================================================
    // Configuration and match inspection
    //===================================================================

    /**
     * Controls whether overlapping matches are returned. When this
     * property is true, matches that begin within the boundary of the
     * previous match are considered valid and will be returned. For
     * example, when searching for "abab" in the target text "ababab",
     * both offsets 0 and 2 are returned as valid matches if this
     * property is true.
     *
     * The default setting of this property is true.
     *
     * @param allowOverlap TRUE to report overlapping matches.
     */
    void setOverlapping(UBool allowOverlap);

    /**
     * Reports whether overlapping matches are returned.
     *
     * @see #setOverlapping
     */
    UBool isOverlapping() const;

    /**
     * Returns the length of the text in the target which matches the
     * search pattern. The result is valid only after a successful call to
     * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
     * Just after construction, or after a searching method has returned
     * DONE, this method returns 0.
     *
     * @return The length of the match in the target text, or 0 if there
     *         is no match currently.
     */
    int32_t getMatchLength() const;

    /**
     * Sets the BreakIterator that will be used to restrict the points
     * at which matches are detected.
     *
     * @param iterator A {@link BreakIterator} that will be used to restrict
     *                 the points at which matches are detected. If a match
     *                 is found, but the match's start or end index is not a
     *                 boundary as determined by the BreakIterator, the match
     *                 will be rejected and another will be searched for.
     *
     *                 If this parameter is null, no break detection is
     *                 attempted.
     *
     * @see #getBreakIterator
     */
    /* HSYS : Check, aliasing or owning */
    void setBreakIterator(const BreakIterator* iterator);

    /**
     * Returns the BreakIterator that is used to restrict the points
     * at which matches are detected. This will be the same object
     * that was passed to the constructor or to setBreakIterator.
     *
     * NOTE(review): a null break iterator is documented as legal (it means
     * break detection should not be attempted), yet this accessor returns
     * a reference. What happens when none is set cannot be determined from
     * this header; confirm against the implementation.
     *
     * @see #setBreakIterator
     */
    const BreakIterator& getBreakIterator() const;

    /**
     * Sets the target text which should be searched and resets the
     * iterator's position to point before the start of the target text.
     * This is useful for re-using an iterator to search for the same
     * pattern within a different body of text.
     *
     * @param newText The new text to be searched.
     *
     * @see #getTarget
     */
    virtual void setTarget(const UnicodeString& newText);

    /**
     * Sets the target text which should be searched, supplied as a
     * CharacterIterator, and resets the iterator's position to point
     * before the start of the target text. This is useful for re-using an
     * iterator to search for the same pattern within a different body of
     * text.
     *
     * @param iterator The iterator over the new text to be searched.
     *                 Presumably adopted (owned) by this object, as the
     *                 method name suggests; confirm against the
     *                 implementation.
     *
     * @see #getTarget
     */
    virtual void adoptTarget(CharacterIterator* iterator);

    /**
     * Returns the target text which is being searched.
     *
     * @see #setTarget
     */
    const CharacterIterator& getTarget() const;

    /** Resets the iteration. */
    virtual void reset();

    /**
     * Retrieves the text that was matched by the most recent call to
     * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
     * If the iterator is not pointing at a valid match (e.g. just after
     * construction or after DONE has been returned), the result is
     * an empty string.
     *
     * @param result Output parameter for the matched text.
     */
    void getMatchedText(UnicodeString& result);

    //-------------------------------------------------------------------
    // Protected interface for subclasses
    //-------------------------------------------------------------------
protected:
    /** Default constructor, accessible to subclasses only. */
    SearchIterator();

    /**
     * Constructor for use by subclasses.
     *
     * @param target  The target text to be searched. This is for internal
     *                use by this class. Subclasses need to maintain their
     *                own reference to or iterator over the target text
     *                for use by their {@link #handleNext handleNext} and
     *                {@link #handlePrev handlePrev} methods. The target
     *                will be adopted and owned by the SearchIterator
     *                object.
     *
     * @param breaker A {@link BreakIterator} that is used to restrict the
     *                points at which matches are detected. If handleNext
     *                or handlePrev finds a match, but the match's start
     *                or end index is not a boundary as determined by
     *                the BreakIterator, the match is rejected and
     *                handleNext or handlePrev is called again.
     *                If this parameter is null, no break detection is
     *                attempted.
     */
    SearchIterator(CharacterIterator* target, BreakIterator* breaker);

    /**
     * Abstract method which subclasses override to provide the mechanism
     * for finding the next match in the target text. This allows different
     * subclasses to provide different search algorithms.
     *
     * If a match is found, the implementation should return the index at
     * which the match starts and should call
     * {@link #setMatchLength setMatchLength} with the number of characters
     * in the target text that make up the match. If no match is found, the
     * method should return DONE and should not call setMatchLength.
     *
     * @param startAt The index in the target text at which the search
     *                should start.
     * @param status  Error code reference passed through to the
     *                implementation (its exact contract is not described
     *                in this header; confirm against subclasses).
     *
     * @see #setMatchLength
     */
    virtual int32_t handleNext(int32_t startAt, UErrorCode& status) = 0;

    /**
     * Abstract method which subclasses override to provide the mechanism
     * for finding the previous match in the target text. This allows
     * different subclasses to provide different search algorithms.
     *
     * If a match is found, the implementation should return the index at
     * which the match starts and should call
     * {@link #setMatchLength setMatchLength} with the number of characters
     * in the target text that make up the match. If no match is found, the
     * method should return DONE and should not call setMatchLength.
     *
     * @param startAt The index in the target text at which the search
     *                should start.
     * @param status  Error code reference passed through to the
     *                implementation (its exact contract is not described
     *                in this header; confirm against subclasses).
     *
     * @see #setMatchLength
     */
    virtual int32_t handlePrev(int32_t startAt, UErrorCode& status) = 0;

    /**
     * Records the length of the currently matched string in the target
     * text. Subclasses' handleNext and handlePrev methods should call this
     * when they find a match in the target text.
     *
     * @param length Number of characters in the target that make up the
     *               match.
     */
    void setMatchLength(int32_t length);

    //-------------------------------------------------------------------
    // Privates
    //-------------------------------------------------------------------
private:
    /** Class ID. */
    static char fgClassID;

    /**
     * Private value indicating that the iterator is pointing
     * before the beginning of the target text.
     */
    static const int32_t BEFORE;

    /**
     * Internal helper used by preceding and following. Moves the index
     * to the given position and clears any state that is affected.
     */
    void setIndex(int32_t pos);

    /**
     * Determines whether the target text bounded by start and end is one
     * or more whole units of text as determined by the current
     * BreakIterator.
     */
    UBool isBreakUnit(int32_t start, int32_t end);

    //-------------------------------------------------------------------
    // Private data
    //-------------------------------------------------------------------
    int32_t            index;     // Current position in the target text
    int32_t            length;    // Length of matched text, or 0
    UBool              overlap;   // Return overlapping matches?
    CharacterIterator* target;    // Target text to be searched
    BreakIterator*     breaker;   // Break iterator to constrain matches
    UBool              backward;  // presumably TRUE while searching backwards -- TODO confirm
};
/**
 * Inequality is defined purely as the negation of the (virtual)
 * operator==, so subclasses only need to override operator==.
 */
inline UBool SearchIterator::operator!=(const SearchIterator& that) const
{
    const UBool same = this->operator==(that);
    return !same;
}
#endif