1999-12-28 23:39:02 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
********************************************************************
|
|
|
|
*
|
|
|
|
* Copyright (C) 1997-1999, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
********************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef CHARITER_H
|
|
|
|
#define CHARITER_H
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/unistr.h"
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Abstract class defining a protocol for accessing characters in a text-storage object.
|
|
|
|
<P>Examples:<P>
|
|
|
|
|
|
|
|
Function processing characters, in this example simple output
|
|
|
|
<pre>
|
|
|
|
. void processChar( UChar c )
|
|
|
|
. {
|
|
|
|
. cout << " " << c;
|
|
|
|
. }
|
|
|
|
</pre>
|
|
|
|
Traverse the text from start to finish
|
|
|
|
<pre>
|
|
|
|
. void traverseForward(CharacterIterator& iter)
|
|
|
|
. {
|
|
|
|
. for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
|
|
|
|
. processChar(c);
|
|
|
|
. }
|
|
|
|
. }
|
|
|
|
</pre>
|
|
|
|
Traverse the text backwards, from end to start
|
|
|
|
<pre>
|
|
|
|
. void traverseBackward(CharacterIterator& iter)
|
|
|
|
. {
|
|
|
|
. for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
|
|
|
|
. processChar(c);
|
|
|
|
. }
|
|
|
|
. }
|
|
|
|
</pre>
|
|
|
|
Traverse both forward and backward from a given position in the text.
|
|
|
|
The Unicode::isLetter()/Unicode::isDigit() tests in this example (standing in for a notBoundary() check) represent some additional stopping criteria.
|
|
|
|
<pre>
|
|
|
|
. void traverseOut(CharacterIterator& iter, UTextOffset pos)
|
|
|
|
. {
|
|
|
|
. UChar c;
|
|
|
|
. for (c = iter.setIndex(pos);
|
|
|
|
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
|
|
|
. c = iter.next()) {}
|
|
|
|
. UTextOffset end = iter.getIndex();
|
|
|
|
. for (c = iter.setIndex(pos);
|
|
|
|
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
|
|
|
. c = iter.previous()) {}
|
|
|
|
. UTextOffset start = iter.getIndex() + 1;
|
|
|
|
.
|
|
|
|
. cout << "start: " << start << " end: " << end << endl;
|
|
|
|
. for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
|
|
|
|
. processChar(c);
|
|
|
|
. }
|
|
|
|
. }
|
|
|
|
</pre>
|
|
|
|
Creating a StringCharacterIterator and calling the test functions
|
|
|
|
<pre>
|
|
|
|
. void CharacterIterator_Example( void )
|
|
|
|
. {
|
|
|
|
. cout << endl << "===== CharacterIterator_Example: =====" << endl;
|
|
|
|
. UnicodeString text("Ein kleiner Satz.");
|
|
|
|
. StringCharacterIterator iterator(text);
|
|
|
|
. cout << "----- traverseForward: -----------" << endl;
|
|
|
|
. traverseForward( iterator );
|
|
|
|
. cout << endl << endl << "----- traverseBackward: ----------" << endl;
|
|
|
|
. traverseBackward( iterator );
|
|
|
|
. cout << endl << endl << "----- traverseOut: ---------------" << endl;
|
|
|
|
. traverseOut( iterator, 7 );
|
|
|
|
. cout << endl << endl << "-----" << endl;
|
|
|
|
. }
|
|
|
|
</pre>
|
|
|
|
*/
|
|
|
|
class U_COMMON_API CharacterIterator
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
/**
|
|
|
|
* Value returned by most of CharacterIterator's functions
|
|
|
|
* when the iterator has reached the limits of its iteration. */
|
2000-04-12 19:36:30 +00:00
|
|
|
enum { DONE = 0xffff };
|
1999-12-28 23:39:02 +00:00
|
|
|
|
|
|
|
|
/**
|
2000-03-22 18:31:40 +00:00
|
|
|
* Destructor.
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual ~CharacterIterator();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true when both iterators refer to the same
|
2000-03-22 18:31:40 +00:00
|
|
|
* character in the same character-storage object.
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual bool_t operator==(const CharacterIterator& that) const = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true when the iterators refer to different
|
|
|
|
* text-storage objects, or to different characters in the
|
2000-03-22 18:31:40 +00:00
|
|
|
* same text-storage object.
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
bool_t operator!=(const CharacterIterator& that) const { return !operator==(that); }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a pointer to a new CharacterIterator of the same
|
|
|
|
* concrete class as this one, and referring to the same
|
|
|
|
* character in the same text-storage object as this one. The
|
2000-03-22 18:31:40 +00:00
|
|
|
* caller is responsible for deleting the new clone.
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual CharacterIterator*
|
|
|
|
clone(void) const = 0;
|
|
|
|
|
|
|
|
/**
|
2000-03-22 18:31:40 +00:00
|
|
|
* Generates a hash code for this iterator.
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual int32_t hashCode(void) const = 0;
|
|
|
|
|
|
|
|
/**
|
2000-04-12 19:36:30 +00:00
|
|
|
* Sets the iterator to refer to the first code unit in its
|
|
|
|
* iteration range, and returns that code unit.
|
2000-03-22 18:31:40 +00:00
|
|
|
* @draft
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UChar first(void) = 0;
|
|
|
|
|
|
|
|
/**
|
2000-04-12 19:36:30 +00:00
|
|
|
* Sets the iterator to refer to the first code point in its
|
|
|
|
* iteration range, and returns that code point.
|
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
virtual UChar32 first32(void) = 0;
|
|
|
|
|
|
|
|
/**
 * NOTE(review): this function had no documentation. Judging by its name
 * and its UTextOffset return type, it presumably moves the iterator to the
 * start of its iteration range and returns the resulting index -- confirm
 * against the concrete subclasses before relying on this.
 */
virtual UTextOffset setToStart() = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Sets the iterator to refer to the last code unit in its
|
|
|
|
* iteration range, and returns that code unit.
|
2000-03-22 18:31:40 +00:00
|
|
|
* @draft
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UChar last(void) = 0;
|
|
|
|
|
|
|
|
/**
|
2000-04-12 19:36:30 +00:00
|
|
|
* Sets the iterator to refer to the last code point in its
|
|
|
|
* iteration range, and returns that code point.
|
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
virtual UChar32 last32(void) = 0;
|
|
|
|
|
|
|
|
/**
 * NOTE(review): this function had no documentation. Judging by its name
 * and its UTextOffset return type, it presumably moves the iterator to the
 * end of its iteration range and returns the resulting index -- confirm
 * against the concrete subclasses before relying on this.
 */
virtual UTextOffset setToEnd() = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Sets the iterator to refer to the "position"-th code unit
|
1999-12-28 23:39:02 +00:00
|
|
|
* in the text-storage object the iterator refers to, and
|
2000-04-12 19:36:30 +00:00
|
|
|
* returns that code unit.
|
2000-03-22 18:31:40 +00:00
|
|
|
* @draft
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UChar setIndex(UTextOffset position) = 0;
|
|
|
|
|
|
|
|
/**
|
2000-04-12 19:36:30 +00:00
|
|
|
* Sets the iterator to refer to the beginning of the code point
|
|
|
|
* that contains the "position"-th code unit
|
|
|
|
* in the text-storage object the iterator refers to, and
|
|
|
|
* returns that code point.
|
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
virtual UChar32 setIndex32(UTextOffset position) = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the code unit the iterator currently refers to.
|
2000-03-22 18:31:40 +00:00
|
|
|
* @draft
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UChar current(void) const = 0;
|
|
|
|
|
|
|
|
/**
|
2000-04-12 19:36:30 +00:00
|
|
|
* Returns the code point the iterator currently refers to.
|
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
virtual UChar32 current32(void) const = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Advances to the next code unit in the iteration range
|
|
|
|
* (toward last()), and returns that code unit. If there are
|
|
|
|
* no more code units to return, returns DONE.
|
2000-03-22 18:31:40 +00:00
|
|
|
* @draft
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UChar next(void) = 0;
|
|
|
|
|
|
|
|
/**
|
2000-04-12 19:36:30 +00:00
|
|
|
* Gets the current code unit for returning and advances to the next code unit
|
|
|
|
* in the iteration range
|
|
|
|
* (toward last()). If there are
|
|
|
|
* no more code units to return, returns DONE.
|
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
virtual UChar nextPostInc(void) = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Advances to the next code point in the iteration range
|
|
|
|
* (toward last()), and returns that code point. If there are
|
|
|
|
* no more code points to return, returns DONE.
|
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
virtual UChar32 next32(void) = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the current code point for returning and advances to the next code point
|
|
|
|
* in the iteration range
|
|
|
|
* (toward last()). If there are
|
|
|
|
* no more code points to return, returns DONE.
|
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
virtual UChar32 next32PostInc(void) = 0;
|
|
|
|
|
|
|
|
/**
 * NOTE(review): this function had no documentation. Judging by its name
 * and its bool_t return type, it presumably reports whether the iterator
 * can still advance toward last() -- confirm against the concrete
 * subclasses. Unlike current() and getIndex(), it is declared non-const.
 */
virtual bool_t hasNext() = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Advances to the previous code unit in the iteration range
|
|
|
|
* (toward first()), and returns that code unit. If there are
|
|
|
|
* no more code units to return, returns DONE.
|
2000-03-22 18:31:40 +00:00
|
|
|
* @draft
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UChar previous(void) = 0;
|
|
|
|
|
2000-04-12 19:36:30 +00:00
|
|
|
/**
|
|
|
|
* Advances to the previous code point in the iteration range
|
|
|
|
* (toward first()), and returns that code point. If there are
|
|
|
|
* no more code points to return, returns DONE.
|
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
virtual UChar32 previous32(void) = 0;
|
|
|
|
|
|
|
|
/**
 * NOTE(review): this function had no documentation. Judging by its name
 * and its bool_t return type, it presumably reports whether the iterator
 * can still move back toward first() -- confirm against the concrete
 * subclasses. Unlike current() and getIndex(), it is declared non-const.
 */
virtual bool_t hasPrevious() = 0;
|
|
|
|
|
1999-12-28 23:39:02 +00:00
|
|
|
/**
|
|
|
|
* Returns the numeric index in the underlying text-storage
|
|
|
|
* object of the character returned by first(). Since it's
|
|
|
|
* possible to create an iterator that iterates across only
|
|
|
|
* part of a text-storage object, this number isn't
|
2000-03-22 18:31:40 +00:00
|
|
|
* necessarily 0.
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UTextOffset startIndex(void) const = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the numeric index in the underlying text-storage
|
|
|
|
* object of the position immediately BEYOND the character
|
2000-03-22 18:31:40 +00:00
|
|
|
* returned by last().
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UTextOffset endIndex(void) const = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the numeric index in the underlying text-storage
|
|
|
|
* object of the character the iterator currently refers to
|
2000-03-22 18:31:40 +00:00
|
|
|
* (i.e., the character returned by current()).
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UTextOffset getIndex(void) const = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Copies the text under iteration into the UnicodeString
|
2000-03-22 18:31:40 +00:00
|
|
|
* referred to by "result".
|
|
|
|
* @param result Receives a copy of the text under iteration.
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual void getText(UnicodeString& result) = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a UClassID for this CharacterIterator ("poor man's
|
|
|
|
* RTTI").<P> Despite the fact that this function is public,
|
2000-03-22 18:31:40 +00:00
|
|
|
* DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
|
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:39:02 +00:00
|
|
|
virtual UClassID getDynamicClassID(void) const = 0;
|
|
|
|
|
|
|
|
protected:
|
|
|
|
/** Default constructor. Protected, with an empty body: it performs no work. */
CharacterIterator() {}
|
|
|
|
/** Copy constructor. Protected, with an empty body: this class declares no data members to copy. */
CharacterIterator(const CharacterIterator&) {}
|
|
|
|
/** Assignment operator. Protected; leaves this object untouched and returns *this. */
CharacterIterator& operator=(const CharacterIterator&) { return *this; }
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|