91db18b7b7
X-SVN-Rev: 17648
802 lines
24 KiB
C++
802 lines
24 KiB
C++
/*
*******************************************************************************
*
*   Copyright (C) 2004-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utext.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2004oct06
*   created by: Markus W. Scherer
*/
|
|
|
|
#ifndef __UTEXT_H__
|
|
#define __UTEXT_H__
|
|
|
|
/**
 * \file
 * \brief C API: Abstract Unicode Text API
 *
 * TBD
 *
 * Text chunks must begin and end on Unicode code point boundaries.
 * That is, a chunk boundary must not fall between a leading and a trailing
 * surrogate.
 *
 * If an input index does not point at a code point boundary, then the API behaves
 * as if the index is first adjusted to the immediately preceding code point boundary.
 *
 * Valid indexes into the text must begin at 0 (start of the text) and
 * must strictly increase going forward through the text.
 * (No reordering, and valid indexes must be non-negative.)
 *
 * Issues:
 * - Code point boundary adjustment if the index does not already point to a boundary
 *   + Currently, see above: always adjusts to the immediately preceding code point boundary.
 *     (For example, using U8_SET_CP_START().)
 *   + Alternatively: Could adjust to the preceding boundary when going forward (U8_SET_CP_START())
 *     and to the following boundary when going backward (U8_SET_CP_LIMIT()).
 *     Result: next32From(index) and previous32From(index) would return the same
 *     character.
 * - Error handling - add UErrorCode parameters? Add UBool return values to void functions?
 *   + Add UErrorCode to extract()?
 * - This version does not expose NUL-termination to the caller.
 * - This version assumes option 2 (index mapping done by provider functions).
 * - This version uses one API for read-only as well as read-write access,
 *   with a way to find out whether the text object is writable or not.
 * - This version does not support absolute UTF-16 indexes when native indexes are used.
 * - Should the copy() function have a UBool for whether to copy or move the text?
 * - replace() needs a way to indicate that the current chunk
 *   (which would need to be passed in) became invalid during the operation.
 *   Same for copy().
 *
 * - Single text iterator only. Because UText owns the buffer for non-UTF-16 sources, there
 *   can only be a single UTextIterator on a UText. A second iterator could cause buffer
 *   contents to be moved, while info about what's in the buffer in the first iterator would
 *   not be updated.
 *
 *   - Add some kind of failure status to construction of UTextIterator, to prevent
 *     two from existing? Seems hostile to developers.
 *   - Make UText.clone() be shallow. Don't clone the text, do clone the buffer so that
 *     a second UTextIterator can be instantiated. Developer hostile, again.
 *   - Move the buffer from the UText to the UTextIterator.
 *     - developer-friendly.
 *     - NOP (good performance) on utf-16 strings. (all can share a buffer)
 *     - Extra allocation for buffer on utf-8, codepage data, etc.
 *   - Use buffer in UText if available, otherwise allocate another
 *     Complicated implementation.
 *     Threading model?
 *
 *
 * @see UText
 */
|
|
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/rep.h"
|
|
#include "unicode/unistr.h"
|
|
|
|
#ifndef U_HIDE_DRAFT_API
|
|
|
|
U_CDECL_BEGIN
|
|
|
|
/* Forward declaration so that declarations below can use UText before the struct is defined. */
struct UText;
typedef struct UText UText; /**< C typedef for struct UText. @draft ICU 3.4 */
|
|
|
|
/* Forward declaration so that declarations below can use UTextChunk before the struct is defined. */
struct UTextChunk;
typedef struct UTextChunk UTextChunk; /**< C typedef for struct UTextChunk. @draft ICU 3.4 */
|
|
|
|
|
|
|
|
/***************************************************************************************
 *
 *  C Functions for creating UText wrappers around various kinds of text strings.
 *
 *  TODO:  Have a single generic close function
 *            utext_close(UText *t)
 *         so client code doesn't need to keep track of how one was opened.
 *
 ****************************************************************************************/
|
|
|
|
|
|
|
|
/**
|
|
* Open a read-only UText implementation for UTF-8 strings.
|
|
*/
|
|
U_DRAFT UText * U_EXPORT2
|
|
utext_openUTF8(const uint8_t *s, int32_t length, UErrorCode *pErrorCode);
|
|
|
|
/**
 * Close a UText for UTF-8 strings.
 *
 * @param t The UText to close.
 *          NOTE(review): presumably must have been opened with utext_openUTF8() - confirm.
 * @draft ICU 3.4
 */
U_DRAFT void U_EXPORT2
utext_closeUTF8(UText *t);
|
|
|
|
/**
 * Reset a UText for UTF-8 strings to a (new) UTF-8 string.
 *
 * @param t          The UText to reset.
 *                   NOTE(review): presumably must have been opened with utext_openUTF8() - confirm.
 * @param s          Pointer to the UTF-8 text.
 * @param length     Length of the text at s (presumably in bytes - confirm).
 * @param pErrorCode ICU error code in/out parameter.
 * @draft ICU 3.4
 */
U_DRAFT void U_EXPORT2
utext_resetUTF8(UText *t, const uint8_t *s, int32_t length, UErrorCode *pErrorCode);
|
|
|
|
/**
|
|
* Open a read-only UText implementation for SBCS strings.
|
|
* The implementation converts 1:1 according to the provided mapping table.
|
|
* Supplementary code points are not supported.
|
|
*
|
|
* @param toU Mapping table for conversion from SBCS to Unicode (BMP only).
|
|
* The mapping table must be available during the lifetime of the
|
|
* UText object.
|
|
*/
|
|
U_DRAFT UText * U_EXPORT2
|
|
utext_openSBCS(const UChar toU[256],
|
|
const char *s, int32_t length,
|
|
UErrorCode *pErrorCode);
|
|
|
|
/**
 * Close a UText for SBCS strings.
 *
 * @param t The UText to close.
 *          NOTE(review): presumably must have been opened with utext_openSBCS() - confirm.
 * @draft ICU 3.4
 */
U_DRAFT void U_EXPORT2
utext_closeSBCS(UText *t);
|
|
|
|
/**
 * Reset a UText for SBCS strings to a (new) SBCS string.
 * No mapping table is passed in.
 * NOTE(review): presumably the table given to utext_openSBCS() stays in effect - confirm.
 *
 * @param t          The UText to reset.
 * @param s          Pointer to the SBCS text.
 * @param length     Length of the text at s.
 * @param pErrorCode ICU error code in/out parameter.
 * @draft ICU 3.4
 */
U_DRAFT void U_EXPORT2
utext_resetSBCS(UText *t, const char *s, int32_t length, UErrorCode *pErrorCode);
|
|
|
|
|
|
|
|
/**
 * Set the UText object to handle a writable UnicodeString.
 *
 * The declaration takes a pointer to a C++ class and is therefore only visible
 * to C++ translation units.
 *
 * @param t The UText object to be set up.
 * @param s The UnicodeString to be handled by t.
 *          NOTE(review): ownership/lifetime of s is not stated here; presumably
 *          s must outlive its use through t - confirm.
 * @draft ICU 3.4
 */
#ifdef XP_CPLUSPLUS
U_DRAFT void U_EXPORT2
utext_setUnicodeString(UText *t, UnicodeString *s);
#endif
|
|
|
|
|
|
#if 0   /* initially commented out to reduce testing */

/**
 * Open a writable UText implementation for Replaceable objects.
 *
 * @param rep        The Replaceable object to be wrapped.
 * @param pErrorCode ICU error code in/out parameter.
 * @return           Pointer to the opened UText.
 */
U_DRAFT UText * U_EXPORT2
utext_openReplaceable(Replaceable *rep, UErrorCode *pErrorCode);

/**
 * Close a UText for Replaceable objects.
 *
 * @param t The UText to close.
 */
U_DRAFT void U_EXPORT2
utext_closeReplaceable(UText *t);

/**
 * Reset a UText for Replaceable objects to a (new) Replaceable object.
 *
 * @param t          The UText to reset.
 * @param rep        The Replaceable object to be wrapped.
 * @param pErrorCode ICU error code in/out parameter.
 */
U_DRAFT void U_EXPORT2
utext_resetReplaceable(UText *t, Replaceable *rep, UErrorCode *pErrorCode);

#endif
|
|
|
|
|
|
struct UTextChunk {
|
|
/** Pointer to contents of text chunk. */
|
|
const UChar *contents;
|
|
/** Number of UChars in the chunk. */
|
|
int32_t length;
|
|
/** (Native) text index corresponding to the start of the chunk. */
|
|
int32_t start;
|
|
/** (Native) text index corresponding to the end of the chunk (contents+length). */
|
|
int32_t limit;
|
|
/** If TRUE, then non-UTF-16 indexes are used in this chunk. */
|
|
UBool nonUTF16Indexes;
|
|
/** Unused. */
|
|
UBool padding;
|
|
/** Contains sizeof(UTextChunk) and allows the future addition of fields. */
|
|
uint16_t sizeOfStruct;
|
|
};
|
|
|
|
|
|
/**
 * UText provider properties (bit field indexes).
 *
 * Each enumerator is the index of a bit, not a mask.
 * NOTE(review): presumably a property is tested as
 * (properties & (1 << UTEXT_PROVIDER_xxx)) on the value returned by
 * UTextGetProperties - confirm against the implementation.
 *
 * @see UText
 * @draft ICU 3.4
 */
enum {
    /**
     * The provider works with non-UTF-16 ("native") text indexes.
     * For example, byte indexes into UTF-8 text or UTF-32 indexes into UTF-32 text.
     * @draft ICU 3.4
     */
    UTEXT_PROVIDER_NON_UTF16_INDEXES,
    /**
     * The provider can return the text length inexpensively.
     * @draft ICU 3.4
     */
    UTEXT_PROVIDER_LENGTH_IS_INEXPENSIVE,
    /**
     * Text chunks remain valid and usable until the text object is modified or
     * deleted, not just until the next time the access() function is called
     * (which is the default).
     * @draft ICU 3.4
     */
    UTEXT_PROVIDER_STABLE_CHUNKS,
    /**
     * The provider supports modifying the text via the replace() and copy()
     * functions.
     * @see Replaceable
     * @draft ICU 3.4
     */
    UTEXT_PROVIDER_WRITABLE,
    /**
     * There is meta data associated with the text.
     * @see Replaceable::hasMetaData()
     * @draft ICU 3.4
     */
    UTEXT_PROVIDER_HAS_META_DATA
};
|
|
|
|
/**
|
|
* Function type declaration for UText.clone().
|
|
*
|
|
* clone this UText.
|
|
* Text providers are not required to support clone.
|
|
* Applications must be prepared for the possibility that clone is not supported.
|
|
* TODO: should we just drop clone altogether?
|
|
*
|
|
* @return a pointer to the newly created copy of the UTex object.
|
|
* May return NULL if the object cannot be cloned.
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef UText * U_CALLCONV
|
|
UTextClone(const UText *t);
|
|
|
|
/**
|
|
* Function type declaration for UText.GetProperties().
|
|
*
|
|
* Gets the provider properties for this UText.
|
|
*
|
|
* @return Provider properties bit field.
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef int32_t U_CALLCONV
|
|
UTextGetProperties(UText *t);
|
|
|
|
/**
|
|
* Function type declaration for UText.length().
|
|
*
|
|
* TBD
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef int32_t U_CALLCONV
|
|
UTextLength(UText *t);
|
|
|
|
/**
|
|
* Function type declaration for UText.access().
|
|
*
|
|
* @param index Requested (native) index.
|
|
* @param forward If TRUE, then the returned chunk must contain text
|
|
* starting from the index, so that start<=index<limit.
|
|
* If FALSE, then the returned chunk must contain text
|
|
* before the index, so that start<index<=limit.
|
|
* @return Chunk-relative UTF-16 offset corresponding to the requested index.
|
|
* Negative value if a chunk cannot be accessed
|
|
* (the requested index is out of bounds).
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef int32_t U_CALLCONV
|
|
UTextAccess(UText *t, int32_t index, UBool forward, UTextChunk *chunk);
|
|
|
|
/**
|
|
* Function type declaration for UText.extract().
|
|
*
|
|
* TBD
|
|
*
|
|
* The extracted string must be NUL-terminated if possible.
|
|
*
|
|
* @return Number of UChars extracted.
|
|
* If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
|
|
* preflighting.
|
|
* If U_INDEX_OUTOFBOUNDS_ERROR: Start and limit do not specify
|
|
* accessible text. Return value undefined.
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef int32_t U_CALLCONV
|
|
UTextExtract(UText *t,
|
|
int32_t start, int32_t limit,
|
|
UChar *dest, int32_t destCapacity,
|
|
UErrorCode *pErrorCode);
|
|
|
|
/**
|
|
* Function type declaration for UText.replace().
|
|
*
|
|
* TBD
|
|
*
|
|
* If chunk is not NULL and the chunk contents outside of start..limit is
|
|
* modified, other than moving text after limit,
|
|
* then the chunk->contents pointer is set to NULL.
|
|
*
|
|
* @return Delta between the limit of the replacement text and the limit argument,
|
|
* that is, the signed number of (native) storage units by which
|
|
* the old and the new pieces of text differ.
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef int32_t U_CALLCONV
|
|
UTextReplace(UText *t,
|
|
int32_t start, int32_t limit,
|
|
const UChar *src, int32_t length,
|
|
UTextChunk *chunk,
|
|
UErrorCode *pErrorCode);
|
|
|
|
/**
|
|
* Function type declaration for UText.copy().
|
|
*
|
|
* Copies a substring of this object, retaining metadata.
|
|
* This method is used to duplicate or reorder substrings.
|
|
* The destination index must not overlap the source range.
|
|
*
|
|
* TBD
|
|
*
|
|
* If chunk is not NULL and the chunk contents outside of start..limit is
|
|
* modified, other than moving text after limit,
|
|
* then the chunk->contents pointer is set to NULL.
|
|
*
|
|
* @param move If TRUE, then the substring is moved, not copied/duplicated.
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef void U_CALLCONV
|
|
UTextCopy(UText *t,
|
|
int32_t start, int32_t limit,
|
|
int32_t destIndex,
|
|
UBool move,
|
|
UTextChunk *chunk,
|
|
UErrorCode *pErrorCode);
|
|
|
|
/**
|
|
* Function type declaration for UText.mapOffsetToNative().
|
|
*
|
|
* TBD
|
|
*
|
|
* @param offset UTF-16 offset relative to the current text chunk,
|
|
* 0<=offset<=chunk->length.
|
|
* @return Absolute (native) index corresponding to the UTF-16 offset
|
|
* relative to the current text chunk.
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef int32_t U_CALLCONV
|
|
UTextMapOffsetToNative(UText *t, UTextChunk *chunk, int32_t offset);
|
|
|
|
/**
|
|
* Function type declaration for UText.mapIndexToUTF16().
|
|
*
|
|
* TBD
|
|
*
|
|
* @param index Absolute (native) text index, chunk->start<=index<=chunk->limit.
|
|
* @return Chunk-relative UTF-16 offset corresponding to the absolute (native)
|
|
* index.
|
|
*
|
|
* @see UText
|
|
* @draft ICU 3.4
|
|
*/
|
|
typedef int32_t U_CALLCONV
|
|
UTextMapIndexToUTF16(UText *t, UTextChunk *chunk, int32_t index);
|
|
|
|
|
|
/**
|
|
* UText struct. Provides the interface between the generic UText access code
|
|
* and the UText provider code that works on specific kinds of
|
|
* text (utf-8, noncontiugous utf-16, whatever.)
|
|
*
|
|
* This needs to be plain C, not C++, for reasons of release-to-
|
|
* release binary compatibility. An application may create a
|
|
* provider for it's own unique text format, and that application
|
|
* binary must continue to work with future versions of ICU.
|
|
*
|
|
* Applications that are using predefined types of text providers
|
|
* to pass text data to ICU services will have no need to view the
|
|
* internals of the UText structs that they open.
|
|
*/
|
|
struct UText {
|
|
/**
|
|
* (protected) Pointer to string or wrapped object or similar.
|
|
* Not used by caller.
|
|
* @draft ICU 3.4
|
|
*/
|
|
const void *context;
|
|
|
|
/**
|
|
* (protected) Pointer fields for use by text provider.
|
|
* Not used by caller.
|
|
* @draft ICU 3.4
|
|
*/
|
|
const void *p, *q, *r;
|
|
|
|
/**
|
|
* (public) sizeOfStruct=sizeof(UText)
|
|
* Allows possible backward compatible extension.
|
|
*
|
|
* @draft ICU 3.4
|
|
*/
|
|
int32_t sizeOfStruct;
|
|
|
|
/**
|
|
* (protected) Integer fields for use by text provider.
|
|
* Not used by caller.
|
|
* @draft ICU 3.4
|
|
*/
|
|
int32_t a, b, c;
|
|
|
|
/**
|
|
* (public) TBD
|
|
*
|
|
* @see UTextClone
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextClone *clone;
|
|
|
|
/**
|
|
* (public) TBD
|
|
*
|
|
* @see UTextGetProperties
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextGetProperties *properties;
|
|
|
|
/**
|
|
* (public) Returns the length of the text.
|
|
* May be expensive to compute!
|
|
*
|
|
* @see UTextLength
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextLength *length;
|
|
|
|
/**
|
|
* (public) Access to a chunk of text.
|
|
* Does not copy text but instead gives access to a portion of it.
|
|
*
|
|
* The intention is that for discontiguous storage the chunk would be an actual
|
|
* storage block used for storing the text.
|
|
* For contiguously stored text with known length, the whole text would be returned.
|
|
* For NUL-terminated text, the implementation may scan forward in exponentially
|
|
* larger chunks instead of finding the NUL right away.
|
|
*
|
|
* In: Text index; the returned chunk of text must contain the index.
|
|
* Out:
|
|
* - Pointer to chunk start
|
|
* - Start and limit indexes corresponding to the chunk;
|
|
* it must be start<=input index<limit
|
|
* - Indication of success: If the input index is negative or >=length then
|
|
* failure needs to be indicated, probably by returning a NULL pointer
|
|
*
|
|
* @see UTextAccess
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextAccess *access;
|
|
|
|
/**
|
|
* (public) Copy a chunk of text into a buffer.
|
|
* Does it need a return value indicating success/failure?
|
|
* The signature shown here is the same as in UReplaceable.
|
|
* Not strictly minimally necessary; Replaceable has it.
|
|
*
|
|
* @see UTextExtract
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextExtract *extract;
|
|
|
|
/**
|
|
* (public) TBD
|
|
*
|
|
* @see UTextReplace
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextReplace *replace;
|
|
|
|
/**
|
|
* (public) TBD
|
|
*
|
|
* @see UTextCopy
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextCopy *copy;
|
|
|
|
/**
|
|
* (public) TBD
|
|
*
|
|
* @see UTextMapOffsetToNative
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextMapOffsetToNative *mapOffsetToNative;
|
|
|
|
/**
|
|
* (public) TBD
|
|
*
|
|
* @see UTextMapIndexToUTF16
|
|
* @draft ICU 3.4
|
|
*/
|
|
UTextMapIndexToUTF16 *mapIndexToUTF16;
|
|
};
|
|
|
|
U_CDECL_END
|
|
|
|
|
|
|
|
|
|
#ifdef XP_CPLUSPLUS
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
|
/**
|
|
*
|
|
* UTextIterator is the class used to access the text data that is
|
|
* behind a UText object. Services that receive text in the
|
|
* form of a UText will instanitate a UTextIterator
|
|
* for this purpose.
|
|
*
|
|
* @draft ICU 3.4
|
|
*/
|
|
class U_COMMON_API UTextIterator : public UMemory {
|
|
public:
|
|
// all-inline, and stack-allocatable
|
|
// constructors, get/set UText, etc.
|
|
// needs to have state besides the current chunk: at least the current index
|
|
// for performance, may use a current-position pointer and chunk start/limit
|
|
// pointers and translate back into indexes only when necessary
|
|
|
|
UTextIterator(UText *t);
|
|
|
|
/**
|
|
* Returns the code point at the requested index,
|
|
* or U_SENTINEL (-1) if it is out of bounds.
|
|
*/
|
|
// see next32From() -- inline UChar32 char32At(int32_t index);
|
|
|
|
// U_SENTINEL (-1) if out of bounds
|
|
inline UChar32 next32();
|
|
|
|
/**
|
|
* Move the iterator position to the character (code point) whose
|
|
* index precedes the current position, and return that character.
|
|
* This is a pre-decrement operation.
|
|
* If the initial iterator position is in the interior of a multi-unit
|
|
* character (a utf-8 sequence or a utf-16 surrogate pair, for example),
|
|
* the position will be backed up to the beginning of that character,
|
|
* and that character will be returned.
|
|
*
|
|
* @return the previous UChar32 code point, or U_SENTINEL (-1)
|
|
* if the index is out of bounds
|
|
* @draft ICU 3.4
|
|
*/
|
|
inline UChar32 previous32();
|
|
|
|
/**
|
|
* Set the iteration index for a following next32() or previous32().
|
|
* Does not immediately access text from the provider.
|
|
* If the specified index is less than zero or greater than the
|
|
* length of the text, the position will be set to zero or the length.
|
|
* next32From(index) is more efficient than setIndex()+next32().
|
|
* previous32From(index) is more efficient than setIndex()+previous32().
|
|
*
|
|
* <p>
|
|
* TODO: what should happen if the new index is not on a code point boundary?
|
|
*
|
|
* @param index the new index position to set the iterator to.
|
|
*
|
|
* @draft ICU 3.4
|
|
|
|
*/
|
|
inline void setIndex(int32_t index);
|
|
|
|
/**
|
|
* Set the iteration index, access the text for forward iteration,
|
|
* and return the code point starting at or before that index.
|
|
*
|
|
* @param index Iteration index.
|
|
* @return Code point which starts at or before index,
|
|
* or U_SENTINEL (-1) if it is out of bounds.
|
|
* @draft ICU 3.4
|
|
*/
|
|
inline UChar32 next32From(int32_t index);
|
|
/**
|
|
* Set the iteration index, access the text for backward iteration,
|
|
* and return the code point ending at or before that index.
|
|
*
|
|
* @param index Iteration index.
|
|
* @return Code point which ends at or before index,
|
|
* or U_SENTINEL (-1) if it is out of bounds.
|
|
*
|
|
* @draft ICU 3.4
|
|
*/
|
|
inline UChar32 previous32From(int32_t index);
|
|
|
|
/**
|
|
* Get the current iterator position, which can range from 0 to length.
|
|
* The position is an index into the input text, in whatever format it
|
|
* may have, and may not always correspond to a UChar (UTF-16) index
|
|
* into the text.
|
|
*
|
|
* @return the current index position.
|
|
* @draft ICU 3.4
|
|
*/
|
|
inline int32_t getIndex();
|
|
|
|
/**
|
|
* Move the iterator postion by delta code points. The amount to move
|
|
* is a signed number; a negative delta will move the iterator backwards,
|
|
* towards the start of the text.
|
|
* <p/>
|
|
* Behavior for out-of-bounds indexes:
|
|
* <code>moveIndex</code> pins the input index to 0..length(), i.e.,
|
|
* if the input index<0 then it is pinned to 0;
|
|
* if it is index>length() then it is pinned to length().
|
|
* Afterwards, the index is moved by <code>delta</code> code points
|
|
* forward or backward,
|
|
* but no further backward than to 0 and no further forward than to length().
|
|
* The resulting index value will be in between 0 and length(), inclusive.
|
|
*
|
|
* @return TRUE if the position could be moved the requested number of positions without
|
|
* running of the start or end of the text.
|
|
* @draft ICU 3.4
|
|
*/
|
|
UBool moveIndex(int32_t delta);
|
|
|
|
/**
|
|
* Compare the text starting from the current index with the string
|
|
* argument. The index is modified. In case of a match (zero result),
|
|
* the index is left exactly after the matching segment.
|
|
* Otherwise, the index position is undefined.
|
|
*
|
|
* Negative/positive results mean that the text segment compares
|
|
* lower/higher than the string. A zero result means that the text
|
|
* segment compares equal, even if there is following text after the
|
|
* matching segment.
|
|
* Test for the end of the text using next32()>=0 if necessary.
|
|
*
|
|
* @param codePointOrder Choose between code unit order (FALSE)
|
|
* and code point order (TRUE).
|
|
*
|
|
* @return negative/0/positive as comparison result.
|
|
*
|
|
* TODO: this function seems a little out of place in this class.
|
|
* Probably should be removed to some collection of TextIterator based
|
|
* string utiltity functions.
|
|
* @internal
|
|
*/
|
|
UBool compare(const UChar *s, int32_t length, UBool codePointOrder);
|
|
|
|
/**
|
|
* Get the total length of the text, expressed in the units of the
|
|
* underlying text storage, which is not necessarily utf-16 code units.
|
|
* The length of a utf-8 string, for example, would be returned in bytes.
|
|
* May be expensive to compute.
|
|
*
|
|
* @return the lenght of the text.
|
|
*
|
|
* @draft ICU 3.4
|
|
*/
|
|
int32_t length();
|
|
|
|
// convenience wrappers for access(), extract()?
|
|
// needed at least for extract()/copy() for chunk invalidation
|
|
// getChunkStart(), getChunkLimit() for the current chunk?
|
|
// const UTextChunk *getChunk()?
|
|
|
|
private:
|
|
UText *t;
|
|
UTextChunk chunk;
|
|
int32_t chunkOffset; // Current index within this chunk.
|
|
int32_t providerProperties; // -1 if not known yet
|
|
|
|
void setChunkInvalid(int32_t index);
|
|
|
|
/** Call chunkOffset=t->access() and return TRUE if a chunk is returned. */
|
|
UBool access(int32_t index, UBool forward);
|
|
|
|
UChar32 getSupplementary(); // Get a supplementary char at the current position.
|
|
// position could be on either lead or trail.
|
|
// Position will be adjusted to be on lead.
|
|
// Out-of-line, to reduce amount of inline code.
|
|
};
|
|
|
|
|
|
//--------------------------------------------------------------------------------
//
//  UTextIterator inline implementations
//
//--------------------------------------------------------------------------------
|
|
|
|
|
|
/*
 * Return the code point at the current position and advance past it.
 * When the position has reached the end of the current chunk, the text
 * starting at chunk.limit is accessed (forward) first.
 * Returns U_SENTINEL if no chunk is available there.
 */
inline UChar32
UTextIterator::next32() {
    if(chunkOffset>=chunk.length && !access(chunk.limit, TRUE)) {
        // no chunk available here
        return U_SENTINEL;
    }

    UChar32 c;
    // Reads one code point (possibly a surrogate pair) and advances chunkOffset.
    U16_NEXT(chunk.contents, chunkOffset, chunk.length, c);
    return c;
}
|
|
|
|
/*
 * Move the position back by one code point and return that code point.
 * When the position is at the start of the current chunk, the text
 * before chunk.start is accessed (backward) first.
 * Returns U_SENTINEL if no chunk is available there.
 */
inline UChar32
UTextIterator::previous32() {
    if(chunkOffset<=0 && !access(chunk.start, FALSE)) {
        // no chunk available here
        return U_SENTINEL;
    }

    UChar32 c;
    // Moves chunkOffset back over one code point (possibly a surrogate pair) and reads it.
    U16_PREV(chunk.contents, 0, chunkOffset, c);
    return c;
}
|
|
|
|
void
|
|
UTextIterator::setIndex(int32_t index) {
|
|
if(index<chunk.start || chunk.limit<index) {
|
|
// The desired position is outside of the current chunk. Invalidate it and
|
|
// leave it to next32() or previous32() to access the text
|
|
// in the desired direction
|
|
setChunkInvalid(index);
|
|
} else if(chunk.nonUTF16Indexes) {
|
|
chunkOffset=t->mapIndexToUTF16(t, &chunk, index);
|
|
} else {
|
|
chunkOffset=index-chunk.start;
|
|
}
|
|
}
|
|
|
|
/*
 * Position the iterator at a (native) index, accessing the text for forward
 * iteration if the index is not inside the current chunk, then return the
 * code point there and advance past it.
 * Returns U_SENTINEL if no chunk is available for the index.
 */
inline UChar32
UTextIterator::next32From(int32_t index) {
    if(index<chunk.start || chunk.limit<=index) {
        // Index is not inside [start, limit) of the current chunk.
        if(!access(index, TRUE)) {
            // no chunk available here
            return U_SENTINEL;
        }
    } else if(chunk.nonUTF16Indexes) {
        // Native indexes differ from UTF-16 offsets: let the provider translate.
        chunkOffset=t->mapIndexToUTF16(t, &chunk, index);
    } else {
        chunkOffset=index-chunk.start;
    }

    UChar32 c;
    // Reads one code point (possibly a surrogate pair) and advances chunkOffset.
    U16_NEXT(chunk.contents, chunkOffset, chunk.length, c);
    return c;
}
|
|
|
|
/*
 * Position the iterator at a (native) index, accessing the text for backward
 * iteration if the index is not inside the current chunk, then move back by
 * one code point and return it.
 * Returns U_SENTINEL if no chunk is available for the index.
 */
inline UChar32
UTextIterator::previous32From(int32_t index) {
    if(index<=chunk.start || chunk.limit<index) {
        // Index is not inside (start, limit] of the current chunk.
        if(!access(index, FALSE)) {
            // no chunk available here
            return U_SENTINEL;
        }
    } else if(chunk.nonUTF16Indexes) {
        // Native indexes differ from UTF-16 offsets: let the provider translate.
        chunkOffset=t->mapIndexToUTF16(t, &chunk, index);
    } else {
        chunkOffset=index-chunk.start;
    }

    // The mapped offset can be 0 even though index>chunk.start, for example when
    // a native index inside the first multi-unit character of the chunk is
    // adjusted back to the preceding code point boundary.  Decrementing would then
    // read chunk.contents[-1]; instead fetch the preceding text exactly as
    // previous32() does at the start of a chunk.
    if(chunkOffset<=0 && !access(chunk.start, FALSE)) {
        // no chunk available here
        return U_SENTINEL;
    }

    UChar32 c;
    chunkOffset--;
    c = chunk.contents[chunkOffset];
    if (U16_IS_SURROGATE(c)) {
        // take supplementary support out-of-line
        c = this->getSupplementary();
    }
    return c;
}
|
|
|
|
/*
 * Return the current iteration position as a (native) text index.
 */
inline int32_t
UTextIterator::getIndex() {
    if(!chunk.nonUTF16Indexes || chunkOffset==0) {
        // UTF-16 indexes, or exactly at the chunk start: no provider mapping is needed.
        return chunk.start+chunkOffset;
    } else {
        // Native indexes: let the provider translate the chunk-relative UTF-16 offset.
        return t->mapOffsetToNative(t, &chunk, chunkOffset);
    }
}
|
|
|
|
U_NAMESPACE_END
|
|
|
|
#endif /* C++ */
|
|
|
|
#endif /* U_HIDE_DRAFT_API */
|
|
|
|
#endif
|