2003-01-22 22:00:19 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
|
|
|
* Copyright (C) 2003, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: unorm_it.h
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2003jan21
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __UNORM_IT_H__
|
|
|
|
#define __UNORM_IT_H__
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
2003-05-06 01:37:52 +00:00
|
|
|
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
|
2003-01-22 22:00:19 +00:00
|
|
|
|
|
|
|
#include "unicode/uiter.h"
|
|
|
|
#include "unicode/unorm.h"
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Normalizing UCharIterator wrapper.
|
|
|
|
* This internal API basically duplicates the functionality of the C++ Normalizer
|
|
|
|
* but
|
|
|
|
* - it actually implements a character iterator (UCharIterator)
|
|
|
|
* with few restrictions (see unorm_setIter())
|
|
|
|
* - it supports UCharIterator getState()/setState()
|
|
|
|
* - it uses lower-level APIs and buffers more text and states,
|
|
|
|
* hopefully resulting in higher performance
|
|
|
|
*
|
2003-01-22 22:13:59 +00:00
|
|
|
* Usage example:
|
|
|
|
* \code
|
|
|
|
* function(UCharIterator *srcIter) {
|
|
|
|
* UNormIterator *uni;
|
|
|
|
* UCharIterator *iter;
|
|
|
|
* UErrorCode errorCode;
|
|
|
|
*
|
|
|
|
* errorCode=U_ZERO_ERROR;
|
|
|
|
* uni=unorm_openIter(&errorCode);
|
|
|
|
* if(U_FAILURE(errorCode)) {
|
|
|
|
* // report error
|
|
|
|
* return;
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode);
|
|
|
|
* if(U_FAILURE(errorCode)) {
|
|
|
|
* // report error
|
|
|
|
* } else {
|
|
|
|
* // use iter to iterate over the canonically ordered
|
|
|
|
* // version of srcIter's text
|
|
|
|
* uint32_t state;
|
|
|
|
*
|
|
|
|
* ...
|
|
|
|
*
|
|
|
|
* state=uiter_getState(iter);
|
|
|
|
* if(state!=UITER_NO_STATE) {
|
|
|
|
* // use valid state, store it, use iter some more
|
|
|
|
* ...
|
|
|
|
*
|
|
|
|
* // later restore iter to the saved state:
|
|
|
|
* uiter_setState(iter, state, &errorCode);
|
|
|
|
*
|
|
|
|
* ...
|
|
|
|
* }
|
|
|
|
*
|
|
|
|
* ...
|
|
|
|
* }
|
|
|
|
* unorm_closeIter(uni);
|
|
|
|
* }
|
|
|
|
* \endcode
|
|
|
|
*
|
|
|
|
* See also the ICU test suites.
|
|
|
|
*
|
2003-01-22 22:00:19 +00:00
|
|
|
* @internal
|
|
|
|
*/
|
|
|
|
struct UNormIterator;
|
|
|
|
typedef struct UNormIterator UNormIterator;
|
|
|
|
|
2003-03-17 18:32:54 +00:00
|
|
|
/**
|
|
|
|
* Size of a stack buffer to hold a UNormIterator, see the stackMem parameter
|
|
|
|
* of unorm_openIter().
|
|
|
|
*
|
|
|
|
* @internal
|
|
|
|
*/
|
|
|
|
#define UNORM_ITER_SIZE 1024
|
|
|
|
|
2003-01-22 22:00:19 +00:00
|
|
|
/**
|
|
|
|
* Open a normalizing iterator. Must be closed later.
|
|
|
|
* Use unorm_setIter().
|
|
|
|
*
|
2003-03-17 18:32:54 +00:00
|
|
|
* @param stackMem Pointer to preallocated (stack-allocated) buffer to hold
|
|
|
|
* the UNormIterator if possible; can be NULL.
|
|
|
|
* @param stackMemSize Number of bytes at stackMem; can be 0,
|
|
|
|
* or should be >= UNORM_ITER_SIZE for a non-NULL stackMem.
|
2003-01-22 22:00:19 +00:00
|
|
|
* @param pErrorCode ICU error code
|
|
|
|
* @return an allocated and pre-initialized UNormIterator
|
|
|
|
* @internal
|
|
|
|
*/
|
|
|
|
U_CAPI UNormIterator * U_EXPORT2
|
2003-03-17 18:32:54 +00:00
|
|
|
unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode);
|
2003-01-22 22:00:19 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Close a normalizing iterator.
|
|
|
|
*
|
|
|
|
* @param uni UNormIterator from unorm_openIter()
|
|
|
|
* @internal
|
|
|
|
*/
|
|
|
|
U_CAPI void U_EXPORT2
|
|
|
|
unorm_closeIter(UNormIterator *uni);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Set a UCharIterator and a normalization mode for the normalizing iterator
|
|
|
|
* to wrap. The normalizing iterator will read from the character iterator,
|
|
|
|
* normalize the text, and in turn deliver it with its own wrapper UCharIterator
|
|
|
|
* interface which it returns.
|
|
|
|
*
|
|
|
|
* The source iterator remains at its current position through the unorm_setIter()
|
|
|
|
* call but will be used and moved as soon as the
|
|
|
|
* the returned normalizing iterator is.
|
|
|
|
*
|
|
|
|
* The returned interface pointer is valid for as long as the normalizing iterator
|
|
|
|
* is open and until another unorm_setIter() call is made on it.
|
|
|
|
*
|
|
|
|
* The normalizing iterator's UCharIterator interface has the following properties:
|
|
|
|
* - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX
|
|
|
|
* - getState() will return UITER_NO_STATE for unknown states for positions
|
|
|
|
* that are not at normalization boundaries
|
|
|
|
*
|
|
|
|
* @param uni UNormIterator from unorm_openIter()
|
|
|
|
* @param iter The source text UCharIterator to be wrapped. It is aliases into the normalizing iterator.
|
|
|
|
* Must support getState() and setState().
|
|
|
|
* @param mode The normalization mode.
|
|
|
|
* @param pErrorCode ICU error code
|
|
|
|
* @return an alias to the normalizing iterator's UCharIterator interface
|
|
|
|
* @internal
|
|
|
|
*/
|
|
|
|
U_CAPI UCharIterator * U_EXPORT2
|
|
|
|
unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode);
|
|
|
|
|
2003-05-06 01:37:52 +00:00
|
|
|
#endif /* uconfig.h switches */
|
|
|
|
|
2003-01-22 22:00:19 +00:00
|
|
|
#endif
|