1999-12-28 23:57:50 +00:00
|
|
|
/*
* Copyright (C) 1999, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
|
|
|
|
#ifndef UNIFILT_H
|
|
|
|
#define UNIFILT_H
|
|
|
|
|
2002-02-07 01:07:55 +00:00
|
|
|
#include "unicode/unifunct.h"
|
2001-07-25 18:50:09 +00:00
|
|
|
#include "unicode/unimatch.h"
|
1999-12-28 23:57:50 +00:00
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2002-05-29 18:36:09 +00:00
|
|
|
/**
 * U_ETHER is used to represent character values for positions outside
 * a range.  For example, transliterator uses this to represent
 * characters outside the range contextStart..contextLimit-1.  This
 * allows explicit matching by rules and UnicodeSets of text outside a
 * defined range.
 *
 * The macro expands to the value 0xFFFF cast to UChar.
 */
#define U_ETHER ((UChar)0xFFFF)
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
/**
|
|
|
|
* <code>UnicodeFilter</code> defines a protocol for selecting a
|
2001-11-20 00:41:01 +00:00
|
|
|
* subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
|
1999-12-28 23:57:50 +00:00
|
|
|
* Currently, filters are used in conjunction with classes like {@link
|
|
|
|
* Transliterator} to only process selected characters through a
|
|
|
|
* transformation.
|
|
|
|
*
|
2001-07-31 18:28:23 +00:00
|
|
|
* <p>Note: UnicodeFilter currently stubs out two pure virtual methods
|
|
|
|
* of its base class, UnicodeMatcher. These methods are toPattern()
|
|
|
|
* and matchesIndexValue(). This is done so that filter classes that
|
|
|
|
* are not actually used as matchers -- specifically, those in the
|
|
|
|
* UnicodeFilterLogic component, and those in tests -- can continue to
|
|
|
|
* work without defining these methods. As long as a filter is not
|
|
|
|
* used in an RBT during real transliteration, these methods will not
|
|
|
|
* be called. However, this breaks the UnicodeMatcher base class
|
|
|
|
* protocol, and it is not a correct solution.
|
|
|
|
*
|
|
|
|
* <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
|
|
|
|
* hierarchy and either redesign it, or simply remove the stubs in
|
|
|
|
* UnicodeFilter and force subclasses to implement the full
|
|
|
|
* UnicodeMatcher protocol.
|
|
|
|
*
|
1999-12-28 23:57:50 +00:00
|
|
|
* @see UnicodeFilterLogic
|
2001-11-14 00:50:39 +00:00
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
*/
|
2002-05-29 18:36:09 +00:00
|
|
|
class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
|
1999-12-28 23:57:50 +00:00
|
|
|
|
|
|
|
public:
|
2000-03-22 19:19:33 +00:00
|
|
|
/**
|
2001-03-22 00:09:10 +00:00
|
|
|
* Destructor
|
2001-11-14 00:50:39 +00:00
|
|
|
* @stable
|
|
|
|
*/
|
1999-12-28 23:57:50 +00:00
|
|
|
virtual ~UnicodeFilter();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns <tt>true</tt> for characters that are in the selected
|
|
|
|
* subset. In other words, if a character is <b>to be
|
2000-01-18 20:00:56 +00:00
|
|
|
* filtered</b>, then <tt>contains()</tt> returns
|
1999-12-28 23:57:50 +00:00
|
|
|
* <b><tt>false</tt></b>.
|
2001-11-14 00:50:39 +00:00
|
|
|
* @stable
|
1999-12-28 23:57:50 +00:00
|
|
|
*/
|
2001-07-25 18:50:09 +00:00
|
|
|
virtual UBool contains(UChar32 c) const = 0;
|
1999-12-28 23:57:50 +00:00
|
|
|
|
2002-02-07 01:07:55 +00:00
|
|
|
/**
|
|
|
|
* UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
|
|
|
|
* and return the pointer.
|
|
|
|
*/
|
|
|
|
virtual UnicodeMatcher* toMatcher() const;
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
/**
|
2001-07-27 00:18:53 +00:00
|
|
|
* UnicodeMatcher API. This class stubs this out.
|
|
|
|
*/
|
|
|
|
UnicodeString& toPattern(UnicodeString& result,
|
|
|
|
UBool escapeUnprintable) const;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* UnicodeMatcher API. This class stubs this out.
|
1999-12-28 23:57:50 +00:00
|
|
|
*/
|
2001-07-27 00:18:53 +00:00
|
|
|
UBool matchesIndexValue(uint8_t v) const;
|
1999-12-28 23:57:50 +00:00
|
|
|
|
2001-07-25 18:50:09 +00:00
|
|
|
/**
|
|
|
|
* Implement UnicodeMatcher API.
|
|
|
|
*/
|
|
|
|
virtual UMatchDegree matches(const Replaceable& text,
|
|
|
|
int32_t& offset,
|
|
|
|
int32_t limit,
|
2001-10-30 23:55:09 +00:00
|
|
|
UBool incremental);
|
2001-07-25 18:50:09 +00:00
|
|
|
|
2002-03-20 00:42:02 +00:00
|
|
|
/**
|
|
|
|
* UnicodeFunctor API. Nothing to do.
|
|
|
|
*/
|
|
|
|
virtual void setData(const TransliterationRuleData*) {}
|
|
|
|
|
2002-06-29 00:04:16 +00:00
|
|
|
/**
|
|
|
|
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
|
|
|
*
|
|
|
|
* @draft ICU 2.2
|
|
|
|
*/
|
|
|
|
virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ICU "poor man's RTTI", returns a UClassID for this class.
|
|
|
|
*
|
|
|
|
* @draft ICU 2.2
|
|
|
|
*/
|
|
|
|
static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
|
|
|
|
|
2002-06-28 21:13:54 +00:00
|
|
|
/**
|
|
|
|
* Stubbed out implementation of UnicodeMatcher API.
|
|
|
|
* @param toUnionTo the set into which to union the source characters
|
|
|
|
* @return a reference to toUnionTo
|
|
|
|
*/
|
|
|
|
virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
protected:
|
|
|
|
|
|
|
|
UnicodeFilter();
|
2002-06-29 00:04:16 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The address of this static class variable serves as this class's ID
|
|
|
|
* for ICU "poor man's RTTI".
|
|
|
|
*/
|
|
|
|
static const char fgClassID;
|
1999-12-28 23:57:50 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// Default constructor (declared protected in the class body).  The body is
// empty: UnicodeFilter itself declares no non-static data members to set up.
inline UnicodeFilter::UnicodeFilter() {}
|
|
|
|
// Destructor (declared virtual in the class body).  The body is empty:
// UnicodeFilter itself declares no non-static data members to release.
inline UnicodeFilter::~UnicodeFilter() {}
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
#endif
|