/* * Copyright (C) 1999, International Business Machines Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description * 11/17/99 aliu Creation. ********************************************************************** */ #ifndef UNIFILT_H #define UNIFILT_H #include "unicode/unifunct.h" #include "unicode/unimatch.h" U_NAMESPACE_BEGIN /** * U_ETHER is used to represent character values for positions outside * a range. For example, transliterator uses this to represent * characters outside the range contextStart..contextLimit-1. This * allows explicit matching by rules and UnicodeSets of text outside a * defined range. */ #define U_ETHER ((UChar)0xFFFF) /** * UnicodeFilter defines a protocol for selecting a * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. * Currently, filters are used in conjunction with classes like {@link * Transliterator} to only process selected characters through a * transformation. * *

Note: UnicodeFilter currently stubs out two pure virtual methods * of its base class, UnicodeMatcher. These methods are toPattern() * and matchesIndexValue(). This is done so that filter classes that * are not actually used as matchers -- specifically, those in the * UnicodeFilterLogic component, and those in tests -- can continue to * work without defining these methods. As long as a filter is not * used in an RBT during real transliteration, these methods will not * be called. However, this breaks the UnicodeMatcher base class * protocol, and it is not a correct solution. * *

In the future we may revisit the UnicodeMatcher / UnicodeFilter * hierarchy and either redesign it, or simply remove the stubs in * UnicodeFilter and force subclasses to implement the full * UnicodeMatcher protocol. * * @see UnicodeFilterLogic * @stable ICU 2.0 */ class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { public: /** * Destructor * @stable ICU 2.0 */ virtual ~UnicodeFilter(); /** * Returns true for characters that are in the selected * subset. In other words, if a character is to be * filtered, then contains() returns * false. * @stable ICU 2.0 */ virtual UBool contains(UChar32 c) const = 0; /** * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer * and return the pointer. */ virtual UnicodeMatcher* toMatcher() const; /** * Implement UnicodeMatcher API. */ virtual UMatchDegree matches(const Replaceable& text, int32_t& offset, int32_t limit, UBool incremental); /** * UnicodeFunctor API. Nothing to do. */ virtual void setData(const TransliterationRuleData*) {} /** * ICU "poor man's RTTI", returns a UClassID for the actual class. * * @draft ICU 2.2 */ virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); } /** * ICU "poor man's RTTI", returns a UClassID for this class. * * @draft ICU 2.2 */ static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; } protected: UnicodeFilter(); private: /** * The address of this static class variable serves as this class's ID * for ICU "poor man's RTTI". */ static const char fgClassID; }; inline UnicodeFilter::UnicodeFilter() {} inline UnicodeFilter::~UnicodeFilter() {} U_NAMESPACE_END #endif