1999-12-28 23:57:50 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
2001-03-22 00:09:10 +00:00
|
|
|
* Copyright (C) 1999, International Business Machines Corporation and others. All Rights Reserved.
|
1999-12-28 23:57:50 +00:00
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 11/17/99 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
#ifndef UNITOHEX_H
|
|
|
|
#define UNITOHEX_H
|
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
|
|
|
#if !UCONFIG_NO_TRANSLITERATION
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
#include "unicode/translit.h"
|
|
|
|
#include "unicode/unistr.h"
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
class UnicodeFilter;
|
|
|
|
|
|
|
|
/**
|
2001-03-22 00:09:10 +00:00
|
|
|
* A transliterator that converts from Unicode characters to
|
1999-12-28 23:57:50 +00:00
|
|
|
* hexadecimal Unicode escape sequences. It outputs a
|
|
|
|
* prefix specified in the constructor and optionally converts the hex
|
|
|
|
* digits to uppercase.
|
|
|
|
*
|
2000-03-08 19:26:17 +00:00
|
|
|
* <p>The format of the output is set by a pattern. This pattern
|
|
|
|
* follows the same syntax as <code>HexToUnicodeTransliterator</code>,
|
|
|
|
* except it does not allow multiple specifications. The pattern sets
|
|
|
|
* the prefix string, suffix string, and minimum and maximum digit
|
|
|
|
* count. There are no setters or getters for these attributes; they
|
|
|
|
* are set only through the pattern.
|
|
|
|
*
|
|
|
|
* <p>The setUppercase() and isUppercase() methods control whether 'a'
|
|
|
|
* through 'f' or 'A' through 'F' are output as hex digits. This is
|
|
|
|
* not controlled through the pattern; only through the methods. The
|
|
|
|
* default is uppercase.
|
|
|
|
*
|
1999-12-28 23:57:50 +00:00
|
|
|
* @author Alan Liu
|
2003-05-01 01:31:28 +00:00
|
|
|
 * @internal Use transliterator factory methods instead since this class will be removed in a future release.
|
1999-12-28 23:57:50 +00:00
|
|
|
*/
|
|
|
|
class U_I18N_API UnicodeToHexTransliterator : public Transliterator {

 private:

    // Character constants defined here to avoid ASCII dependency
    enum {
        ZERO      = 0x0030, // '0'
        POUND     = 0x0023, // '#'
        BACKSLASH = 0x005C  // '\\'
    };

    /**
     * Table of hex digit characters used when formatting output.
     * NOTE(review): the contents are defined in the implementation file;
     * the size of 32 suggests 16 lowercase plus 16 uppercase digits --
     * confirm there.
     */
    static const UChar HEX_DIGITS[32];

    /**
     * ID for this transliterator.
     */
    static const char _ID[];

    /**
     * The pattern set by applyPattern() and returned by toPattern().
     */
    UnicodeString pattern;

    /**
     * The string preceding the hex digits, parsed from the pattern.
     */
    UnicodeString prefix;

    /**
     * The string following the hex digits, parsed from the pattern.
     */
    UnicodeString suffix;

    /**
     * The minimum number of hex digits to output, between 1 and 4,
     * inclusive.  Parsed from the pattern.
     */
    int8_t minDigits;

    /**
     * If TRUE, output uppercase hex digits; otherwise output
     * lowercase.  Set by setUppercase() and returned by isUppercase().
     */
    UBool uppercase;

    /**
     * The address of this static class variable serves as this class's ID
     * for ICU "poor man's RTTI".
     */
    static const char fgClassID;

 public:

    /**
     * Constructs a transliterator.
     * @param pattern The pattern for this transliterator.  See
     * applyPattern() for pattern syntax.
     * @param isUppercase if true, the hex digits will be
     * output in uppercase; otherwise they will be lowercase.
     * @param adoptedFilter the filter for this transliterator, or
     * NULL if none.  Adopted by this transliterator.
     * @param status Error code indicating success or failure
     * to parse pattern.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    UnicodeToHexTransliterator(const UnicodeString& pattern,
                               UBool isUppercase,
                               UnicodeFilter* adoptedFilter,
                               UErrorCode& status);

    /**
     * Constructs an uppercase transliterator with no filter.
     * @param pattern The pattern for this transliterator.  See
     * applyPattern() for pattern syntax.
     * @param status Error code indicating success or failure
     * to parse pattern.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    UnicodeToHexTransliterator(const UnicodeString& pattern,
                               UErrorCode& status);

    /**
     * Constructs a transliterator with the default prefix "\u"
     * that outputs uppercase hex digits.
     * @param adoptedFilter the filter for this transliterator, or
     * NULL (the default) if none.  Adopted by this transliterator.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    UnicodeToHexTransliterator(UnicodeFilter* adoptedFilter = 0);

    /**
     * Destructor.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    virtual ~UnicodeToHexTransliterator();

    /**
     * Copy constructor.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    UnicodeToHexTransliterator(const UnicodeToHexTransliterator&);

    /**
     * Assignment operator.
     * @stable ICU 2.0
     */
    UnicodeToHexTransliterator& operator=(const UnicodeToHexTransliterator&);

    /**
     * Transliterator API.
     * @return a copy of this transliterator, as required by the
     * Transliterator base class.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    virtual Transliterator* clone(void) const;

    /**
     * Set the pattern recognized by this transliterator.  The pattern
     * must contain zero or more prefix characters, one or more digit
     * characters, and zero or more suffix characters.  The digit
     * characters indicate optional digits ('#') followed by required
     * digits ('0').  The total number of digits cannot exceed 4, and
     * there must be at least 1 required digit.  Use a backslash ('\\') to
     * escape any of the special characters.  An empty pattern is not
     * allowed.
     *
     * <p>Example: "U+0000" specifies a prefix of "U+", exactly four
     * digits, and no suffix.  "<###0>" has a prefix of "<", between
     * one and four digits, and a suffix of ">".
     *
     * <p><pre>
     * pattern := prefix-char* digit-spec suffix-char*
     * digit-spec := '#'* '0'+
     * prefix-char := [^special-char] | '\\' special-char
     * suffix-char := [^special-char] | '\\' special-char
     * special-char := ';' | '0' | '#' | '\\'
     * </pre>
     *
     * <p>Limitations: There is no way to set the uppercase attribute
     * in the pattern.  (applyPattern() does not alter the uppercase
     * attribute.)
     *
     * @param thePattern The pattern to apply, in the syntax above.
     * @param status Error code indicating success or failure
     * to parse pattern.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    void applyPattern(const UnicodeString& thePattern, UErrorCode& status);

    /**
     * Return this transliterator's pattern.
     * @return the pattern set by applyPattern() or the constructor.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    const UnicodeString& toPattern(void) const;

    /**
     * Returns true if this transliterator outputs uppercase hex digits.
     * @return TRUE for uppercase output, FALSE for lowercase output.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    virtual UBool isUppercase(void) const;

    /**
     * Sets if this transliterator outputs uppercase hex digits.
     * @param outputUppercase if TRUE, hex digits are output in
     * uppercase; otherwise they are output in lowercase.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    virtual void setUppercase(UBool outputUppercase);

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     * See the Transliterator base class for the full contract of
     * the parameters.
     * @param text the text to be transliterated in place.
     * @param offsets the positions within the text to operate on.
     * @param isIncremental incremental-mode flag passed through from
     * the Transliterator framework.
     * @internal Use transliterator factory methods instead since this class will be removed in a future release.
     */
    virtual void handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                     UBool isIncremental) const;

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @return the class ID of this object's actual class.
     * @draft ICU 2.2
     */
    virtual inline UClassID getDynamicClassID() const;

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @return the class ID shared by all objects of this class.
     * @draft ICU 2.2
     */
    static inline UClassID getStaticClassID();

};
|
|
|
|
|
|
|
|
// Destructor: the body is intentionally empty; the class declares no
// explicit cleanup work of its own.
inline UnicodeToHexTransliterator::~UnicodeToHexTransliterator()
{
}
|
|
|
|
|
2003-02-06 22:57:35 +00:00
|
|
|
// Returns the class ID for UnicodeToHexTransliterator: the address of the
// static member fgClassID, converted to UClassID.
inline UClassID UnicodeToHexTransliterator::getStaticClassID()
{
    const char* const classTag = &fgClassID;
    return (UClassID)classTag;
}
|
|
|
|
|
|
|
|
// Returns the class ID of this object by forwarding to this class's own
// getStaticClassID().
inline UClassID UnicodeToHexTransliterator::getDynamicClassID() const
{
    return getStaticClassID();
}
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|
2002-09-20 01:54:48 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
#endif
|