/*
* Copyright (C) {1999}, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation.
**********************************************************************
*/
|
|
|
|
#ifndef HEXTOUNI_H
|
|
|
|
#define HEXTOUNI_H
|
|
|
|
|
|
|
|
#include "unicode/translit.h"
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
/**
 * A transliterator that converts from hexadecimal Unicode escape
 * sequences to the characters they represent. For example, "U+0040"
 * and '\u0040' are such escape sequences. A default
 * HexToUnicodeTransliterator recognizes the
 * prefixes "U+", "u+", "\U", and "\u". Hex values may be
 * upper- or lowercase. By calling the applyPattern() method, one
 * or more custom prefix/suffix pairs may be specified. See
 * applyPattern() for details.
 *
 * <p>Copyright © IBM Corporation 1999. All rights reserved.
 *
 * @author Alan Liu
 * @version $RCSfile: hextouni.h,v $ $Revision: 1.11 $ $Date: 2001/10/11 23:54:55 $
 * @draft
 */
|
|
|
|
class U_I18N_API HexToUnicodeTransliterator : public Transliterator {

    // NOTE: the members declared before the `public:` label below have
    // the default class access, i.e. they are private.

    /**
     * ID for this transliterator.
     */
    static const char _ID[];

    /**
     * The pattern used by the default constructor.
     */
    static const UChar DEFAULT_PATTERN[];

    // Character constants defined here to avoid ASCII dependency.
    // These are the four special characters of the pattern syntax
    // described at applyPattern().
    enum {
        SEMICOLON = 0x003B, // ';'
        ZERO = 0x0030, // '0'
        POUND = 0x0023, // '#'
        BACKSLASH = 0x005C // '\\'
    };

    /**
     * The pattern for this transliterator.
     */
    UnicodeString pattern;

    /**
     * The processed pattern specification. See applyPattern() for
     * details.
     */
    UnicodeString affixes;

    /**
     * The number of different affix sets in affixes.
     */
    int32_t affixCount;

public:

    /**
     * Constructs a transliterator that recognizes the standard
     * prefixes "\u", "\U", "u+", and "U+", each with no
     * suffix.
     * @param adoptedFilter optional filter; defaults to 0 (no filter).
     * NOTE(review): the name suggests this object takes ownership of
     * the filter -- confirm against Transliterator.
     * @draft
     */
    HexToUnicodeTransliterator(UnicodeFilter* adoptedFilter = 0);

    /**
     * Constructs a custom transliterator with the given pattern.
     * @param pattern the pattern specification, in the syntax
     * described at applyPattern().
     * @param status error code in/out parameter. NOTE(review):
     * presumably set to a failure code when the pattern is rejected --
     * confirm in the implementation.
     * @see #applyPattern
     */
    HexToUnicodeTransliterator(const UnicodeString& pattern,
                               UErrorCode& status);

    /**
     * Constructs a custom transliterator with the given pattern
     * and filter.
     * @param pattern the pattern specification, in the syntax
     * described at applyPattern().
     * @param adoptedFilter the filter to use. NOTE(review): the name
     * suggests this object takes ownership of the filter -- confirm
     * against Transliterator.
     * @param status error code in/out parameter. NOTE(review):
     * presumably set to a failure code when the pattern is rejected --
     * confirm in the implementation.
     * @see #applyPattern
     */
    HexToUnicodeTransliterator(const UnicodeString& pattern,
                               UnicodeFilter* adoptedFilter,
                               UErrorCode& status);

    /**
     * Destructor.
     * @draft
     */
    virtual ~HexToUnicodeTransliterator();

    /**
     * Copy constructor.
     * @draft
     */
    HexToUnicodeTransliterator(const HexToUnicodeTransliterator&);

    /**
     * Assignment operator.
     * @return a reference to a HexToUnicodeTransliterator
     * (conventionally *this; the definition is not in this header).
     * @draft
     */
    HexToUnicodeTransliterator& operator=(const HexToUnicodeTransliterator&);

    /**
     * Transliterator API.
     * @return a pointer to a Transliterator. NOTE(review): presumably
     * a newly allocated copy of this object that the caller owns --
     * confirm against Transliterator::clone.
     * @draft
     */
    Transliterator* clone(void) const;

    /**
     * Set the patterns recognized by this transliterator. One or
     * more patterns may be specified, separated by semicolons (';').
     * Each pattern contains zero or more prefix characters, one or
     * more digit characters, and zero or more suffix characters. The
     * digit characters consist of optional digits ('#') followed by
     * required digits ('0'). The total number of digits cannot
     * exceed 4, and there must be at least 1 required digit. Use a
     * backslash ('\\') to escape any of the special characters. An
     * empty pattern is allowed; it specifies a transliterator that
     * does nothing.
     *
     * <p>Example: "U+0000;<###0>" specifies two patterns. The first
     * has a prefix of "U+", exactly four digits, and no suffix. The
     * second has a prefix of "<", between one and four digits, and a
     * suffix of ">".
     *
     * <p><pre>
     * pattern := spec | ( pattern ';' spec )
     * spec := prefix-char* digit-spec suffix-char*
     * digit-spec := '#'* '0'+
     * prefix-char := [^special-char] | '\\' special-char
     * suffix-char := [^special-char] | '\\' special-char
     * special-char := ';' | '0' | '#' | '\\'
     * </pre>
     *
     * @param thePattern the pattern specification to apply.
     * @param status error code in/out parameter. NOTE(review):
     * presumably set to a failure code when thePattern does not match
     * the grammar above -- confirm in the implementation.
     */
    void applyPattern(const UnicodeString& thePattern, UErrorCode& status);

    /**
     * Return this transliterator's pattern.
     * @return a const reference to the pattern string.
     */
    const UnicodeString& toPattern(void) const;

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     * @param text the text to operate on.
     * @param offset the position information for the operation.
     * @param isIncremental incremental-mode flag; see
     * Transliterator#handleTransliterate for its exact meaning.
     * @draft
     */
    virtual void handleTransliterate(Replaceable& text, UTransPosition& offset,
                                     UBool isIncremental) const;
};
|
|
|
|
|
|
|
|
/* Inline destructor definition; the body is intentionally empty. */
inline HexToUnicodeTransliterator::~HexToUnicodeTransliterator()
{
}
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|
|
|
|
|
1999-12-28 23:57:50 +00:00
|
|
|
#endif
|