2001-08-15 19:06:40 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
|
|
|
* Copyright (c) 2001, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
**********************************************************************
|
|
|
|
* Date Name Description
|
|
|
|
* 08/10/2001 aliu Creation.
|
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
#ifndef _TRANSREG_H
|
|
|
|
#define _TRANSREG_H
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
2002-09-20 01:54:48 +00:00
|
|
|
|
|
|
|
#if !UCONFIG_NO_TRANSLITERATION
|
|
|
|
|
2002-06-27 01:19:20 +00:00
|
|
|
#include "unicode/uobject.h"
|
2001-08-15 19:06:40 +00:00
|
|
|
#include "unicode/translit.h"
|
|
|
|
#include "hash.h"
|
|
|
|
#include "uvector.h"
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
2001-08-15 19:06:40 +00:00
|
|
|
class Entry;
|
|
|
|
class Spec;
|
|
|
|
class UnicodeString;
|
|
|
|
|
2001-10-04 21:22:17 +00:00
|
|
|
//------------------------------------------------------------------
|
|
|
|
// TransliteratorAlias
|
|
|
|
//------------------------------------------------------------------
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A TransliteratorAlias object is returned by get() if the given ID
|
|
|
|
* actually translates into something else. The caller then invokes
|
|
|
|
* the create() method on the alias to create the actual
|
|
|
|
* transliterator, and deletes the alias.
|
|
|
|
*
|
|
|
|
* Why all the shenanigans? To prevent circular calls between
|
|
|
|
* the registry code and the transliterator code that would deadlock.
|
|
|
|
*/
|
2002-06-27 01:19:20 +00:00
|
|
|
class TransliteratorAlias : public UObject {
|
2001-10-04 21:22:17 +00:00
|
|
|
public:
|
|
|
|
/**
|
|
|
|
* Construct a simple alias.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param aliasID the given id.
|
2001-10-04 21:22:17 +00:00
|
|
|
*/
|
|
|
|
TransliteratorAlias(const UnicodeString& aliasID);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Construct a compound RBT alias.
|
|
|
|
*/
|
|
|
|
TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlock,
|
2001-10-10 19:29:45 +00:00
|
|
|
Transliterator* adopted, int32_t idSplitPoint,
|
|
|
|
const UnicodeSet* compoundFilter);
|
2001-10-04 21:22:17 +00:00
|
|
|
|
|
|
|
~TransliteratorAlias();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The whole point of create() is that the caller must invoke
|
|
|
|
* it when the registry mutex is NOT held, to prevent deadlock.
|
|
|
|
* It may only be called once.
|
|
|
|
*/
|
|
|
|
Transliterator* create(UParseError&, UErrorCode&);
|
|
|
|
|
2002-06-29 00:04:16 +00:00
|
|
|
/**
|
|
|
|
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
|
|
|
*
|
|
|
|
* @draft ICU 2.2
|
|
|
|
*/
|
|
|
|
virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ICU "poor man's RTTI", returns a UClassID for this class.
|
|
|
|
*
|
|
|
|
* @draft ICU 2.2
|
|
|
|
*/
|
|
|
|
static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
|
|
|
|
|
2001-10-04 21:22:17 +00:00
|
|
|
private:
|
|
|
|
// We actually come in two flavors:
|
|
|
|
// 1. Simple alias
|
|
|
|
// Here aliasID is the alias string. Everything else is
|
|
|
|
// null, zero, empty.
|
|
|
|
// 2. CompoundRBT
|
|
|
|
// Here ID is the ID, aliasID is the idBlock, trans is the
|
|
|
|
// contained RBT, and idSplitPoint is the offet in aliasID
|
2001-10-10 19:29:45 +00:00
|
|
|
// where the contained RBT goes. compoundFilter is the
|
|
|
|
// compound filter, and it is _not_ owned.
|
2001-10-04 21:22:17 +00:00
|
|
|
UnicodeString ID;
|
|
|
|
UnicodeString aliasID;
|
|
|
|
Transliterator* trans; // owned
|
2001-10-10 19:29:45 +00:00
|
|
|
const UnicodeSet* compoundFilter; // alias
|
2001-10-04 21:22:17 +00:00
|
|
|
int32_t idSplitPoint;
|
2002-06-29 00:04:16 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* The address of this static class variable serves as this class's ID
|
|
|
|
* for ICU "poor man's RTTI".
|
|
|
|
*/
|
|
|
|
static const char fgClassID;
|
2001-10-04 21:22:17 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2001-08-15 19:06:40 +00:00
|
|
|
/**
|
|
|
|
* A registry of system transliterators. This is the data structure
|
|
|
|
* that implements the mapping between transliterator IDs and the data
|
|
|
|
* or function pointers used to create the corresponding
|
|
|
|
* transliterators. There is one instance of the registry that is
|
|
|
|
* created statically.
|
|
|
|
*
|
|
|
|
* The registry consists of a dynamic component -- a hashtable -- and
|
|
|
|
* a static component -- locale resource bundles. The dynamic store
|
|
|
|
* is semantically overlaid on the static store, so the static mapping
|
|
|
|
* can be dynamically overridden.
|
|
|
|
*
|
|
|
|
* This is an internal class that is only used by Transliterator.
|
|
|
|
* Transliterator maintains one static instance of this class and
|
|
|
|
* delegates all registry-related operations to it.
|
|
|
|
*
|
|
|
|
* @author Alan Liu
|
|
|
|
*/
|
2002-06-27 01:19:20 +00:00
|
|
|
class TransliteratorRegistry : public UObject {
|
2001-08-15 19:06:40 +00:00
|
|
|
|
|
|
|
public:
|
|
|
|
|
2002-07-01 11:04:45 +00:00
|
|
|
/**
|
|
|
|
* Contructor
|
|
|
|
* @param status Output param set to success/failure code.
|
|
|
|
*/
|
2001-08-23 01:06:08 +00:00
|
|
|
TransliteratorRegistry(UErrorCode& status);
|
2001-08-15 19:06:40 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Nonvirtual destructor -- this class is not subclassable.
|
|
|
|
*/
|
|
|
|
~TransliteratorRegistry();
|
|
|
|
|
|
|
|
//------------------------------------------------------------------
|
|
|
|
// Basic public API
|
|
|
|
//------------------------------------------------------------------
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Given a simple ID (forward direction, no inline filter, not
|
|
|
|
* compound) attempt to instantiate it from the registry. Return
|
|
|
|
* 0 on failure.
|
|
|
|
*
|
2001-10-04 21:22:17 +00:00
|
|
|
* Return a non-NULL aliasReturn value if the ID points to an alias.
|
2001-08-15 19:06:40 +00:00
|
|
|
* We cannot instantiate it ourselves because the alias may contain
|
|
|
|
* filters or compounds, which we do not understand. Caller should
|
2001-10-04 21:22:17 +00:00
|
|
|
* make aliasReturn NULL before calling.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param ID the given ID
|
|
|
|
* @param aliasReturn the given TransliteratorAlias
|
|
|
|
* @param parseError Struct to recieve information on position
|
|
|
|
* of error if an error is encountered
|
|
|
|
* @param status Output param set to success/failure code.
|
2001-08-15 19:06:40 +00:00
|
|
|
*/
|
|
|
|
Transliterator* get(const UnicodeString& ID,
|
2001-10-04 21:22:17 +00:00
|
|
|
TransliteratorAlias*& aliasReturn,
|
2001-08-31 03:23:39 +00:00
|
|
|
UParseError& parseError,
|
|
|
|
UErrorCode& status);
|
2001-08-15 19:06:40 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Register a prototype (adopted). This adds an entry to the
|
|
|
|
* dynamic store, or replaces an existing entry. Any entry in the
|
|
|
|
* underlying static locale resource store is masked.
|
|
|
|
*/
|
|
|
|
void put(Transliterator* adoptedProto,
|
|
|
|
UBool visible);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Register an ID and a factory function pointer. This adds an
|
|
|
|
* entry to the dynamic store, or replaces an existing entry. Any
|
|
|
|
* entry in the underlying static locale resource store is masked.
|
|
|
|
*/
|
|
|
|
void put(const UnicodeString& ID,
|
|
|
|
Transliterator::Factory factory,
|
2001-10-17 17:29:34 +00:00
|
|
|
Transliterator::Token context,
|
2001-08-15 19:06:40 +00:00
|
|
|
UBool visible);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Register an ID and a resource name. This adds an entry to the
|
|
|
|
* dynamic store, or replaces an existing entry. Any entry in the
|
|
|
|
* underlying static locale resource store is masked.
|
|
|
|
*/
|
|
|
|
void put(const UnicodeString& ID,
|
|
|
|
const UnicodeString& resourceName,
|
|
|
|
UTransDirection dir,
|
|
|
|
UBool visible);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Register an ID and an alias ID. This adds an entry to the
|
|
|
|
* dynamic store, or replaces an existing entry. Any entry in the
|
|
|
|
* underlying static locale resource store is masked.
|
|
|
|
*/
|
|
|
|
void put(const UnicodeString& ID,
|
|
|
|
const UnicodeString& alias,
|
|
|
|
UBool visible);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Unregister an ID. This removes an entry from the dynamic store
|
|
|
|
* if there is one. The static locale resource store is
|
|
|
|
* unaffected.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param ID the given ID.
|
2001-08-15 19:06:40 +00:00
|
|
|
*/
|
|
|
|
void remove(const UnicodeString& ID);
|
|
|
|
|
|
|
|
//------------------------------------------------------------------
|
|
|
|
// Public ID and spec management
|
|
|
|
//------------------------------------------------------------------
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the number of IDs currently registered with the system.
|
|
|
|
* To retrieve the actual IDs, call getAvailableID(i) with
|
|
|
|
* i from 0 to countAvailableIDs() - 1.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @return the number of IDs currently registered with the system.
|
2001-08-15 19:06:40 +00:00
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
int32_t countAvailableIDs(void);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the index-th available ID. index must be between 0
|
|
|
|
* and countAvailableIDs() - 1, inclusive. If index is out of
|
|
|
|
* range, the result of getAvailableID(0) is returned.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param index the given index.
|
|
|
|
* @return the index-th available ID. index must be between 0
|
|
|
|
* and countAvailableIDs() - 1, inclusive. If index is out of
|
|
|
|
* range, the result of getAvailableID(0) is returned.
|
2001-08-15 19:06:40 +00:00
|
|
|
* @draft
|
|
|
|
*/
|
|
|
|
const UnicodeString& getAvailableID(int32_t index);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the number of registered source specifiers.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @return the number of registered source specifiers.
|
2001-08-15 19:06:40 +00:00
|
|
|
*/
|
|
|
|
int32_t countAvailableSources(void);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return a registered source specifier.
|
|
|
|
* @param index which specifier to return, from 0 to n-1, where
|
|
|
|
* n = countAvailableSources()
|
|
|
|
* @param result fill-in paramter to receive the source specifier.
|
|
|
|
* If index is out of range, result will be empty.
|
|
|
|
* @return reference to result
|
|
|
|
*/
|
|
|
|
UnicodeString& getAvailableSource(int32_t index,
|
|
|
|
UnicodeString& result);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the number of registered target specifiers for a given
|
|
|
|
* source specifier.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param source the given source specifier.
|
|
|
|
* @return the number of registered target specifiers for a given
|
|
|
|
* source specifier.
|
2001-08-15 19:06:40 +00:00
|
|
|
*/
|
|
|
|
int32_t countAvailableTargets(const UnicodeString& source);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return a registered target specifier for a given source.
|
|
|
|
* @param index which specifier to return, from 0 to n-1, where
|
|
|
|
* n = countAvailableTargets(source)
|
|
|
|
* @param source the source specifier
|
|
|
|
* @param result fill-in paramter to receive the target specifier.
|
|
|
|
* If source is invalid or if index is out of range, result will
|
|
|
|
* be empty.
|
|
|
|
* @return reference to result
|
|
|
|
*/
|
|
|
|
UnicodeString& getAvailableTarget(int32_t index,
|
|
|
|
const UnicodeString& source,
|
|
|
|
UnicodeString& result);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the number of registered variant specifiers for a given
|
|
|
|
* source-target pair. There is always at least one variant: If
|
|
|
|
* just source-target is registered, then the single variant
|
|
|
|
* NO_VARIANT is returned. If source-target/variant is registered
|
|
|
|
* then that variant is returned.
|
2002-07-01 11:04:45 +00:00
|
|
|
* @param source the source specifiers
|
|
|
|
* @param target the target specifiers
|
|
|
|
* @return the number of registered variant specifiers for a given
|
|
|
|
* source-target pair.
|
2001-08-15 19:06:40 +00:00
|
|
|
*/
|
|
|
|
int32_t countAvailableVariants(const UnicodeString& source,
|
|
|
|
const UnicodeString& target);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return a registered variant specifier for a given source-target
|
|
|
|
* pair. If NO_VARIANT is one of the variants, then it will be
|
|
|
|
* at index 0.
|
|
|
|
* @param index which specifier to return, from 0 to n-1, where
|
|
|
|
* n = countAvailableVariants(source, target)
|
|
|
|
* @param source the source specifier
|
|
|
|
* @param target the target specifier
|
|
|
|
* @param result fill-in paramter to receive the variant
|
|
|
|
* specifier. If source is invalid or if target is invalid or if
|
|
|
|
* index is out of range, result will be empty.
|
|
|
|
* @return reference to result
|
|
|
|
*/
|
|
|
|
UnicodeString& getAvailableVariant(int32_t index,
|
|
|
|
const UnicodeString& source,
|
|
|
|
const UnicodeString& target,
|
|
|
|
UnicodeString& result);
|
|
|
|
|
2002-06-29 00:04:16 +00:00
|
|
|
/**
|
|
|
|
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
|
|
|
*
|
|
|
|
* @draft ICU 2.2
|
|
|
|
*/
|
|
|
|
virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ICU "poor man's RTTI", returns a UClassID for this class.
|
|
|
|
*
|
|
|
|
* @draft ICU 2.2
|
|
|
|
*/
|
|
|
|
static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
|
|
|
|
|
2001-08-15 19:06:40 +00:00
|
|
|
private:
|
|
|
|
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
// Private implementation
|
|
|
|
//----------------------------------------------------------------
|
|
|
|
|
|
|
|
Entry* find(const UnicodeString& ID);
|
|
|
|
|
|
|
|
Entry* find(UnicodeString& source,
|
|
|
|
UnicodeString& target,
|
|
|
|
UnicodeString& variant);
|
|
|
|
|
|
|
|
Entry* findInDynamicStore(const Spec& src,
|
|
|
|
const Spec& trg,
|
|
|
|
const UnicodeString& variant);
|
|
|
|
|
|
|
|
Entry* findInStaticStore(const Spec& src,
|
|
|
|
const Spec& trg,
|
|
|
|
const UnicodeString& variant);
|
|
|
|
|
|
|
|
static Entry* findInBundle(const Spec& specToOpen,
|
2001-11-03 02:03:33 +00:00
|
|
|
const Spec& specToFind,
|
|
|
|
const UnicodeString& variant,
|
|
|
|
UTransDirection direction);
|
2001-08-15 19:06:40 +00:00
|
|
|
|
|
|
|
void registerEntry(const UnicodeString& source,
|
|
|
|
const UnicodeString& target,
|
|
|
|
const UnicodeString& variant,
|
|
|
|
Entry* adopted,
|
|
|
|
UBool visible);
|
|
|
|
|
|
|
|
void registerEntry(const UnicodeString& ID,
|
|
|
|
Entry* adopted,
|
|
|
|
UBool visible);
|
|
|
|
|
|
|
|
void registerEntry(const UnicodeString& ID,
|
|
|
|
const UnicodeString& source,
|
|
|
|
const UnicodeString& target,
|
|
|
|
const UnicodeString& variant,
|
|
|
|
Entry* adopted,
|
|
|
|
UBool visible);
|
|
|
|
|
|
|
|
void registerSTV(const UnicodeString& source,
|
|
|
|
const UnicodeString& target,
|
|
|
|
const UnicodeString& variant);
|
|
|
|
|
|
|
|
void removeSTV(const UnicodeString& source,
|
|
|
|
const UnicodeString& target,
|
|
|
|
const UnicodeString& variant);
|
|
|
|
|
|
|
|
Transliterator* instantiateEntry(const UnicodeString& ID,
|
|
|
|
Entry *entry,
|
2001-10-04 21:22:17 +00:00
|
|
|
TransliteratorAlias*& aliasReturn,
|
2001-08-31 03:23:39 +00:00
|
|
|
UParseError& parseError,
|
|
|
|
UErrorCode& status);
|
2001-08-15 19:06:40 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Dynamic registry mapping full IDs to Entry objects. This
|
|
|
|
* contains both public and internal entities. The visibility is
|
|
|
|
* controlled by whether an entry is listed in availableIDs and
|
|
|
|
* specDAG or not.
|
|
|
|
*/
|
|
|
|
Hashtable registry;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* DAG of visible IDs by spec. Hashtable: source => (Hashtable:
|
|
|
|
* target => (UVector: variant)) The UVector of variants is never
|
|
|
|
* empty. For a source-target with no variant, the special
|
|
|
|
* variant NO_VARIANT (the empty string) is stored in slot zero of
|
2001-11-21 21:45:40 +00:00
|
|
|
* the UVector.
|
2001-08-15 19:06:40 +00:00
|
|
|
*/
|
|
|
|
Hashtable specDAG;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Vector of public full IDs.
|
|
|
|
*/
|
|
|
|
UVector availableIDs;
|
2002-06-29 00:04:16 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* The address of this static class variable serves as this class's ID
|
|
|
|
* for ICU "poor man's RTTI".
|
|
|
|
*/
|
|
|
|
static const char fgClassID;
|
2001-08-15 19:06:40 +00:00
|
|
|
};
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
U_NAMESPACE_END
|
|
|
|
|
2002-09-20 01:54:48 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
|
|
|
|
2001-08-15 19:06:40 +00:00
|
|
|
#endif
|
|
|
|
//eof
|