2008-08-04 21:56:02 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
2010-01-23 06:36:03 +00:00
|
|
|
* Copyright (C) 2008-2010, International Business Machines
|
2008-08-04 21:56:02 +00:00
|
|
|
* Corporation, Google and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
*/
|
2008-08-06 21:34:53 +00:00
|
|
|
/*
|
|
|
|
* Author : eldawy@google.com (Mohamed Eldawy)
|
|
|
|
* ucnvsel.h
|
|
|
|
*
|
|
|
|
* Purpose: To generate a list of encodings capable of handling
|
|
|
|
* a given Unicode text
|
|
|
|
*
|
|
|
|
* Started 09-April-2008
|
|
|
|
*/
|
2008-08-04 21:56:02 +00:00
|
|
|
|
|
|
|
#ifndef __ICU_UCNV_SEL_H__
|
|
|
|
#define __ICU_UCNV_SEL_H__
|
|
|
|
|
|
|
|
#include "unicode/uset.h"
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#include "unicode/utf16.h"
|
|
|
|
#include "unicode/uenum.h"
|
|
|
|
#include "unicode/ucnv.h"
|
2009-11-14 00:36:06 +00:00
|
|
|
#include "unicode/localpointer.h"
|
2008-08-04 21:56:02 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* \file
|
|
|
|
*
|
2008-10-24 04:35:50 +00:00
|
|
|
* A converter selector is built with a set of encoding/charset names
|
|
|
|
* and given an input string returns the set of names of the
|
|
|
|
* corresponding converters which can convert the string.
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
2008-10-24 04:35:50 +00:00
|
|
|
* A converter selector can be serialized into a buffer and reopened
|
|
|
|
* from the serialized form.
|
2008-08-04 21:56:02 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
2008-09-27 01:12:32 +00:00
|
|
|
* @{
|
2008-08-04 21:56:02 +00:00
|
|
|
* The selector data structure
|
|
|
|
*/
|
|
|
|
struct UConverterSelector;
|
|
|
|
typedef struct UConverterSelector UConverterSelector;
|
2008-09-27 01:12:32 +00:00
|
|
|
/** @} */
|
2008-08-04 21:56:02 +00:00
|
|
|
|
|
|
|
/**
|
2008-10-24 04:35:50 +00:00
|
|
|
* Open a selector.
|
|
|
|
* If converterListSize is 0, build for all available converters.
|
|
|
|
* If excludedCodePoints is NULL, don't exclude any code points.
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
|
|
|
* @param converterList an array of the names of the encodings to be considered.
|
2008-10-24 04:35:50 +00:00
|
|
|
* Can be NULL if converterListSize==0.
|
|
|
|
* The list and the names will be cloned, and the caller
|
|
|
|
* retains ownership of the original.
|
|
|
|
* @param converterListSize number of encodings in above list.
|
|
|
|
* If 0, builds a selector for all available converters.
|
|
|
|
* @param excludedCodePoints a set of code points to be excluded from consideration.
|
|
|
|
* That is, excluded code points in a string do not change
|
|
|
|
* the selection result. (They might be handled by a callback.)
|
|
|
|
* Use NULL to exclude nothing.
|
|
|
|
* @param whichSet which converter set to use; this determines whether
|
|
|
|
* to consider only roundtrip mappings or also fallbacks.
|
2008-08-04 21:56:02 +00:00
|
|
|
* @param status an in/out ICU UErrorCode
|
2008-10-24 04:35:50 +00:00
|
|
|
* @return the new selector
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
2010-01-28 21:09:53 +00:00
|
|
|
* @stable ICU 4.2
|
2008-08-04 21:56:02 +00:00
|
|
|
*/
|
2010-01-23 06:36:03 +00:00
|
|
|
U_STABLE UConverterSelector* U_EXPORT2
|
2008-10-24 04:35:50 +00:00
|
|
|
ucnvsel_open(const char* const* converterList, int32_t converterListSize,
|
|
|
|
const USet* excludedCodePoints,
|
|
|
|
const UConverterUnicodeSet whichSet, UErrorCode* status);
|
2008-08-04 21:56:02 +00:00
|
|
|
|
|
|
|
/**
|
2008-10-24 04:35:50 +00:00
|
|
|
* Closes a selector.
|
|
|
|
* If any Enumerations were returned by ucnvsel_select*, they become invalid.
|
2008-08-04 21:56:02 +00:00
|
|
|
* They can be closed before or after calling ucnvsel_close,
|
2008-10-24 04:35:50 +00:00
|
|
|
* but should never be used after the selector is closed.
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
|
|
|
* @see ucnvsel_selectForString
|
|
|
|
* @see ucnvsel_selectForUTF8
|
|
|
|
*
|
|
|
|
* @param sel selector to close
|
|
|
|
*
|
2010-01-28 21:09:53 +00:00
|
|
|
* @stable ICU 4.2
|
2008-08-04 21:56:02 +00:00
|
|
|
*/
|
2010-01-23 06:36:03 +00:00
|
|
|
U_STABLE void U_EXPORT2
|
2008-10-24 04:35:50 +00:00
|
|
|
ucnvsel_close(UConverterSelector *sel);
|
2008-08-04 21:56:02 +00:00
|
|
|
|
2009-12-17 07:13:28 +00:00
|
|
|
#if U_SHOW_CPLUSPLUS_API
|
2009-11-14 00:36:06 +00:00
|
|
|
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \class LocalUConverterSelectorPointer
|
|
|
|
* "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
|
|
|
|
* For most methods see the LocalPointerBase base class.
|
|
|
|
*
|
|
|
|
* @see LocalPointerBase
|
|
|
|
* @see LocalPointer
|
|
|
|
* @draft ICU 4.4
|
|
|
|
*/
|
|
|
|
U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
|
|
|
|
|
|
|
|
U_NAMESPACE_END
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2008-08-04 21:56:02 +00:00
|
|
|
/**
|
2008-10-24 04:35:50 +00:00
|
|
|
* Open a selector from its serialized form.
|
|
|
|
* The buffer must remain valid and unchanged for the lifetime of the selector.
|
|
|
|
* This is much faster than creating a selector from scratch.
|
|
|
|
* Using a serialized form from a different machine (endianness/charset) is supported.
|
|
|
|
*
|
|
|
|
* @param buffer pointer to the serialized form of a converter selector;
|
|
|
|
* must be 32-bit-aligned
|
2008-08-04 21:56:02 +00:00
|
|
|
* @param length the capacity of this buffer (can be equal to or larger than
|
2008-10-24 04:35:50 +00:00
|
|
|
* the actual data length)
|
2008-08-04 21:56:02 +00:00
|
|
|
* @param status an in/out ICU UErrorCode
|
2008-10-24 04:35:50 +00:00
|
|
|
* @return the new selector
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
2010-01-28 21:09:53 +00:00
|
|
|
* @stable ICU 4.2
|
2008-08-04 21:56:02 +00:00
|
|
|
*/
|
2010-01-23 06:36:03 +00:00
|
|
|
U_STABLE UConverterSelector* U_EXPORT2
|
2008-10-24 04:35:50 +00:00
|
|
|
ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
|
2008-08-04 21:56:02 +00:00
|
|
|
|
|
|
|
/**
|
2008-10-24 04:35:50 +00:00
|
|
|
* Serialize a selector into a linear buffer.
|
|
|
|
* The serialized form is portable to different machines.
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
|
|
|
* @param sel selector to serialize
|
2008-10-24 04:35:50 +00:00
|
|
|
* @param buffer pointer to 32-bit-aligned memory to be filled with the
|
|
|
|
* serialized form of this converter selector
|
2008-08-04 21:56:02 +00:00
|
|
|
* @param bufferCapacity the capacity of this buffer
|
|
|
|
* @param status an in/out ICU UErrorCode
|
|
|
|
* @return the required buffer capacity to hold the serialized data (even if the call fails
|
2008-10-24 04:35:50 +00:00
|
|
|
* with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
2010-01-28 21:09:53 +00:00
|
|
|
* @stable ICU 4.2
|
2008-08-04 21:56:02 +00:00
|
|
|
*/
|
2010-01-23 06:36:03 +00:00
|
|
|
U_STABLE int32_t U_EXPORT2
|
2008-10-24 04:35:50 +00:00
|
|
|
ucnvsel_serialize(const UConverterSelector* sel,
|
|
|
|
void* buffer, int32_t bufferCapacity, UErrorCode* status);
|
2008-08-04 21:56:02 +00:00
|
|
|
|
|
|
|
/**
|
2008-10-24 04:35:50 +00:00
|
|
|
* Select converters that can map all characters in a UTF-16 string,
|
|
|
|
* ignoring the excluded code points.
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
2008-10-24 04:35:50 +00:00
|
|
|
* @param sel a selector
|
|
|
|
* @param s UTF-16 string
|
|
|
|
* @param length length of the string, or -1 if NUL-terminated
|
2008-08-04 21:56:02 +00:00
|
|
|
* @param status an in/out ICU UErrorCode
|
2008-10-24 04:35:50 +00:00
|
|
|
* @return an enumeration containing encoding names.
|
|
|
|
* The returned encoding names and their order will be the same as
|
|
|
|
* supplied when building the selector.
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
2010-01-28 21:09:53 +00:00
|
|
|
* @stable ICU 4.2
|
2008-08-04 21:56:02 +00:00
|
|
|
*/
|
2010-01-23 06:36:03 +00:00
|
|
|
U_STABLE UEnumeration * U_EXPORT2
|
2008-10-24 04:35:50 +00:00
|
|
|
ucnvsel_selectForString(const UConverterSelector* sel,
|
|
|
|
const UChar *s, int32_t length, UErrorCode *status);
|
2008-08-04 21:56:02 +00:00
|
|
|
|
|
|
|
/**
|
2008-10-24 04:35:50 +00:00
|
|
|
* Select converters that can map all characters in a UTF-8 string,
|
|
|
|
* ignoring the excluded code points.
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
2008-10-24 04:35:50 +00:00
|
|
|
* @param sel a selector
|
|
|
|
* @param s UTF-8 string
|
|
|
|
* @param length length of the string, or -1 if NUL-terminated
|
2008-08-04 21:56:02 +00:00
|
|
|
* @param status an in/out ICU UErrorCode
|
2008-10-24 04:35:50 +00:00
|
|
|
* @return an enumeration containing encoding names.
|
|
|
|
* The returned encoding names and their order will be the same as
|
|
|
|
* supplied when building the selector.
|
2008-08-04 21:56:02 +00:00
|
|
|
*
|
2010-01-28 21:09:53 +00:00
|
|
|
* @stable ICU 4.2
|
2008-08-04 21:56:02 +00:00
|
|
|
*/
|
2010-01-23 06:36:03 +00:00
|
|
|
U_STABLE UEnumeration * U_EXPORT2
|
2008-10-24 04:35:50 +00:00
|
|
|
ucnvsel_selectForUTF8(const UConverterSelector* sel,
|
|
|
|
const char *s, int32_t length, UErrorCode *status);
|
2008-08-04 21:56:02 +00:00
|
|
|
|
2008-08-06 21:34:53 +00:00
|
|
|
#endif /* __ICU_UCNV_SEL_H__ */
|