scuffed-code/icu4c/source/common/unicode/ucnvsel.h

170 lines
6.1 KiB
C
Raw Normal View History

/*
*******************************************************************************
*
* Copyright (C) 2008, International Business Machines
* Corporation, Google and others. All Rights Reserved.
*
*******************************************************************************
*/
// Author : eldawy@google.com (Mohamed Eldawy)
// ucnvsel.h
//
// Purpose: To generate a list of encodings capable of handling
// a given Unicode text
//
// Started 09-April-2008
#ifndef __ICU_UCNV_SEL_H__
#define __ICU_UCNV_SEL_H__
#include "unicode/uset.h"
#include "unicode/utypes.h"
#include "unicode/utf16.h"
#include "unicode/uenum.h"
#include "unicode/ucnv.h"
/**
* \file
*
* This file contains the declarations for the encoding selector.
* The goal is, given a Unicode string, to find the encodings
* that this string can be mapped to.
*
*/
/**
 * Opaque handle for a converter selector.
 *
 * This header declares only the type name; no members are declared here.
 * Every function in this header accepts or returns a selector through a
 * UConverterSelector pointer.
 */
typedef struct UConverterSelector UConverterSelector;
/**
 * Opens a converter selector.
 *
 * If converterList is NULL, the selector is built for all converters.
 * If excludedCodePoints is NULL, no code points are excluded.
 *
 * @param converterList a pointer to the names of the encodings to be
 *        involved. NULL means build a selector for all possible converters.
 *        ucnvsel_open() does not take ownership of this array: once
 *        ucnvsel_open() returns, the caller is free to reuse or destroy it.
 * @param converterListSize number of encodings in converterList.
 *        Setting converterListSize to 0 builds a selector for all
 *        converters.
 * @param excludedCodePoints a set of code points to be excluded from
 *        consideration. Set to NULL to exclude nothing.
 * @param whichSet which converter set to use; this determines whether the
 *        selector is constructed for fallback mappings or for
 *        roundtrip-only mappings.
 * @param status an in/out ICU UErrorCode
 * @return a pointer to the created selector
 *
 * @draft ICU 4.2
 */
U_CAPI UConverterSelector *
ucnvsel_open(const char *const *converterList,
             int32_t converterListSize,
             const USet *excludedCodePoints,
             const UConverterUnicodeSet whichSet,
             UErrorCode *status);
/**
 * Closes a selector and releases its allocated memory.
 *
 * Any enumerations that were returned by the ucnvsel_select*() functions
 * become invalid. They can be closed before or after calling
 * ucnvsel_close(), but must never be used after the selector is closed.
 *
 * @see ucnvsel_selectForString
 * @see ucnvsel_selectForUTF8
 *
 * @param sel selector to close
 *
 * @draft ICU 4.2
 */
U_CAPI void
ucnvsel_close(UConverterSelector *sel);
/**
 * Unserializes a selector from a linear buffer. No alignment is necessary.
 *
 * The function does NOT take ownership of the given buffer; the caller is
 * free to reuse or destroy the buffer immediately after this call.
 *
 * ucnvsel_open() is expensive. Unserializing a selector is much faster
 * than building the data structure from scratch and is nicer on the heap
 * (not as many allocations and frees), so it is the preferable way to
 * obtain a selector when serialized data is available.
 *
 * @param buffer pointer to a linear buffer containing serialized data
 * @param length the capacity of this buffer (can be equal to or larger
 *        than the actual data length)
 * @param status an in/out ICU UErrorCode
 * @return a pointer to the created selector
 *
 * @draft ICU 4.2
 */
U_CAPI UConverterSelector *
ucnvsel_unserialize(const char *buffer,
                    int32_t length,
                    UErrorCode *status);
/**
 * Serializes a selector into a linear buffer. No alignment is necessary.
 *
 * The current serialized form is portable across different endianness and
 * can travel between ASCII and EBCDIC systems.
 *
 * @param sel selector to consider
 * @param buffer pointer to a linear buffer to receive the data
 * @param bufferCapacity the capacity of this buffer
 * @param status an in/out ICU UErrorCode
 * @return the buffer capacity required to hold the serialized data. The
 *         required capacity is returned even if the call fails with a
 *         U_BUFFER_OVERFLOW_ERROR.
 *
 * @draft ICU 4.2
 */
U_CAPI int32_t
ucnvsel_serialize(const UConverterSelector *sel,
                  char *buffer,
                  int32_t bufferCapacity,
                  UErrorCode *status);
/**
 * Checks a UTF-16 string using the selector and finds out which encodings
 * the string can be mapped to.
 *
 * @param sel built selector
 * @param s pointer to the UTF-16 string
 * @param length length of the UTF-16 string in UChars, or -1 if the string
 *        is NUL-terminated
 * @param status an in/out ICU UErrorCode
 * @return an enumeration containing encoding names. The returned encoding
 *         names will be the same as those supplied to ucnvsel_open(), or
 *         will be the canonical names if the selector was built for all
 *         encodings. The order of the encodings will be the same as
 *         supplied in the call to ucnvsel_open() (if encodings were
 *         supplied).
 *
 * @draft ICU 4.2
 */
U_CAPI UEnumeration *
ucnvsel_selectForString(const UConverterSelector *sel,
                        const UChar *s,
                        int32_t length,
                        UErrorCode *status);
/**
 * Checks a UTF-8 string using the selector and finds out which encodings
 * the string can be mapped to.
 *
 * Illegal code points are ignored by this function! Only legal code points
 * are considered for conversion.
 *
 * @param sel built selector
 * @param s pointer to the UTF-8 string
 * @param length length of the UTF-8 string (in chars), or -1 if the string
 *        is NUL-terminated
 * @param status an in/out ICU UErrorCode
 * @return an enumeration containing encoding names. The returned encoding
 *         names will be the same as those supplied to ucnvsel_open(), or
 *         will be the canonical names if the selector was built for all
 *         encodings. The order of the encodings will be the same as
 *         supplied in the call to ucnvsel_open() (if encodings were
 *         supplied).
 *
 * @draft ICU 4.2
 */
U_CAPI UEnumeration *
ucnvsel_selectForUTF8(const UConverterSelector *sel,
                      const char *s,
                      int32_t length,
                      UErrorCode *status);
#endif // __ICU_UCNV_SEL_H__