2000-06-28 19:43:17 +00:00
/*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2011-05-26 20:05:39 +00:00
* Copyright ( C ) 1999 - 2011 International Business Machines
2000-06-28 19:43:17 +00:00
* Corporation and others . All Rights Reserved .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
*
* ucnv_bld . h :
2001-03-16 20:49:41 +00:00
* Contains internal data structure definitions
* Created by Bertrand A . Damiba
2000-06-28 19:43:17 +00:00
*
* Change history :
*
* 06 / 29 / 2000 helena Major rewrite of the callback APIs .
*/
# ifndef UCNV_BLD_H
# define UCNV_BLD_H
# include "unicode/utypes.h"
2004-08-26 22:51:40 +00:00
# if !UCONFIG_NO_CONVERSION
2000-06-28 19:43:17 +00:00
# include "unicode/ucnv.h"
# include "unicode/ucnv_err.h"
2003-11-08 00:09:50 +00:00
# include "ucnv_cnv.h"
# include "ucnvmbcs.h"
2003-10-25 00:29:13 +00:00
# include "ucnv_ext.h"
2003-09-22 13:57:19 +00:00
# include "udataswp.h"
2001-10-08 23:26:58 +00:00
2000-07-17 19:47:36 +00:00
/*
 * Element count of the overflow buffers in UConverter (charErrorBuffer[] and
 * UCharErrorBuffer[]), enough for escaping callbacks.
 */
# define UCNV_ERROR_BUFFER_LENGTH 32
2003-07-22 04:22:57 +00:00
/*
 * At most 4 bytes per substitution character.
 * Sizes UConverterStaticData.subChar[] and is therefore part of the .cnv file
 * format: changing this value changes that format.
 */
2000-07-17 19:47:36 +00:00
# define UCNV_MAX_SUBCHAR_LEN 4
2000-06-28 19:43:17 +00:00
2003-07-22 04:22:57 +00:00
/*
 * At most 8 bytes per character (UTF-8 uses up to 6).
 * Sizes UConverter.invalidCharBuffer[]; note that UConverter.toUBytes[] is
 * declared with UCNV_MAX_CHAR_LEN - 1 elements, i.e. one byte fewer.
 */
# define UCNV_MAX_CHAR_LEN 8
2002-09-18 01:27:37 +00:00
/*
 * Converter option bits, stored in UConverter.options:
 * - UCNV_OPTION_VERSION   mask for the low 4 bits (see UCNV_GET_VERSION)
 * - UCNV_OPTION_SWAP_LFNL bit 4
 */
# define UCNV_OPTION_VERSION 0xf
# define UCNV_OPTION_SWAP_LFNL 0x10
2009-04-01 19:31:33 +00:00
/*
 * Extract the UCNV_OPTION_VERSION bits from cnv->options.
 * cnv must be a pointer to an object with an options member (such as a
 * UConverter *); the argument is evaluated exactly once.
 */
# define UCNV_GET_VERSION(cnv) ((cnv)->options&UCNV_OPTION_VERSION)
2002-09-18 01:27:37 +00:00
2000-06-28 19:43:17 +00:00
U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
itself is compiled under C + + , the linkage of the funcptrs will
work .
*/
2003-11-08 00:09:50 +00:00
/*
 * Conversion table union; its only member is the MBCS table.
 * Per the comment on UConverterSharedData.table, that pointer used to refer to
 * a UConverterTable but is now unused, because the UConverterMBCSTable is
 * embedded directly in UConverterSharedData.
 */
union UConverterTable {
UConverterMBCSTable mbcs ;
} ;
2000-06-28 19:43:17 +00:00
typedef union UConverterTable UConverterTable ;
/*
 * Forward declaration only; the struct is not defined in this header.
 * UConverterSharedData.impl points to one of these.
 */
struct UConverterImpl ;
typedef struct UConverterImpl UConverterImpl ;
2000-12-19 23:07:50 +00:00
/**
 * Bit flags for UConverterStaticData.unicodeMask:
 * - UCNV_HAS_SUPPLEMENTARY (bit 0): has supplementary
 * - UCNV_HAS_SURROGATES    (bit 1): has single surrogates
 */
# define UCNV_HAS_SUPPLEMENTARY 1
# define UCNV_HAS_SURROGATES 2
2000-10-03 20:18:22 +00:00
/*
 * Fixed-layout static description of one converter.
 *
 * Each member is annotated with "+offset: size" (byte offset from the start
 * of the struct and size in bytes); the sizes add up to the 100 bytes noted
 * at the end. The comment on UCNV_MAX_SUBCHAR_LEN states that this struct is
 * part of the .cnv file format, so members must not be added, removed,
 * resized or reordered without changing that format.
 */
typedef struct UConverterStaticData { /* +offset: size */
uint32_t structSize ; /* +0: 4 Size of this structure */
2000-06-28 19:43:17 +00:00
2000-10-03 20:18:22 +00:00
char name
[ UCNV_MAX_CONVERTER_NAME_LENGTH ] ; /* +4: 60 internal name of the converter - invariant chars */
2000-06-28 19:43:17 +00:00
2000-10-03 20:18:22 +00:00
int32_t codepage ; /* +64: 4 codepage # (now IBM-$codepage) */
2000-06-28 19:43:17 +00:00
2000-10-03 20:18:22 +00:00
int8_t platform ; /* +68: 1 platform of the converter (only IBM now) */
int8_t conversionType ; /* +69: 1 conversion type */
2000-06-28 19:43:17 +00:00
2000-10-03 20:18:22 +00:00
int8_t minBytesPerChar ; /* +70: 1 Minimum # bytes per char in this codepage */
2003-10-30 22:54:18 +00:00
int8_t maxBytesPerChar ; /* +71: 1 Maximum # bytes output per UChar in this codepage */
2000-06-28 19:43:17 +00:00
2000-10-03 20:18:22 +00:00
uint8_t subChar [ UCNV_MAX_SUBCHAR_LEN ] ; /* +72: 4 substitution character bytes [note: 4 and 8 byte boundary] */
int8_t subCharLen ; /* +76: 1 number of bytes used in subChar */
uint8_t hasToUnicodeFallback ; /* +77: 1 boolean flag, stored as uint8_t so that its size is consistent across platforms (NOTE(review): the previous note read "UBool needs to be changed to UBool"; presumably "to uint8_t" was meant) */
uint8_t hasFromUnicodeFallback ; /* +78: 1 boolean flag, stored as uint8_t like hasToUnicodeFallback */
2000-12-19 23:07:50 +00:00
uint8_t unicodeMask ; /* +79: 1 bit 0: has supplementary (UCNV_HAS_SUPPLEMENTARY) bit 1: has single surrogates (UCNV_HAS_SURROGATES) */
2001-02-26 19:45:35 +00:00
uint8_t subChar1 ; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
2001-08-24 01:02:37 +00:00
uint8_t reserved [ 19 ] ; /* +81: 19 to round out the structure */
2000-10-03 20:18:22 +00:00
/* total size: 100 */
2000-06-28 19:43:17 +00:00
} UConverterStaticData ;
/*
 * Defines the UConverterSharedData struct,
 * the immutable, shared part of UConverter.
 *
 * A UConverter refers to it through UConverter.sharedData. Static definitions
 * of this struct exist elsewhere (see the note above the mbcs member), so the
 * order of the existing members must be kept and new members can only be
 * appended.
 */
2000-07-13 16:51:48 +00:00
struct UConverterSharedData {
2000-06-28 19:43:17 +00:00
uint32_t structSize ; /* Size of this structure */
uint32_t referenceCounter ; /* used to count number of clients, 0xffffffff for static SharedData */
2002-07-17 02:34:50 +00:00
const void * dataMemory ; /* from udata_openChoice() - for cleanup */
2003-11-08 00:09:50 +00:00
void * table ; /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */
2000-06-28 19:43:17 +00:00
const UConverterStaticData * staticData ; /* pointer to the static (non changing) data. */
2002-07-17 02:34:50 +00:00
UBool sharedDataCached ; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
/*UBool staticDataOwned; TRUE if static data owned by shared data & should be freed with it, NEVER true for udata() loaded statics. This ignored variable was removed to make space for sharedDataCached. */
2000-06-28 19:43:17 +00:00
const UConverterImpl * impl ; /* vtable-style struct of mostly function pointers */
/* initial values of some members of the mutable part of the object (UConverter) */
uint32_t toUnicodeStatus ; /* initial value for UConverter.toUnicodeStatus */
2003-11-08 00:09:50 +00:00
/*
 * Shared data structures currently come in two flavors:
 * - readonly for built-in algorithmic converters
 * - allocated for MBCS, with a pointer to an allocated UConverterTable
 *   which always has a UConverterMBCSTable
 *
 * To eliminate one allocation, I am making the UConverterMBCSTable
 * a member of the shared data. It is the last member so that static
 * definitions of UConverterSharedData work as before.
 * The table field above also remains to avoid updating all static
 * definitions, but is now unused.
 *
 * markus 2003-nov-07
 */
UConverterMBCSTable mbcs ;
} ;
2000-06-28 19:43:17 +00:00
/*
 * Defines a UConverter, the lightweight mutable part the user sees.
 * The immutable part lives in UConverterSharedData and is reached through
 * the sharedData member.
 */
struct UConverter {
/*
 * Error function pointer called when conversion issues
 * occur during a ucnv_fromUnicode call
 */
2001-11-21 01:02:11 +00:00
void ( U_EXPORT2 * fromUCharErrorBehaviour ) ( const void * context ,
2000-06-28 19:43:17 +00:00
UConverterFromUnicodeArgs * args ,
const UChar * codeUnits ,
int32_t length ,
UChar32 codePoint ,
UConverterCallbackReason reason ,
UErrorCode * ) ;
/*
* Error function pointer called when conversion issues
2003-08-01 15:03:46 +00:00
* occur during a ucnv_toUnicode call
2000-06-28 19:43:17 +00:00
*/
/*
 * Note on naming: "fromChar" here means "from charset bytes", so this is the
 * to-Unicode callback; it receives UConverterToUnicodeArgs and its reason is
 * recorded in toUCallbackReason below.
 */
2001-11-21 01:02:11 +00:00
void ( U_EXPORT2 * fromCharErrorBehaviour ) ( const void * context ,
2000-06-28 19:43:17 +00:00
UConverterToUnicodeArgs * args ,
const char * codeUnits ,
int32_t length ,
UConverterCallbackReason reason ,
UErrorCode * ) ;
/*
2002-09-18 01:27:37 +00:00
* Pointer to additional data that depends on the converter type .
* Used by ISO 2022 , SCSU , GB 18030 converters , possibly more .
2000-06-28 19:43:17 +00:00
*/
void * extraInfo ;
2002-08-06 00:55:25 +00:00
/* callback contexts; presumably passed as the context argument of the matching error callback above -- TODO confirm */
const void * fromUContext ;
const void * toUContext ;
2006-07-05 23:08:50 +00:00
/*
 * Pointer to charset bytes for substitution string if subCharLen > 0,
 * or pointer to Unicode string (UChar *) if subCharLen < 0.
 * subCharLen == 0 is equivalent to using a skip callback.
 * If the pointer is != subUChars then it is allocated with
 * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes.
 * The subUChars field is declared as UChar[] not uint8_t[] to
 * guarantee alignment for UChars.
 */
uint8_t * subChars ;
2002-08-06 00:55:25 +00:00
UConverterSharedData * sharedData ; /* Pointer to the shared immutable part of the converter object */
2002-09-18 01:27:37 +00:00
uint32_t options ; /* options flags from UConverterOpen, may contain additional bits; see the UCNV_OPTION_ macros */
2002-08-06 00:55:25 +00:00
UBool sharedDataIsCached ; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
2003-05-13 21:05:05 +00:00
UBool isCopyLocal ; /* TRUE if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
UBool isExtraLocal ; /* TRUE if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
2002-08-06 00:55:25 +00:00
UBool useFallback ;
int8_t toULength ; /* number of bytes in toUBytes */
2003-07-22 04:22:57 +00:00
uint8_t toUBytes [ UCNV_MAX_CHAR_LEN - 1 ] ; /* more "toU status"; keeps the bytes of the current character (one element fewer than UCNV_MAX_CHAR_LEN) */
2002-08-06 00:55:25 +00:00
uint32_t toUnicodeStatus ; /* Used to internalize stream status information */
int32_t mode ;
uint32_t fromUnicodeStatus ;
2003-07-24 00:28:47 +00:00
/*
 * More fromUnicode() status. Serves 3 purposes:
 * - keeps a lead surrogate between buffers (similar to toUBytes[])
 * - keeps a lead surrogate at the end of the stream,
 *   which the framework handles as truncated input
 * - if the fromUnicode() implementation returns to the framework
 *   (ucnv.c ucnv_fromUnicode()), then the framework calls the callback
 *   for this code point
 */
UChar32 fromUChar32 ;
2002-08-06 00:55:25 +00:00
2003-10-30 22:54:18 +00:00
/*
 * value for ucnv_getMaxCharSize()
 *
 * usually simply copied from the static data, but ucnvmbcs.c modifies
 * the value depending on the converter type and options
 */
int8_t maxBytesPerUChar ;
2002-08-06 00:55:25 +00:00
int8_t subCharLen ; /* length of the codepage specific character sequence; the sign selects the meaning of subChars (see the subChars documentation) */
int8_t invalidCharLength ; /* presumably the number of valid bytes in invalidCharBuffer -- TODO confirm */
int8_t charErrorBufferLength ; /* number of valid bytes in charErrorBuffer */
int8_t invalidUCharLength ; /* presumably the number of valid UChars in invalidUCharBuffer -- TODO confirm */
int8_t UCharErrorBufferLength ; /* number of valid UChars in UCharErrorBuffer */
uint8_t subChar1 ; /* single-byte substitution character if different from subChar */
2003-10-25 00:29:13 +00:00
UBool useSubChar1 ;
2003-07-22 04:22:57 +00:00
char invalidCharBuffer [ UCNV_MAX_CHAR_LEN ] ; /* bytes from last error/callback situation */
2002-08-06 00:55:25 +00:00
uint8_t charErrorBuffer [ UCNV_ERROR_BUFFER_LENGTH ] ; /* codepage output from Error functions */
2006-07-05 23:08:50 +00:00
UChar subUChars [ UCNV_MAX_SUBCHAR_LEN / U_SIZEOF_UCHAR ] ; /* see subChars documentation */
2002-08-06 00:55:25 +00:00
2003-07-22 04:22:57 +00:00
UChar invalidUCharBuffer [ U16_MAX_LENGTH ] ; /* UChars from last error/callback situation */
2002-08-06 00:55:25 +00:00
UChar UCharErrorBuffer [ UCNV_ERROR_BUFFER_LENGTH ] ; /* unicode output from Error functions */
2003-10-25 00:29:13 +00:00
/* fields for conversion extension */
/* store previous UChars/chars to continue partial matches */
UChar32 preFromUFirstCP ; /* >=0: partial match */
UChar preFromU [ UCNV_EXT_MAX_UCHARS ] ;
char preToU [ UCNV_EXT_MAX_BYTES ] ;
int8_t preFromULength , preToULength ; /* negative: replay */
int8_t preToUFirstLength ; /* length of first character */
2008-03-12 23:20:11 +00:00
/* new fields for ICU 4.0 */
UConverterCallbackReason toUCallbackReason ; /* (*fromCharErrorBehaviour) reason, set when error is detected */
2000-06-28 19:43:17 +00:00
} ;
U_CDECL_END /* end of UConverter */
/* file name extension of converter data files (the .cnv file format) */
# define CONVERTER_FILE_EXTENSION ".cnv"
2005-05-08 07:56:44 +00:00
/**
 * Return the number of all converter names.
 * The result is the exclusive upper bound for the index accepted by
 * ucnv_bld_getAvailableConverter().
 * @param pErrorCode The error code
 * @return the number of all converter names
 */
U_CFUNC uint16_t
ucnv_bld_countAvailableConverters ( UErrorCode * pErrorCode ) ;
/**
 * Return the (n)th converter name in mixed case, or NULL
 * if there is none (typically, if the data cannot be loaded).
 * 0 <= n < ucnv_bld_countAvailableConverters().
 * @param n The number specifies which converter name to get
 * @param pErrorCode The error code
 * @return the (n)th converter name in mixed case, or NULL if there is none.
 */
U_CFUNC const char *
ucnv_bld_getAvailableConverter ( uint16_t n , UErrorCode * pErrorCode ) ;
2003-11-08 00:09:50 +00:00
/**
 * Load a non-algorithmic converter.
 * If pkg == NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
 * NOTE(review): pkg is not a parameter of this function; presumably it is a
 * member of *pArgs (UConverterLoadArgs is declared elsewhere) -- confirm.
 * @param pArgs The load arguments
 * @param err The error code
 * @return pointer to the shared data of the loaded converter
 */
2011-05-26 20:05:39 +00:00
U_CAPI UConverterSharedData *
2003-11-12 02:04:55 +00:00
ucnv_load ( UConverterLoadArgs * pArgs , UErrorCode * err ) ;
2003-11-08 00:09:50 +00:00
/**
 * Unload a non-algorithmic converter.
 * Precondition: sharedData->referenceCounter != ~0, i.e. not the 0xffffffff
 * value that UConverterSharedData.referenceCounter uses for static shared data.
 * This function must be called inside umtx_lock(&cnvCacheMutex).
 * @param sharedData The shared data to unload
 */
2011-05-26 20:05:39 +00:00
U_CAPI void
2003-11-08 00:09:50 +00:00
ucnv_unload ( UConverterSharedData * sharedData ) ;
2003-09-22 13:57:19 +00:00
/**
 * Swap ICU .cnv conversion tables. See udataswp.h.
 * NOTE(review): the parameters and the return value are not documented here;
 * presumably they follow the swap-function convention described in
 * udataswp.h -- confirm there.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
ucnv_swap ( const UDataSwapper * ds ,
const void * inData , int32_t length , void * outData ,
UErrorCode * pErrorCode ) ;
2004-08-26 22:51:40 +00:00
# endif
2000-06-28 19:43:17 +00:00
# endif /* UCNV_BLD_H */