scuffed-code/icu4c/source/common/ucnv_bld.c
2000-01-13 21:11:42 +00:00

1040 lines
30 KiB
C

/*
********************************************************************
* COPYRIGHT:
* Copyright (c) 1996-1999, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*
* ucnv_bld.c:
*
* Defines functions that are used in the creation/initialization/deletion
* of converters and related structures.
* Uses the ucnv_io.h routines to access disk information.
* Is used by ucnv.h to implement the public API create/delete/flushCache routines.
*/
#include "ucnv_io.h"
#include "uhash.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv_err.h"
#include "ucnv_imp.h"
#include "unicode/udata.h"
#include "unicode/ucnv.h"
#include "umutex.h"
#include "cstring.h"
#include "cmemory.h"
#include "filestrm.h"
#include <stdio.h>
/* Names of the converters that are handled algorithmically.
 * isDataBasedConverter() walks this list to fill
 * ALGORITHMIC_CONVERTERS_HASHTABLE, stopping at the first empty string,
 * so the list must ALWAYS end with an empty string.
 */
static const char *algorithmicConverterNames[] = {
    "LATIN_1",
    "UTF8",
    "UTF16_BigEndian",
    "UTF16_LittleEndian",
    "UTF16_PlatformEndian",
    "UTF16_OppositeEndian",
    "ISO_2022",
    "JIS",
    "EUC",
    "GB",
    ""                          /* terminator */
};
/*Opens the converter data item with the given name through udata
 *and returns a newly allocated UConverter object built from it.
 */
static UConverter *createConverterFromFile (const char *converterName, UErrorCode * err);
/*Creates a newly allocated UConverter (and its shared data) for one of the
 *algorithmic converter names.
 */
static UConverter *createConverterFromAlgorithmicType (const char *realName, UErrorCode * err);
/*Given a file, returns a newly allocated CompactByteArray based on the serialized one */
static CompactByteArray *createCompactByteArrayFromFile (FileStream * infile, UErrorCode * err);
/*Given a file, returns a newly allocated CompactShortArray based on the serialized one */
static CompactShortArray *createCompactShortArrayFromFile (FileStream * infile, UErrorCode * err);
/*Currently we have functions that take us from a codepage name to
 *a platform type and a codepage number,
 *assuming the following:
 *codepage name = $PLATFORM-#CODEPAGE
 *e.g. ibm-949 = platform type = UCNV_IBM and codepage # = 949
 *The functions below implement that.
 */
static UConverterPlatform getPlatformFromName (char *name);
static int32_t getCodepageNumberFromName (char *name);
/*Maps an algorithmic converter name to its UConverterType;
 *unknown names give UCNV_UNSUPPORTED_CONVERTER.
 */
static UConverterType getAlgorithmicTypeFromName (const char *realName);
/*These functions initialize the lightweight mutable part of the
 *object to correct values based on the sharedData defaults.
 */
static void initializeDataConverter (UConverter * myConverter);
static void initializeAlgorithmicConverter (UConverter * myConverter);
/**
 * Hash function for UConverterSharedData: hashes the name field,
 * ignoring case.
 */
static int32_t uhash_hashSharedData (void *sharedData);
/**
 * Un-flattens shared data that comes from a udata item: returns a newly
 * allocated UConverterSharedData copy whose table pointers are set up.
 */
U_CAPI UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData *data, UErrorCode *status);
/*Global caches, created lazily on first use:
 *SHARED_DATA_HASHTABLE holds the UConverterSharedData objects (see shareConverterData),
 *ALGORITHMIC_CONVERTERS_HASHTABLE holds the algorithmic converter names
 *(see isDataBasedConverter).
 */
UHashtable *SHARED_DATA_HASHTABLE = NULL;
UHashtable *ALGORITHMIC_CONVERTERS_HASHTABLE = NULL;
/* Uppercases 'name' in place, character by character through
 * uprv_toupper(), and returns 'name'.
 * The walk ends as soon as a converted character is NUL.
 */
char *
strtoupper (char *name)
{
    char *cursor = name;

    for (;;)
    {
        const char upper = uprv_toupper (*cursor);

        *cursor = upper;
        if (upper == '\0')
        {
            break;
        }
        ++cursor;
    }
    return name;
}
/* Returns TRUE if 'c' occurs in the NUL-terminated string 'setOfChars',
 * FALSE otherwise. The terminating NUL itself is never matched, so
 * c == '\0' always gives FALSE.
 *
 * The set is walked with a pointer rather than an 8-bit index: an 8-bit
 * index wraps around after 255 and would never reach the terminator of a
 * longer set.
 */
bool_t
isInSet (char c, const char *setOfChars)
{
    const char *candidate;

    for (candidate = setOfChars; *candidate != '\0'; ++candidate)
    {
        if (*candidate == c)
        {
            return TRUE;
        }
    }
    return FALSE;
}
/* Returns the offset in 'line' of the first character that is not one of
 * 'separators' (i.e. the number of leading separator characters).
 * If 'line' consists only of separators, the offset of its terminating
 * NUL is returned.
 */
int32_t
nextTokenOffset (const char *line, const char *separators)
{
    int32_t offset;

    for (offset = 0; line[offset] != '\0'; ++offset)
    {
        if (!isInSet (line[offset], separators))
        {
            break;
        }
    }
    return offset;
}
/* Extracts the next token from 'line'.
 *
 * Leading separator characters are skipped, then characters are copied
 * into 'token' until a separator or the end of 'line' is reached, and
 * 'token' is NUL-terminated.
 *
 * token:      output buffer; no size is passed in, so the caller must make
 *             sure it can hold the longest possible token plus the NUL.
 * line:       NUL-terminated input.
 * separators: NUL-terminated set of separator characters.
 *
 * Returns a pointer into 'line' just past the copied token.
 */
char *
getToken (char *token, char *line, const char *separators)
{
    int32_t in = nextTokenOffset (line, separators);
    /* 32-bit output index: an 8-bit one turns negative after 127
     * characters and would then write in front of 'token'.
     */
    int32_t out = 0;

    while (line[in] && (!isInSet (line[in], separators)))
    {
        token[out++] = line[in++];
    }
    token[out] = '\0';
    return line + in;
}
/* Case-insensitive string hash: hashes an uppercased copy of 'name'
 * with uhash_hashString().
 *
 * The copy lives in a buffer of UCNV_MAX_CONVERTER_NAME_LENGTH bytes.
 * Names that do not fit are truncated to the buffer size instead of being
 * copied past its end; since every lookup goes through this same
 * function, truncation stays consistent between insertions and queries.
 */
int32_t uhash_hashIString(const void* name)
{
    char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    const char *source = (const char *) name;
    const int32_t capacity = (int32_t) sizeof (myName) - 1;
    int32_t length = 0;

    while (length < capacity && source[length] != '\0')
    {
        myName[length] = uprv_toupper (source[length]);
        ++length;
    }
    myName[length] = '\0';
    return uhash_hashString(myName);
}
/* Reads a serialized CompactShortArray from 'infile' and returns a newly
 * opened CompactShortArray that adopts the arrays read.
 *
 * Layout read from the stream, in this order:
 *   int32_t  values count
 *   int32_t  index count
 *   int32_t  block shift
 *   int16_t  values[values count]
 *   uint16_t index[index count]
 *
 * infile: stream positioned at the start of the serialized array.
 * err:    in/out error code. Nothing is done if it already indicates
 *         a failure. Set to U_INVALID_TABLE_FILE when a count is negative
 *         or too large, or when the stream ends early; set to
 *         U_MEMORY_ALLOCATION_ERROR when an array cannot be allocated.
 *
 * Returns NULL on any of the failures above.
 */
CompactShortArray* createCompactShortArrayFromFile (FileStream * infile, UErrorCode * err)
{
    const int32_t fieldSize = (int32_t) sizeof (int32_t);
    int16_t *myShortArray = NULL;
    uint16_t *myIndexArray = NULL;
    int32_t myValuesCount = 0;
    int32_t myIndexCount = 0;
    int32_t myBlockShift = 0;
    int32_t valuesBytes;
    int32_t indexBytes;

    if (U_FAILURE (*err))
        return NULL;

    /* Read and validate the whole header before allocating anything. */
    if (T_FileStream_read (infile, &myValuesCount, fieldSize) != fieldSize
        || T_FileStream_read (infile, &myIndexCount, fieldSize) != fieldSize
        || T_FileStream_read (infile, &myBlockShift, fieldSize) != fieldSize
        || myValuesCount < 0
        || myIndexCount < 0
        /* the byte sizes below must fit into an int32_t */
        || myValuesCount > 0x7FFFFFFF / (int32_t) sizeof (int16_t)
        || myIndexCount > 0x7FFFFFFF / (int32_t) sizeof (uint16_t))
    {
        *err = U_INVALID_TABLE_FILE;
        return NULL;
    }
    valuesBytes = myValuesCount * (int32_t) sizeof (int16_t);
    indexBytes = myIndexCount * (int32_t) sizeof (uint16_t);

    myShortArray = (int16_t *) uprv_malloc (valuesBytes);
    if (myShortArray == NULL)
    {
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* first array: the values; a short read means a truncated file */
    if (T_FileStream_read (infile, myShortArray, valuesBytes) != valuesBytes)
    {
        uprv_free (myShortArray);
        *err = U_INVALID_TABLE_FILE;
        return NULL;
    }

    myIndexArray = (uint16_t *) uprv_malloc (indexBytes);
    if (myIndexArray == NULL)
    {
        uprv_free (myShortArray);
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* second array: the index */
    if (T_FileStream_read (infile, myIndexArray, indexBytes) != indexBytes)
    {
        uprv_free (myIndexArray);
        uprv_free (myShortArray);
        *err = U_INVALID_TABLE_FILE;
        return NULL;
    }

    /* create a compact array from the data just read
     * that adopts our newly created arrays
     */
    return ucmp16_openAdoptWithBlockShift (myIndexArray, myShortArray, myValuesCount, 0, myBlockShift);
}
/* Reads a serialized CompactByteArray from 'infile' and returns a newly
 * opened CompactByteArray that adopts the arrays read.
 *
 * Layout read from the stream, in this order:
 *   int32_t  values count
 *   int32_t  index count
 *   int8_t   values[values count]
 *   uint16_t index[index count]
 *
 * infile: stream positioned at the start of the serialized array.
 * err:    in/out error code. Nothing is done if it already indicates
 *         a failure. Set to U_INVALID_TABLE_FILE when a count is negative
 *         or too large, or when the stream ends early; set to
 *         U_MEMORY_ALLOCATION_ERROR when an array cannot be allocated.
 *
 * Returns NULL on any of the failures above.
 */
CompactByteArray* createCompactByteArrayFromFile (FileStream * infile,
                                                  UErrorCode * err)
{
    const int32_t fieldSize = (int32_t) sizeof (int32_t);
    int8_t *myByteArray = NULL;
    uint16_t *myIndexArray = NULL;
    int32_t myValuesCount = 0;
    int32_t myIndexCount = 0;
    int32_t valuesBytes;
    int32_t indexBytes;

    if (U_FAILURE (*err))
        return NULL;

    /* Read and validate the whole header before allocating anything. */
    if (T_FileStream_read (infile, &myValuesCount, fieldSize) != fieldSize
        || T_FileStream_read (infile, &myIndexCount, fieldSize) != fieldSize
        || myValuesCount < 0
        || myIndexCount < 0
        /* the index byte size below must fit into an int32_t */
        || myIndexCount > 0x7FFFFFFF / (int32_t) sizeof (uint16_t))
    {
        *err = U_INVALID_TABLE_FILE;
        return NULL;
    }
    valuesBytes = myValuesCount * (int32_t) sizeof (int8_t);
    indexBytes = myIndexCount * (int32_t) sizeof (uint16_t);

    myByteArray = (int8_t *) uprv_malloc (valuesBytes);
    if (myByteArray == NULL)
    {
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* first array: the values; a short read means a truncated file */
    if (T_FileStream_read (infile, myByteArray, valuesBytes) != valuesBytes)
    {
        uprv_free (myByteArray);
        *err = U_INVALID_TABLE_FILE;
        return NULL;
    }

    myIndexArray = (uint16_t *) uprv_malloc (indexBytes);
    if (myIndexArray == NULL)
    {
        uprv_free (myByteArray);
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* second array: the index */
    if (T_FileStream_read (infile, myIndexArray, indexBytes) != indexBytes)
    {
        uprv_free (myIndexArray);
        uprv_free (myByteArray);
        *err = U_INVALID_TABLE_FILE;
        return NULL;
    }

    /* create a compact array from the data just read
     * that adopts our newly created arrays
     */
    return ucmp8_openAdopt (myIndexArray, myByteArray, myValuesCount);
}
/* Acceptance callback handed to udata_openChoice() by
 * createConverterFromFile().
 *
 * Accepts a data item only when its UDataInfo header
 *  - is at least 20 bytes long,
 *  - matches this platform's endianness, charset family and UChar size,
 *  - carries the data format "cnvt" (0x63 0x6e 0x76 0x74), and
 *  - has major format version 2.
 * 'context', 'type' and 'name' are not looked at.
 * The checks run in this order and stop at the first mismatch.
 */
static bool_t
isCnvAcceptable(void *context,
                const char *type, const char *name,
                UDataInfo *pInfo) {
    if (!(pInfo->size>=20)) {
        return FALSE;
    }
    if (pInfo->isBigEndian!=U_IS_BIG_ENDIAN) {
        return FALSE;
    }
    if (pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }
    if (pInfo->sizeofUChar!=U_SIZEOF_UCHAR) {
        return FALSE;
    }
    /* dataFormat="cnvt" */
    if (pInfo->dataFormat[0]!=0x63) {
        return FALSE;
    }
    if (pInfo->dataFormat[1]!=0x6e) {
        return FALSE;
    }
    if (pInfo->dataFormat[2]!=0x76) {
        return FALSE;
    }
    if (pInfo->dataFormat[3]!=0x74) {
        return FALSE;
    }
    if (pInfo->formatVersion[0]!=2) {
        return FALSE;
    }
    return TRUE;
}
#define DATA_TYPE "cnv"
/* Creates a data-based converter from the udata item 'fileName' of type
 * DATA_TYPE.
 *
 * The item is opened with udata_openChoice() (filtered by
 * isCnvAcceptable), its flattened shared data is cloned with
 * ucnv_data_unFlattenClone(), and the open UDataMemory is stored in the
 * clone's dataMemory field. The mutable part of the converter is then set up with
 * initializeDataConverter().
 *
 * fileName: name of the converter data item.
 * err:      in/out error code; may not be NULL. Nothing is done if it
 *           already indicates a failure.
 *
 * Returns the new converter, or NULL on failure. On failure the data item
 * is closed again and *err is set: failures reported by udata or by the
 * clone step keep their own error code; a missing memory block or a NULL
 * clone without an error code is reported as U_MEMORY_ALLOCATION_ERROR.
 */
UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
{
    UConverter *myConverter = NULL;
    UConverterSharedData *flatData = NULL;
    UConverterSharedData *clonedData = NULL;
    UDataMemory *data;

    if (err == NULL || U_FAILURE (*err)) {
        return NULL;
    }

    data = udata_openChoice(NULL, DATA_TYPE, fileName, isCnvAcceptable, NULL, err);
    if(U_FAILURE(*err))
    {
        return NULL;
    }

    myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
    if (myConverter == NULL)
    {
        udata_close(data);
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    flatData = (UConverterSharedData *) udata_getMemory(data);
    if (flatData == NULL)
    {
        udata_close(data);
        uprv_free (myConverter);
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    /* clone it. OK to drop the original sharedData */
    clonedData = ucnv_data_unFlattenClone(flatData, err);

    /* Check the outcome BEFORE touching the clone: it is NULL when the
     * clone step fails early.
     * NOTE(review): a non-NULL clone that comes back together with a
     * failure code is not released here - confirm how it should be
     * torn down.
     */
    if(U_FAILURE(*err) || clonedData == NULL)
    {
        if(U_SUCCESS(*err))
        {
            *err = U_MEMORY_ALLOCATION_ERROR;
        }
        udata_close(data);
        uprv_free (myConverter);
        return NULL;
    }

    clonedData->dataMemory = (void*)data; /* for future use */
    myConverter->sharedData = clonedData;
    initializeDataConverter (myConverter);

    return myConverter;
}
/* Writes the textual platform prefix for 'pltfrm' into 'platformString':
 * "ibm" for UCNV_IBM, the empty string for every other platform.
 * The caller provides the buffer (at least 4 bytes are written for "ibm").
 */
void
copyPlatformString (char *platformString, UConverterPlatform pltfrm)
{
    const char *platformName = "";

    if (pltfrm == UCNV_IBM)
    {
        platformName = "ibm";
    }
    uprv_strcpy (platformString, platformName);
}
/* Maps an algorithmic converter name to its UConverterType.
 *
 * The comparison is exact (case-sensitive). "UTF16_PlatformEndian" and
 * "UTF16_OppositeEndian" resolve to the big- or little-endian UTF-16 type
 * depending on U_IS_BIG_ENDIAN. Names that are not in the table give
 * UCNV_UNSUPPORTED_CONVERTER.
 */
UConverterType
getAlgorithmicTypeFromName (const char *realName)
{
    static const struct
    {
        const char *name;
        UConverterType type;
    } knownTypes[] = {
        {"UTF8", UCNV_UTF8},
        {"UTF16_BigEndian", UCNV_UTF16_BigEndian},
        {"UTF16_LittleEndian", UCNV_UTF16_LittleEndian},
        {"LATIN_1", UCNV_LATIN_1},
        {"JIS", UCNV_JIS},
        {"EUC", UCNV_EUC},
        {"GB", UCNV_GB},
        {"ISO_2022", UCNV_ISO_2022},
#if U_IS_BIG_ENDIAN
        {"UTF16_PlatformEndian", UCNV_UTF16_BigEndian},
        {"UTF16_OppositeEndian", UCNV_UTF16_LittleEndian}
#else
        {"UTF16_PlatformEndian", UCNV_UTF16_LittleEndian},
        {"UTF16_OppositeEndian", UCNV_UTF16_BigEndian}
#endif
    };
    const int32_t knownCount = (int32_t) (sizeof (knownTypes) / sizeof (knownTypes[0]));
    int32_t idx;

    for (idx = 0; idx < knownCount; ++idx)
    {
        if (uprv_strcmp (realName, knownTypes[idx].name) == 0)
        {
            return knownTypes[idx].type;
        }
    }
    return UCNV_UNSUPPORTED_CONVERTER;
}
/* Derives the platform from a codepage name of the form
 * $PLATFORM-#CODEPAGE (e.g. "ibm-949").
 *
 * The first '-'-separated token of 'name' is compared, ignoring case,
 * with "IBM". Returns UCNV_IBM on a match and UCNV_UNKNOWN otherwise.
 *
 * The token is compared in place instead of being copied into a small
 * stack buffer, so a long first token (for example "UTF16_BigEndian")
 * cannot overrun anything.
 */
UConverterPlatform
getPlatformFromName (char *name)
{
    static const char ibmTag[] = "IBM";
    const char *token = name;
    int32_t idx;

    /* skip separators in front of the first token */
    while (*token == '-')
    {
        ++token;
    }

    /* A NUL inside the token never equals a tag character, so this loop
     * cannot read past the end of 'name'.
     */
    for (idx = 0; ibmTag[idx] != '\0'; ++idx)
    {
        if (uprv_toupper (token[idx]) != ibmTag[idx])
        {
            return UCNV_UNKNOWN;
        }
    }

    /* the token must end right after the tag */
    if (token[idx] == '\0' || token[idx] == '-')
    {
        return UCNV_IBM;
    }
    return UCNV_UNKNOWN;
}
/* Derives the codepage number from a codepage name of the form
 * $PLATFORM-#CODEPAGE (e.g. "ibm-949" gives 949).
 *
 * The second '-'-separated token of 'name' is converted with
 * T_CString_stringToInteger() in base 10. If there is no second token,
 * the empty string is converted.
 *
 * The first token is skipped in place and the second one is copied with
 * an explicit bound, so long tokens cannot overrun the local buffer;
 * a second token longer than 9 characters is truncated to 9.
 */
int32_t
getCodepageNumberFromName (char *name)
{
    char myNumber[10];
    const int32_t capacity = (int32_t) sizeof (myNumber) - 1;
    const char *cursor = name;
    int32_t length = 0;

    /* separators in front of the platform token */
    while (*cursor == '-')
    {
        ++cursor;
    }
    /* the platform token itself */
    while (*cursor != '\0' && *cursor != '-')
    {
        ++cursor;
    }
    /* separators between platform and number */
    while (*cursor == '-')
    {
        ++cursor;
    }
    /* the number token, bounded by the buffer size */
    while (*cursor != '\0' && *cursor != '-' && length < capacity)
    {
        myNumber[length++] = *cursor++;
    }
    myNumber[length] = '\0';

    return T_CString_stringToInteger (myNumber, 10);
}
int32_t uhash_hashSharedData (void *sharedData)
{
return uhash_hashIString(((UConverterSharedData *) sharedData)->name);
}
/* Puts 'data' into the global SHARED_DATA_HASHTABLE.
 *
 * The hashtable is created on first use, sized by
 * ucnv_io_countAvailableAliases() and keyed through uhash_hashSharedData.
 * The new table is built outside the global mutex and installed under it;
 * if another table has been installed in the meantime, the new one is
 * closed again. If the table cannot be created, the function returns
 * without storing anything. Errors from uhash_put() are not reported.
 */
void shareConverterData (UConverterSharedData * data)
{
    UErrorCode status = U_ZERO_ERROR;

    if (SHARED_DATA_HASHTABLE == NULL)
    {
        UHashtable *candidate = uhash_openSize ((UHashFunction) uhash_hashSharedData,
                                                ucnv_io_countAvailableAliases (&status),
                                                &status);

        if (U_FAILURE (status))
        {
            return;
        }

        umtx_lock (NULL);
        if (SHARED_DATA_HASHTABLE != NULL)
        {
            /* somebody else installed a table first: drop ours */
            uhash_close (candidate);
        }
        else
        {
            SHARED_DATA_HASHTABLE = candidate;
        }
        umtx_unlock (NULL);
    }

    umtx_lock (NULL);
    uhash_put (SHARED_DATA_HASHTABLE, data, &status);
    umtx_unlock (NULL);
}
/* Looks up the shared converter data stored under 'name' (compared through
 * the case-insensitive hash uhash_hashIString) in SHARED_DATA_HASHTABLE.
 * Returns NULL when the table has not been created yet; otherwise returns
 * whatever uhash_get() yields for that hash.
 */
UConverterSharedData *getSharedConverterData (const char *name)
{
    if (SHARED_DATA_HASHTABLE != NULL)
    {
        return (UConverterSharedData *) uhash_get (SHARED_DATA_HASHTABLE, uhash_hashIString (name));
    }

    /* special case: no table has been created yet */
    return NULL;
}
/* Frees the memory blocks that belong to a shared converter data object,
 * if and only if its referenceCounter is not greater than 0.
 *
 * Returns FALSE (and frees nothing) while the object is still referenced,
 * TRUE after it has been released.
 *
 * For SBCS, MBCS, DBCS and EBCDIC_STATEFUL data the compact arrays are
 * closed and the table is freed; other conversion types have nothing to
 * release besides the object itself. A non-NULL dataMemory is closed with
 * udata_close().
 *
 * Original author's note (srl): if we have a dataMemory, then all ucmp's
 * came from udata and their tables go away when the data is closed, so
 * only the UCMP objects themselves need to be deallocated here. Once there
 * is an API to simply 'init' a ucmp8, no action at all will be needed.
 */
bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
{
    UDataMemory *data;

    if (deadSharedData->referenceCounter > 0)
    {
        return FALSE;
    }

    switch (deadSharedData->conversionType)
    {
    case UCNV_SBCS:
        ucmp8_close (deadSharedData->table->sbcs.fromUnicode);
        uprv_free (deadSharedData->table);
        break;

    case UCNV_MBCS:
        ucmp16_close (deadSharedData->table->mbcs.fromUnicode);
        ucmp16_close (deadSharedData->table->mbcs.toUnicode);
        uprv_free (deadSharedData->table);
        break;

    case UCNV_DBCS:
    case UCNV_EBCDIC_STATEFUL:
        ucmp16_close (deadSharedData->table->dbcs.fromUnicode);
        ucmp16_close (deadSharedData->table->dbcs.toUnicode);
        uprv_free (deadSharedData->table);
        break;

    default:
        break;
    }

    data = (UDataMemory *) deadSharedData->dataMemory;
    if (data != NULL)
    {
        udata_close (data);
    }

    uprv_free (deadSharedData);
    return TRUE;
}
/* Tells whether the converter called 'name' is data based.
 *
 * Returns TRUE when 'name' is NOT found (ignoring case) among the
 * algorithmic converter names, FALSE when it is found.
 *
 * The lookup table ALGORITHMIC_CONVERTERS_HASHTABLE is created on first
 * use from algorithmicConverterNames (it stores pointers into that static
 * array). It is filled outside the global mutex and installed under it;
 * if another table has been installed in the meantime the new one is
 * closed. If the table cannot be opened, FALSE is returned.
 */
bool_t isDataBasedConverter (const char *name)
{
    UErrorCode status = U_ZERO_ERROR;

    if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL)
    {
        int32_t idx;
        UHashtable *candidate = uhash_open ((UHashFunction) uhash_hashIString, &status);

        if (U_FAILURE (status))
        {
            return FALSE;
        }

        /* the name list ends with an empty string */
        for (idx = 0; algorithmicConverterNames[idx][0] != '\0'; idx++)
        {
            uhash_put (candidate,
                       (void *) algorithmicConverterNames[idx],
                       &status);
        }

        umtx_lock (NULL);
        if (ALGORITHMIC_CONVERTERS_HASHTABLE != NULL)
        {
            uhash_close (candidate);
        }
        else
        {
            ALGORITHMIC_CONVERTERS_HASHTABLE = candidate;
        }
        umtx_unlock (NULL);
    }

    if (uhash_get (ALGORITHMIC_CONVERTERS_HASHTABLE, uhash_hashIString (name)) == NULL)
    {
        return TRUE;
    }
    return FALSE;
}
/* Creates a converter for 'converterName'.
 *
 * Name resolution:
 *  - NULL selects the default converter name, which is used as is;
 *    if there is none, *err is set to U_MISSING_RESOURCE_ERROR.
 *  - an empty name selects "PlatformInvariant";
 *  - any other name is mapped through ucnv_io_getConverterName(); if that
 *    fails, the name is used as given (the converter may have been added
 *    without updating the alias table, or there may be no alias table).
 *
 * Creation, depending on whether the converter is data based and whether
 * its shared data is already cached:
 *  - data based, not cached:   load from file, then cache the shared data;
 *  - algorithmic, not cached:  build it, then cache the shared data;
 *  - cached (either kind):     allocate a new UConverter, bump the shared
 *                              reference counter under the global mutex
 *                              and run the matching initializer.
 *
 * Returns the new converter, or NULL on failure with *err set.
 * Nothing is done if *err already indicates a failure.
 */
UConverter *
createConverter (const char *converterName, UErrorCode * err)
{
    const char *realName;
    UConverter *myUConverter = NULL;
    UConverterSharedData *cachedData = NULL;
    UErrorCode internalErrorCode = U_ZERO_ERROR;
    bool_t isDefaultConverter = FALSE;
    bool_t dataBased;

    if (U_FAILURE (*err))
    {
        return NULL;
    }

    /* In case "name" is NULL we want to open the default converter. */
    if (converterName == NULL)
    {
        converterName = ucnv_io_getDefaultConverterName();
        if (converterName == NULL)
        {
            *err = U_MISSING_RESOURCE_ERROR;
            return NULL;
        }
        isDefaultConverter = TRUE;
    }

    if (*converterName == 0)
    {
        /* Use the PlatformInvariant converter. */
        realName = "PlatformInvariant";
    }
    else if (isDefaultConverter)
    {
        /* the default converter name is already canonical */
        realName = converterName;
    }
    else
    {
        /* get the canonical converter name */
        realName = ucnv_io_getConverterName(converterName, &internalErrorCode);
        if (U_FAILURE(internalErrorCode) || realName == NULL)
        {
            /* fall back to the input name */
            realName = converterName;
        }
    }

    dataBased = isDataBasedConverter (realName);
    cachedData = getSharedConverterData (realName);

    if (cachedData == NULL)
    {
        /* Not cached: build a fresh converter together with its shared data */
        if (dataBased)
        {
            /* we need to stream it in from file */
            myUConverter = createConverterFromFile (realName, err);
            if (U_FAILURE (*err) || (myUConverter == NULL))
            {
                return myUConverter;
            }
        }
        else
        {
            myUConverter = createConverterFromAlgorithmicType (realName, err);
            if (U_FAILURE (*err) || (myUConverter == NULL))
            {
                uprv_free (myUConverter);
                return NULL;
            }
        }

        /* share it with other library clients */
        shareConverterData (myUConverter->sharedData);
        return myUConverter;
    }

    /* Already cached: point a new converter at the existing shared data */
    myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
    if (myUConverter == NULL)
    {
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    /* update the reference counter: one more client */
    umtx_lock (NULL);
    cachedData->referenceCounter++;
    umtx_unlock (NULL);

    myUConverter->sharedData = cachedData;
    if (dataBased)
    {
        initializeDataConverter (myUConverter);
    }
    else
    {
        initializeAlgorithmicConverter (myUConverter);
    }
    return myUConverter;
}
/* Initializes the mutable, lightweight portion of a data-based converter.
 *
 * The substitution character and its length are copied from
 * sharedData->defaultConverterValues; mode, status words, error buffer
 * lengths and extraInfo are reset, and both error callbacks are set to
 * the SUBSTITUTE callbacks. As a side effect the shared default
 * toUnicodeStatus is reset to 0 as well.
 *
 * myUConverter->sharedData must already be set.
 */
void initializeDataConverter (UConverter * myUConverter)
{
    UConverterSharedData *shared = myUConverter->sharedData;

    myUConverter->mode = UCNV_SI;
    myUConverter->extraInfo = NULL;

    /* nothing pending in either error buffer */
    myUConverter->UCharErrorBufferLength = 0;
    myUConverter->charErrorBufferLength = 0;

    /* substitution character comes from the shared defaults */
    myUConverter->subCharLen = shared->defaultConverterValues.subCharLen;
    uprv_memcpy (myUConverter->subChar,
                 shared->defaultConverterValues.subChar,
                 myUConverter->subCharLen);

    /* conversion state */
    myUConverter->toUnicodeStatus = 0x00;
    myUConverter->fromUnicodeStatus = 0x00;
    shared->defaultConverterValues.toUnicodeStatus = 0x00;

    /* default error behaviour: substitute */
    myUConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
    myUConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
}
/* Helper for initializeAlgorithmicConverter(): stores the values that every
 * algorithmic converter type sets in the same way - the byte-length
 * limits, the substitution character (in the converter and in the shared
 * defaults) and a cleared to-Unicode status.
 */
static void
setAlgorithmicDefaults (UConverter * myConverter,
                        int8_t minBytesPerChar, int8_t maxBytesPerChar,
                        const char *subChar, int8_t subCharLen)
{
    UConverterSharedData *shared = myConverter->sharedData;

    shared->minBytesPerChar = minBytesPerChar;
    shared->maxBytesPerChar = maxBytesPerChar;
    shared->defaultConverterValues.toUnicodeStatus = 0;
    shared->defaultConverterValues.subCharLen = subCharLen;
    myConverter->subCharLen = subCharLen;
    myConverter->toUnicodeStatus = 0;
    uprv_memcpy (myConverter->subChar, subChar, subCharLen);
    uprv_memcpy (shared->defaultConverterValues.subChar, subChar, subCharLen);
}

/* Initializes an algorithmic converter based on its type
 * (myConverter->sharedData->conversionType).
 *
 * For every type the mode, the error callbacks (SUBSTITUTE), the error
 * buffer lengths and extraInfo are reset. The known types additionally
 * get their byte-length limits and substitution character; some of them
 * also get platform, codepage and name written into the shared data.
 * Unknown types only receive the common reset.
 *
 * For UCNV_ISO_2022 a UConverterDataISO2022 is allocated into extraInfo.
 * If that allocation fails, extraInfo is left NULL (this function has no
 * way to report the error), instead of writing through a NULL pointer.
 *
 * NOTE(review): the UTF-16 cases store the names "UTF_16BE"/"UTF_16LE" in
 * sharedData->name, which differ from the "UTF16_BigEndian"/
 * "UTF16_LittleEndian" spellings in algorithmicConverterNames - confirm
 * that this is intended, since shareConverterData() hashes on
 * sharedData->name.
 */
void
initializeAlgorithmicConverter (UConverter * myConverter)
{
    static const char UTF8_subChar[] = {(char) 0xFF, (char) 0xFF, (char) 0xFF};
    static const char UTF16BE_subChar[] = {(char) 0xFF, (char) 0xFD};
    static const char UTF16LE_subChar[] = {(char) 0xFD, (char) 0xFF};
    static const char EUC_subChar[] = {(char) 0xAF, (char) 0xFE};
    static const char GB_subChar[] = {(char) 0xFF, (char) 0xFF};
    static const char JIS_subChar[] = {(char) 0xFF, (char) 0xFF};
    static const char LATIN1_subChar[] = {0x1A};

    myConverter->mode = UCNV_SI;
    myConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
    myConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
    myConverter->charErrorBufferLength = 0;
    myConverter->UCharErrorBufferLength = 0;
    myConverter->extraInfo = NULL;

    switch (myConverter->sharedData->conversionType)
    {
    case UCNV_UTF8:
        setAlgorithmicDefaults (myConverter, 1, 4, UTF8_subChar, 3);
        myConverter->fromUnicodeStatus = 0;
        myConverter->sharedData->platform = UCNV_IBM;
        myConverter->sharedData->codepage = 1208;
        uprv_strcpy(myConverter->sharedData->name, "UTF8");
        break;

    case UCNV_LATIN_1:
        setAlgorithmicDefaults (myConverter, 1, 1, LATIN1_subChar, 1);
        myConverter->sharedData->platform = UCNV_IBM;
        myConverter->sharedData->codepage = 819;
        uprv_strcpy(myConverter->sharedData->name, "LATIN_1");
        break;

    case UCNV_UTF16_BigEndian:
        setAlgorithmicDefaults (myConverter, 2, 2, UTF16BE_subChar, 2);
        myConverter->fromUnicodeStatus = 0;
        uprv_strcpy(myConverter->sharedData->name, "UTF_16BE");
        myConverter->sharedData->platform = UCNV_IBM;
        myConverter->sharedData->codepage = 1200;
        break;

    case UCNV_UTF16_LittleEndian:
        setAlgorithmicDefaults (myConverter, 2, 2, UTF16LE_subChar, 2);
        myConverter->fromUnicodeStatus = 0;
        myConverter->sharedData->platform = UCNV_IBM;
        myConverter->sharedData->codepage = 1200;
        uprv_strcpy(myConverter->sharedData->name, "UTF_16LE");
        break;

    case UCNV_EUC:
        setAlgorithmicDefaults (myConverter, 1, 2, EUC_subChar, 2);
        break;

    case UCNV_ISO_2022:
        /* the char error buffer starts out holding the bytes ESC % B */
        myConverter->charErrorBuffer[0] = 0x1b;
        myConverter->charErrorBuffer[1] = 0x25;
        myConverter->charErrorBuffer[2] = 0x42;
        myConverter->charErrorBufferLength = 3;
        setAlgorithmicDefaults (myConverter, 1, 3, LATIN1_subChar, 1);
        myConverter->fromUnicodeStatus = 0;
        myConverter->sharedData->codepage = 2022;
        uprv_strcpy(myConverter->sharedData->name, "ISO_2022");
        myConverter->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
        if (myConverter->extraInfo != NULL)
        {
            UConverterDataISO2022 *iso2022 = (UConverterDataISO2022 *) myConverter->extraInfo;

            iso2022->currentConverter = NULL;
            iso2022->escSeq2022Length = 0;
        }
        break;

    case UCNV_GB:
        setAlgorithmicDefaults (myConverter, 2, 2, GB_subChar, 2);
        break;

    case UCNV_JIS:
        setAlgorithmicDefaults (myConverter, 2, 2, JIS_subChar, 2);
        break;

    default:
        break;
    }

    myConverter->toUnicodeStatus = myConverter->sharedData->defaultConverterValues.toUnicodeStatus;
}
/* Creates an algorithmic converter for 'actualName'.
 *
 * Even algorithmic converters carry a UConverterSharedData object, for
 * uniformity of design and control flow; its table is NULL since there
 * are no tables, and its dataMemory is NULL. The shared data starts with
 * a reference counter of 1, platform UCNV_UNKNOWN and codepage 0, and its
 * conversion type comes from getAlgorithmicTypeFromName(). The converter
 * is then set up with initializeAlgorithmicConverter().
 *
 * Returns the new converter, or NULL with *err set to
 * U_MEMORY_ALLOCATION_ERROR if an allocation fails. Returns NULL without
 * doing anything if *err already indicates a failure.
 */
UConverter *
createConverterFromAlgorithmicType (const char *actualName, UErrorCode * err)
{
    UConverter *converter;
    UConverterSharedData *shared;
    const UConverterType algorithmicType = getAlgorithmicTypeFromName (actualName);

    if (U_FAILURE (*err))
    {
        return NULL;
    }

    converter = (UConverter *) uprv_malloc (sizeof (UConverter));
    if (converter == NULL)
    {
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    shared = (UConverterSharedData *) uprv_malloc (sizeof (UConverterSharedData));
    if (shared == NULL)
    {
        *err = U_MEMORY_ALLOCATION_ERROR;
        uprv_free (converter);
        return NULL;
    }

    /* fill in the shared part */
    shared->structSize = sizeof(UConverterSharedData);
    shared->table = NULL;
    shared->dataMemory = NULL;
    uprv_strcpy (shared->name, actualName);
    shared->referenceCounter = 1;   /* this converter is the first client */
    shared->platform = UCNV_UNKNOWN;
    shared->codepage = 0;
    shared->conversionType = algorithmicType;

    /* attach it and initialize the mutable part */
    converter->sharedData = shared;
    initializeAlgorithmicConverter (converter);
    return converter;
}
/* Builds a usable UConverterSharedData from its flattened form.
 *
 * 'source' points at a flattened UConverterSharedData that is directly
 * followed in memory by the table data for its conversion type. The
 * structure itself is copied into newly allocated memory, a table object
 * is allocated for it, and the table members are set up from the data
 * that follows the structure:
 *   SBCS:                   256 UChars (referenced in place), then a
 *                           ucmp8 for fromUnicode;
 *   DBCS, EBCDIC_STATEFUL:  a ucmp16 for toUnicode, padding to a multiple
 *                           of 4 bytes, then a ucmp16 for fromUnicode;
 *   MBCS:                   256 bool_t starters (referenced in place),
 *                           then the same two ucmp16s as for DBCS.
 * The memory behind 'source' must therefore stay valid for as long as the
 * clone is used.
 *
 * status: in/out error code. Nothing is done if it already indicates a
 *         failure. Set to U_INVALID_TABLE_FORMAT when the structure size
 *         or the conversion type is not recognized, and to
 *         U_MEMORY_ALLOCATION_ERROR when an allocation fails; NULL is
 *         returned in all of these cases and nothing stays allocated.
 *
 * If one of the ucmp clone calls reports a failure through 'status', the
 * partially set up clone is still returned; callers must check 'status'.
 *
 * Allocations go through uprv_malloc()/uprv_free(), matching the
 * uprv_free() calls in deleteSharedConverterData().
 */
UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *source, UErrorCode *status)
{
    const uint8_t *raw, *oldraw;
    UConverterSharedData *data = NULL;

    if(U_FAILURE(*status))
        return NULL;

    if(source->structSize != sizeof(UConverterSharedData))
    {
        *status = U_INVALID_TABLE_FORMAT;
        return NULL;
    }

    data = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData));
    if(data == NULL)
    {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    uprv_memcpy(data,source,sizeof(UConverterSharedData));

    /* the table data starts right behind the flattened structure */
    raw = (const uint8_t*)source;
    raw += data->structSize;

    /* the copied table pointer is meaningless; every case below replaces it */
    data->table = NULL;

    switch (data->conversionType)
    {
    case UCNV_SBCS:
        data->table = uprv_malloc(sizeof(UConverterSBCSTable));
        if(data->table == NULL)
        {
            uprv_free(data);
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        data->table->sbcs.toUnicode = (UChar*)raw;
        raw += sizeof(UChar)*256;
        data->table->sbcs.fromUnicode = ucmp8_cloneFromData(&raw, status);
        break;

    case UCNV_EBCDIC_STATEFUL:
    case UCNV_DBCS:
        data->table = uprv_malloc(sizeof(UConverterDBCSTable));
        if(data->table == NULL)
        {
            uprv_free(data);
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        oldraw = raw;
        data->table->dbcs.toUnicode=ucmp16_cloneFromData(&raw, status);
        while((raw-oldraw)%4) /* pad to 4 */
            raw++;
        data->table->dbcs.fromUnicode =ucmp16_cloneFromData(&raw, status);
        break;

    case UCNV_MBCS:
        data->table = uprv_malloc(sizeof(UConverterMBCSTable));
        if(data->table == NULL)
        {
            uprv_free(data);
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        data->table->mbcs.starters = (bool_t*)raw;
        raw += sizeof(bool_t)*256;
        oldraw = raw;
        data->table->mbcs.toUnicode = ucmp16_cloneFromData(&raw, status);
        while((raw-oldraw)%4) /* pad to 4 */
            raw++;
        data->table->mbcs.fromUnicode = ucmp16_cloneFromData(&raw, status);
        break;

    default:
        /* unknown conversion type: do not leak the copy */
        uprv_free(data);
        *status = U_INVALID_TABLE_FORMAT;
        return NULL;
    }

    return data;
}