423580d732
. X-SVN-Rev: 570
1040 lines
30 KiB
C
1040 lines
30 KiB
C
/*
|
|
********************************************************************
|
|
* COPYRIGHT:
|
|
* Copyright (c) 1996-1999, International Business Machines Corporation and
|
|
* others. All Rights Reserved.
|
|
********************************************************************
|
|
*
|
|
* ucnv_bld.c:
|
|
*
|
|
* Defines functions that are used in the creation/initialization/deletion
|
|
* of converters and related structures.
|
|
* uses ucnv_io.h routines to access disk information
|
|
* is used by ucnv.h to implement public API create/delete/flushCache routines
|
|
*/
|
|
|
|
|
|
#include "ucnv_io.h"
|
|
#include "uhash.h"
|
|
#include "ucmp16.h"
|
|
#include "ucmp8.h"
|
|
#include "unicode/ucnv_bld.h"
|
|
#include "unicode/ucnv_err.h"
|
|
#include "ucnv_imp.h"
|
|
#include "unicode/udata.h"
|
|
#include "unicode/ucnv.h"
|
|
#include "umutex.h"
|
|
#include "cstring.h"
|
|
#include "cmemory.h"
|
|
#include "filestrm.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
/*
 * Names of the converters that are implemented algorithmically.
 * isDataBasedConverter() loads them into ALGORITHMIC_CONVERTERS_HASHTABLE,
 * walking the list until it meets the empty string, so the trailing ""
 * sentinel must ALWAYS stay the last entry.
 */
static const char *algorithmicConverterNames[] = {
    "LATIN_1",
    "UTF8",
    "UTF16_BigEndian",
    "UTF16_LittleEndian",
    "UTF16_PlatformEndian",
    "UTF16_OppositeEndian",
    "ISO_2022",
    "JIS",
    "EUC",
    "GB",
    ""                          /* end-of-list sentinel */
};
|
|
|
|
/*Takes an alias name gets an actual converter file name
|
|
*goes to disk and opens it.
|
|
*allocates the memory and returns a new UConverter object
|
|
*/
|
|
static UConverter *createConverterFromFile (const char *converterName, UErrorCode * err);
|
|
static UConverter *createConverterFromAlgorithmicType (const char *realName, UErrorCode * err);
|
|
|
|
/*Given a file returns a newly allocated CompactByteArray based on the a serialized one */
|
|
static CompactByteArray *createCompactByteArrayFromFile (FileStream * infile, UErrorCode * err);
|
|
|
|
/*Given a file returns a newly allocated CompactShortArray based on the a serialized one */
|
|
static CompactShortArray *createCompactShortArrayFromFile (FileStream * infile, UErrorCode * err);
|
|
|
|
/*Currently we have function to take us from a codepage name to
|
|
*a platform type and a codepage number
|
|
*assuming the following
|
|
*codepage name = $PLATFORM-#CODEPAGE
|
|
*e.g. ibm-949 = platform type = UCNV_IBM and codepage # = 949
|
|
*the functions below implement that
|
|
*/
|
|
static UConverterPlatform getPlatformFromName (char *name);
|
|
static int32_t getCodepageNumberFromName (char *name);
|
|
|
|
static UConverterType getAlgorithmicTypeFromName (const char *realName);
|
|
|
|
|
|
/*these functions initialize the lightweight mutable part of the
|
|
*object to correct values based on the sharedData defaults.
|
|
*/
|
|
static void initializeDataConverter (UConverter * myConverter);
|
|
static void initializeAlgorithmicConverter (UConverter * myConverter);
|
|
/**
|
|
*hash function for UConverterSharedData
|
|
*/
|
|
|
|
static int32_t uhash_hashSharedData (void *sharedData);
|
|
|
|
/**
|
|
* Un flatten shared data from a UDATA..
|
|
*/
|
|
U_CAPI UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData *data, UErrorCode *status);
|
|
|
|
|
|
/*initializes some global variables */
|
|
UHashtable *SHARED_DATA_HASHTABLE = NULL;
|
|
UHashtable *ALGORITHMIC_CONVERTERS_HASHTABLE = NULL;
|
|
|
|
/*
 * Upper-cases the NUL-terminated string 'name' in place, one character at
 * a time through uprv_toupper(), and returns 'name'.
 */
char *
strtoupper (char *name)
{
    char *cursor;

    for (cursor = name; ; cursor++)
    {
        *cursor = uprv_toupper (*cursor);
        if (*cursor == '\0')
        {
            /* the converted terminator has been stored; nothing left to do */
            break;
        }
    }

    return name;
}
|
|
|
|
/* Returns true in c is a in set 'setOfChars', false otherwise
|
|
*/
|
|
bool_t
|
|
isInSet (char c, const char *setOfChars)
|
|
{
|
|
uint8_t i = 0;
|
|
|
|
while (setOfChars[i] != '\0')
|
|
{
|
|
if (c == setOfChars[i++])
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/* Returns pointer to the next non-whitespace (or non-separator)
|
|
*/
|
|
int32_t
|
|
nextTokenOffset (const char *line, const char *separators)
|
|
{
|
|
int32_t i = 0;
|
|
|
|
while (line[i] && isInSet (line[i], separators))
|
|
i++;
|
|
|
|
return i;
|
|
}
|
|
|
|
/* Returns pointer to the next token based on the set of separators
|
|
*/
|
|
char *
|
|
getToken (char *token, char *line, const char *separators)
|
|
{
|
|
int32_t i = nextTokenOffset (line, separators);
|
|
int8_t j = 0;
|
|
|
|
while (line[i] && (!isInSet (line[i], separators)))
|
|
token[j++] = line[i++];
|
|
token[j] = '\0';
|
|
|
|
return line + i;
|
|
}
|
|
|
|
int32_t uhash_hashIString(const void* name)
|
|
{
|
|
char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
|
|
uprv_strcpy(myName, (char*)name);
|
|
strtoupper(myName);
|
|
|
|
return uhash_hashString(myName);
|
|
}
|
|
|
|
/*
 * Builds a CompactShortArray from its serialized form in 'infile'.
 *
 * The stream is read in this order: values count, index count and block
 * shift (one int32_t each), then the int16_t values, then the uint16_t
 * indices.  Both arrays are allocated here and handed over to
 * ucmp16_openAdoptWithBlockShift().
 *
 * infile - stream positioned at the serialized array
 * err    - in/out error code; nothing is done if it already signals failure.
 *          Set to U_INVALID_TABLE_FILE when either count is negative and to
 *          U_MEMORY_ALLOCATION_ERROR when an allocation fails.
 *
 * Returns the new array, or NULL on error.
 *
 * NOTE(review): the results of the T_FileStream_read() calls are not
 * checked, so a truncated file is not detected here.
 */
CompactShortArray* createCompactShortArrayFromFile (FileStream * infile, UErrorCode * err)
{
    int16_t *values = NULL;
    uint16_t *indices = NULL;
    int32_t valuesCount = 0;
    int32_t indexCount = 0;
    int32_t blockShift = 0;

    if (U_FAILURE (*err))
        return NULL;

    /* header: the two array lengths and the block shift */
    T_FileStream_read (infile, &valuesCount, sizeof (int32_t));
    T_FileStream_read (infile, &indexCount, sizeof (int32_t));
    T_FileStream_read (infile, &blockShift, sizeof (int32_t));

    /* reject a corrupt header before allocating or reading anything */
    if (valuesCount < 0 || indexCount < 0)
    {
        *err = U_INVALID_TABLE_FILE;
        return NULL;
    }

    values = (int16_t *) uprv_malloc (valuesCount * sizeof (int16_t));
    if (values == NULL)
    {
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* first array: the values */
    T_FileStream_read (infile, values, valuesCount * sizeof (int16_t));

    indices = (uint16_t *) uprv_malloc (indexCount * sizeof (uint16_t));
    if (indices == NULL)
    {
        uprv_free (values);
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* second array: the indices */
    T_FileStream_read (infile, indices, indexCount * sizeof (uint16_t));

    /* the compact array adopts both newly created arrays */
    return ucmp16_openAdoptWithBlockShift (indices, values, valuesCount, 0, blockShift);
}
|
|
|
|
CompactByteArray* createCompactByteArrayFromFile (FileStream * infile,
|
|
UErrorCode * err)
|
|
{
|
|
int32_t i = 0;
|
|
int8_t *myByteArray = NULL;
|
|
uint16_t *myIndexArray = NULL;
|
|
int32_t myValuesCount = 0;
|
|
int32_t myIndexCount = 0;
|
|
|
|
if (U_FAILURE (*err))
|
|
return NULL;
|
|
|
|
/*reads in the lengths of the 2 serialized array */
|
|
T_FileStream_read (infile, &myValuesCount, sizeof (int32_t));
|
|
T_FileStream_read (infile, &myIndexCount, sizeof (int32_t));
|
|
|
|
if (myValuesCount < 0)
|
|
{
|
|
*err = U_INVALID_TABLE_FILE;
|
|
return NULL;
|
|
}
|
|
myByteArray = (int8_t *) uprv_malloc (myValuesCount * sizeof (int8_t));
|
|
if (myByteArray == NULL)
|
|
{
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
/*reads in the first array */
|
|
T_FileStream_read (infile, myByteArray, myValuesCount * sizeof (int8_t));
|
|
|
|
if (myIndexCount < 0)
|
|
{
|
|
uprv_free (myByteArray);
|
|
*err = U_INVALID_TABLE_FILE;
|
|
return NULL;
|
|
}
|
|
myIndexArray = (uint16_t *) uprv_malloc (myIndexCount * sizeof (uint16_t));
|
|
if (myIndexArray == NULL)
|
|
{
|
|
uprv_free (myByteArray);
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
/*reads in the second array */
|
|
T_FileStream_read (infile, myIndexArray, myIndexCount * sizeof (uint16_t));
|
|
|
|
/*create a compact array from the data just read
|
|
*that adopts our newly created arrays
|
|
*/
|
|
return ucmp8_openAdopt (myIndexArray, myByteArray, myValuesCount);
|
|
}
|
|
|
|
|
|
static bool_t
|
|
isCnvAcceptable(void *context,
|
|
const char *type, const char *name,
|
|
UDataInfo *pInfo) {
|
|
return
|
|
pInfo->size>=20 &&
|
|
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
|
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
|
pInfo->sizeofUChar==U_SIZEOF_UCHAR &&
|
|
pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */
|
|
pInfo->dataFormat[1]==0x6e &&
|
|
pInfo->dataFormat[2]==0x76 &&
|
|
pInfo->dataFormat[3]==0x74 &&
|
|
pInfo->formatVersion[0]==2;
|
|
}
|
|
|
|
#define DATA_TYPE "cnv"
|
|
|
|
UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
|
|
{
|
|
int32_t i = 0;
|
|
const int8_t *myByteArray = NULL;
|
|
const uint16_t *myIndexArray = NULL;
|
|
int32_t myValuesCount = 0;
|
|
int32_t myIndexCount = 0;
|
|
UConverter *myConverter = NULL;
|
|
int8_t errorLevel = 0;
|
|
|
|
UDataMemory *data;
|
|
|
|
if (err == NULL || U_FAILURE (*err)) {
|
|
return NULL;
|
|
}
|
|
|
|
|
|
data = udata_openChoice(NULL, DATA_TYPE, fileName, isCnvAcceptable, NULL, err);
|
|
if(U_FAILURE(*err))
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
|
if (myConverter == NULL)
|
|
{
|
|
udata_close(data);
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
|
|
myConverter->sharedData =
|
|
(UConverterSharedData *) udata_getMemory(data);
|
|
|
|
if (myConverter->sharedData == NULL)
|
|
{
|
|
udata_close(data);
|
|
uprv_free (myConverter);
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
|
|
/* clone it. OK to drop the original sharedData */
|
|
myConverter->sharedData = ucnv_data_unFlattenClone(myConverter->sharedData, err);
|
|
|
|
myConverter->sharedData->dataMemory = (void*)data; /* for future use */
|
|
|
|
|
|
if(U_FAILURE(*err))
|
|
{
|
|
udata_close(data);
|
|
uprv_free (myConverter);
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
|
|
if (U_SUCCESS (*err))
|
|
{
|
|
initializeDataConverter (myConverter);
|
|
}
|
|
|
|
return myConverter;
|
|
}
|
|
|
|
|
|
void
|
|
copyPlatformString (char *platformString, UConverterPlatform pltfrm)
|
|
{
|
|
switch (pltfrm)
|
|
{
|
|
case UCNV_IBM:
|
|
{
|
|
uprv_strcpy (platformString, "ibm");
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
uprv_strcpy (platformString, "");
|
|
break;
|
|
}
|
|
};
|
|
|
|
return;
|
|
}
|
|
|
|
/*returns a converter type from a string
|
|
*/
|
|
UConverterType
|
|
getAlgorithmicTypeFromName (const char *realName)
|
|
{
|
|
if (uprv_strcmp (realName, "UTF8") == 0)
|
|
return UCNV_UTF8;
|
|
else if (uprv_strcmp (realName, "UTF16_BigEndian") == 0)
|
|
return UCNV_UTF16_BigEndian;
|
|
else if (uprv_strcmp (realName, "UTF16_LittleEndian") == 0)
|
|
return UCNV_UTF16_LittleEndian;
|
|
else if (uprv_strcmp (realName, "LATIN_1") == 0)
|
|
return UCNV_LATIN_1;
|
|
else if (uprv_strcmp (realName, "JIS") == 0)
|
|
return UCNV_JIS;
|
|
else if (uprv_strcmp (realName, "EUC") == 0)
|
|
return UCNV_EUC;
|
|
else if (uprv_strcmp (realName, "GB") == 0)
|
|
return UCNV_GB;
|
|
else if (uprv_strcmp (realName, "ISO_2022") == 0)
|
|
return UCNV_ISO_2022;
|
|
else if (uprv_strcmp (realName, "UTF16_PlatformEndian") == 0)
|
|
# if U_IS_BIG_ENDIAN
|
|
return UCNV_UTF16_BigEndian;
|
|
# else
|
|
return UCNV_UTF16_LittleEndian;
|
|
# endif
|
|
else if (uprv_strcmp (realName, "UTF16_OppositeEndian") == 0)
|
|
# if U_IS_BIG_ENDIAN
|
|
return UCNV_UTF16_LittleEndian;
|
|
# else
|
|
return UCNV_UTF16_BigEndian;
|
|
# endif
|
|
else
|
|
return UCNV_UNSUPPORTED_CONVERTER;
|
|
}
|
|
|
|
|
|
UConverterPlatform
|
|
getPlatformFromName (char *name)
|
|
{
|
|
char myPlatform[10];
|
|
char mySeparators[2] = {'-', '\0'};
|
|
|
|
getToken (myPlatform, name, mySeparators);
|
|
strtoupper (myPlatform);
|
|
|
|
if (uprv_strcmp (myPlatform, "IBM") == 0)
|
|
return UCNV_IBM;
|
|
else
|
|
return UCNV_UNKNOWN;
|
|
}
|
|
|
|
int32_t
|
|
getCodepageNumberFromName (char *name)
|
|
{
|
|
char myNumber[10];
|
|
char mySeparators[2] = {'-', '\0'};
|
|
char *line = NULL;
|
|
|
|
line = getToken (myNumber, name, mySeparators);
|
|
getToken (myNumber, line, mySeparators);
|
|
|
|
return T_CString_stringToInteger (myNumber, 10);
|
|
}
|
|
|
|
int32_t uhash_hashSharedData (void *sharedData)
|
|
{
|
|
return uhash_hashIString(((UConverterSharedData *) sharedData)->name);
|
|
}
|
|
|
|
/*Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
|
|
void shareConverterData (UConverterSharedData * data)
|
|
{
|
|
UErrorCode err = U_ZERO_ERROR;
|
|
/*Lazy evaluates the Hashtable itself */
|
|
|
|
if (SHARED_DATA_HASHTABLE == NULL)
|
|
{
|
|
UHashtable* myHT = uhash_openSize ((UHashFunction) uhash_hashSharedData,
|
|
ucnv_io_countAvailableAliases(&err),
|
|
&err);
|
|
if (U_FAILURE (err)) return;
|
|
umtx_lock (NULL);
|
|
if (SHARED_DATA_HASHTABLE == NULL) SHARED_DATA_HASHTABLE = myHT;
|
|
else uhash_close(myHT);
|
|
umtx_unlock (NULL);
|
|
|
|
}
|
|
umtx_lock (NULL);
|
|
uhash_put(SHARED_DATA_HASHTABLE,
|
|
data,
|
|
&err);
|
|
umtx_unlock (NULL);
|
|
|
|
return;
|
|
}
|
|
|
|
UConverterSharedData *getSharedConverterData (const char *name)
|
|
{
|
|
/*special case when no Table has yet been created we return NULL */
|
|
if (SHARED_DATA_HASHTABLE == NULL) return NULL;
|
|
else
|
|
/* return (UConverterSharedData *) uhash_get (SHARED_DATA_HASHTABLE, uhash_hashString (name));*/
|
|
{
|
|
UConverterSharedData *i = (UConverterSharedData*)uhash_get (SHARED_DATA_HASHTABLE, uhash_hashIString (name));
|
|
return i;
|
|
}
|
|
}
|
|
|
|
/*frees the string of memory blocks associates with a sharedConverter
|
|
*if and only if the referenceCounter == 0
|
|
*/
|
|
bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|
{
|
|
if (deadSharedData->referenceCounter > 0)
|
|
return FALSE;
|
|
|
|
/* Note: if we have a dataMemory, then that means that all ucmp's came
|
|
from udata, and their tables will go away at the end
|
|
of this function. So, we need to simply dealloc the UCMP8's themselves.
|
|
We're guaranteed that they do not allocate any further memory.
|
|
|
|
When we have an API to simply 'init' a ucmp8, then no action at all will
|
|
need to happen. --srl
|
|
*/
|
|
|
|
switch (deadSharedData->conversionType)
|
|
{
|
|
case UCNV_SBCS:
|
|
{
|
|
ucmp8_close (deadSharedData->table->sbcs.fromUnicode);
|
|
uprv_free (deadSharedData->table);
|
|
};
|
|
break;
|
|
|
|
case UCNV_MBCS:
|
|
{
|
|
ucmp16_close (deadSharedData->table->mbcs.fromUnicode);
|
|
ucmp16_close (deadSharedData->table->mbcs.toUnicode);
|
|
uprv_free (deadSharedData->table);
|
|
};
|
|
break;
|
|
|
|
case UCNV_DBCS:
|
|
case UCNV_EBCDIC_STATEFUL:
|
|
{
|
|
ucmp16_close (deadSharedData->table->dbcs.fromUnicode);
|
|
ucmp16_close (deadSharedData->table->dbcs.toUnicode);
|
|
uprv_free (deadSharedData->table);
|
|
};
|
|
break;
|
|
|
|
default: ; /* semicolon makes MSVC happy */
|
|
};
|
|
|
|
if(deadSharedData->dataMemory != NULL)
|
|
{
|
|
UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory;
|
|
udata_close(data);
|
|
}
|
|
|
|
uprv_free (deadSharedData);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
bool_t isDataBasedConverter (const char *name)
|
|
{
|
|
int32_t i = 0;
|
|
bool_t result = FALSE;
|
|
UErrorCode err = U_ZERO_ERROR;
|
|
|
|
/*Lazy evaluates the hashtable */
|
|
if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL)
|
|
{
|
|
UHashtable* myHT;
|
|
|
|
{
|
|
myHT = uhash_open ((UHashFunction)uhash_hashIString, &err);
|
|
|
|
if (U_FAILURE (err)) return FALSE;
|
|
while (algorithmicConverterNames[i][0] != '\0')
|
|
{
|
|
/*Stores in the hashtable a pointer to the statically init'ed array containing
|
|
*the names
|
|
*/
|
|
|
|
uhash_put (myHT,
|
|
(void *) algorithmicConverterNames[i],
|
|
&err);
|
|
i++; /*Some Compilers (Solaris WSpro and MSVC-Release Mode
|
|
*don't differentiate between i++ and ++i
|
|
*so we have to increment in a line by itself
|
|
*/
|
|
}
|
|
}
|
|
|
|
umtx_lock (NULL);
|
|
if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL) ALGORITHMIC_CONVERTERS_HASHTABLE = myHT;
|
|
else uhash_close(myHT);
|
|
umtx_unlock (NULL);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (uhash_get (ALGORITHMIC_CONVERTERS_HASHTABLE,
|
|
uhash_hashIString (name)) == NULL)
|
|
{
|
|
result = TRUE;
|
|
}
|
|
|
|
|
|
return result;
|
|
}
|
|
/*Logic determines if the converter is Algorithmic AND/OR cached
|
|
*depending on that:
|
|
* -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
|
|
* -Get it from a Hashtable (Data=X, Cached=TRUE)
|
|
* -Call dataConverter initializer (Data=TRUE, Cached=TRUE)
|
|
* -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE)
|
|
*/
|
|
UConverter *
|
|
createConverter (const char *converterName, UErrorCode * err)
|
|
{
|
|
const char *realName;
|
|
UConverter *myUConverter = NULL;
|
|
UConverterSharedData *mySharedConverterData = NULL;
|
|
UErrorCode internalErrorCode = U_ZERO_ERROR;
|
|
bool_t isDefaultConverter;
|
|
|
|
if (U_FAILURE (*err))
|
|
return NULL;
|
|
|
|
/* In case "name" is NULL we want to open the default converter. */
|
|
if (converterName == NULL) {
|
|
converterName = ucnv_io_getDefaultConverterName();
|
|
if (converterName == NULL) {
|
|
*err = U_MISSING_RESOURCE_ERROR;
|
|
return NULL;
|
|
} else {
|
|
isDefaultConverter = TRUE;
|
|
}
|
|
} else {
|
|
isDefaultConverter = FALSE;
|
|
}
|
|
|
|
if (*converterName == 0) {
|
|
/* Use the PlatformInvariant algorithmic converter. */
|
|
realName = "PlatformInvariant";
|
|
} else if(isDefaultConverter) {
|
|
/* the default converter name is already canonical */
|
|
realName = converterName;
|
|
} else {
|
|
/* get the canonical converter name */
|
|
realName = ucnv_io_getConverterName(converterName, &internalErrorCode);
|
|
if (U_FAILURE(internalErrorCode) || realName == NULL) {
|
|
/*
|
|
* set the input name in case the converter was added
|
|
* without updating the alias table, or when there is no alias table
|
|
*/
|
|
realName = converterName;
|
|
}
|
|
}
|
|
|
|
if (isDataBasedConverter (realName))
|
|
{
|
|
mySharedConverterData = getSharedConverterData (realName);
|
|
|
|
if (mySharedConverterData == NULL)
|
|
{
|
|
/*Not cached, we need to stream it in from file */
|
|
myUConverter = createConverterFromFile (realName, err);
|
|
|
|
if (U_FAILURE (*err) || (myUConverter == NULL))
|
|
{
|
|
return myUConverter;
|
|
}
|
|
else
|
|
{
|
|
/*shared it with other library clients */
|
|
|
|
|
|
shareConverterData (myUConverter->sharedData);
|
|
return myUConverter;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*Is already cached, point to an existing one */
|
|
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
|
if (myUConverter == NULL)
|
|
{
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
|
|
/*update the reference counter: one more client */
|
|
umtx_lock (NULL);
|
|
mySharedConverterData->referenceCounter++;
|
|
umtx_unlock (NULL);
|
|
|
|
myUConverter->sharedData = mySharedConverterData;
|
|
initializeDataConverter (myUConverter);
|
|
|
|
return myUConverter;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*with have an algorithmic converter */
|
|
mySharedConverterData = getSharedConverterData (realName);
|
|
|
|
/*Non cached */
|
|
if (mySharedConverterData == NULL)
|
|
{
|
|
myUConverter = createConverterFromAlgorithmicType (realName, err);
|
|
if (U_FAILURE (*err) || (myUConverter == NULL))
|
|
{
|
|
uprv_free (myUConverter);
|
|
return NULL;
|
|
}
|
|
else
|
|
{
|
|
/* put the shared object in shared table */
|
|
shareConverterData (myUConverter->sharedData);
|
|
return myUConverter;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
|
if (myUConverter == NULL)
|
|
{
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
|
|
/*Increase the reference counter */
|
|
umtx_lock (NULL);
|
|
mySharedConverterData->referenceCounter++;
|
|
umtx_unlock (NULL);
|
|
|
|
/*initializes the converter */
|
|
myUConverter->sharedData = mySharedConverterData;
|
|
initializeAlgorithmicConverter (myUConverter);
|
|
return myUConverter;
|
|
}
|
|
|
|
return myUConverter;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*Initializes the mutable lightweight portion of the object
|
|
*By copying data from UConverter->sharedData->defaultConverter
|
|
*/
|
|
void initializeDataConverter (UConverter * myUConverter)
|
|
{
|
|
|
|
myUConverter->mode = UCNV_SI;
|
|
myUConverter->UCharErrorBufferLength = 0;
|
|
myUConverter->charErrorBufferLength = 0;
|
|
myUConverter->subCharLen = myUConverter->sharedData->defaultConverterValues.subCharLen;
|
|
uprv_memcpy (myUConverter->subChar,
|
|
myUConverter->sharedData->defaultConverterValues.subChar,
|
|
myUConverter->subCharLen);
|
|
myUConverter->toUnicodeStatus = 0x00;
|
|
myUConverter->fromUnicodeStatus = 0x00;
|
|
myUConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0x00;
|
|
|
|
myUConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
|
|
myUConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
|
|
myUConverter->extraInfo = NULL;
|
|
|
|
return;
|
|
}
|
|
|
|
/* This function initializes algorithmic converters
|
|
* based on there type
|
|
*/
|
|
void
|
|
initializeAlgorithmicConverter (UConverter * myConverter)
|
|
{
|
|
char UTF8_subChar[] = {(char) 0xFF, (char) 0xFF, (char) 0xFF};
|
|
char UTF16BE_subChar[] = {(char) 0xFF, (char) 0xFD};
|
|
char UTF16LE_subChar[] = {(char) 0xFD, (char) 0xFF};
|
|
char EUC_subChar[] = {(char) 0xAF, (char) 0xFE};
|
|
char GB_subChar[] = {(char) 0xFF, (char) 0xFF};
|
|
char JIS_subChar[] = {(char) 0xFF, (char) 0xFF};
|
|
char LATIN1_subChar = 0x1A;
|
|
|
|
|
|
|
|
myConverter->mode = UCNV_SI;
|
|
myConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
|
|
myConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
|
|
myConverter->charErrorBufferLength = 0;
|
|
myConverter->UCharErrorBufferLength = 0;
|
|
|
|
myConverter->extraInfo = NULL;
|
|
|
|
|
|
switch (myConverter->sharedData->conversionType)
|
|
{
|
|
case UCNV_UTF8:
|
|
{
|
|
myConverter->sharedData->minBytesPerChar = 1;
|
|
myConverter->sharedData->maxBytesPerChar = 4;
|
|
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
|
myConverter->sharedData->defaultConverterValues.subCharLen = 3;
|
|
myConverter->subCharLen = 3;
|
|
myConverter->toUnicodeStatus = 0;
|
|
myConverter->fromUnicodeStatus = 0; /* srl */
|
|
myConverter->sharedData->platform = UCNV_IBM;
|
|
myConverter->sharedData->codepage = 1208;
|
|
uprv_strcpy(myConverter->sharedData->name, "UTF8");
|
|
uprv_memcpy (myConverter->subChar, UTF8_subChar, 3);
|
|
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF8_subChar, 3);
|
|
|
|
break;
|
|
}
|
|
case UCNV_LATIN_1:
|
|
{
|
|
myConverter->sharedData->minBytesPerChar = 1;
|
|
myConverter->sharedData->maxBytesPerChar = 1;
|
|
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
|
myConverter->sharedData->defaultConverterValues.subCharLen = 1;
|
|
myConverter->subCharLen = 1;
|
|
myConverter->toUnicodeStatus = 0;
|
|
myConverter->sharedData->platform = UCNV_IBM;
|
|
myConverter->sharedData->codepage = 819;
|
|
uprv_strcpy(myConverter->sharedData->name, "LATIN_1");
|
|
*(myConverter->subChar) = LATIN1_subChar;
|
|
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
|
|
break;
|
|
}
|
|
|
|
case UCNV_UTF16_BigEndian:
|
|
{
|
|
myConverter->sharedData->minBytesPerChar = 2;
|
|
myConverter->sharedData->maxBytesPerChar = 2;
|
|
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
|
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
|
myConverter->subCharLen = 2;
|
|
myConverter->toUnicodeStatus = 0;
|
|
myConverter->fromUnicodeStatus = 0;
|
|
uprv_strcpy(myConverter->sharedData->name, "UTF_16BE");
|
|
myConverter->sharedData->platform = UCNV_IBM;
|
|
myConverter->sharedData->codepage = 1200;
|
|
uprv_memcpy (myConverter->subChar, UTF16BE_subChar, 2);
|
|
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16BE_subChar, 2);
|
|
|
|
break;
|
|
}
|
|
|
|
case UCNV_UTF16_LittleEndian:
|
|
{
|
|
myConverter->sharedData->minBytesPerChar = 2;
|
|
myConverter->sharedData->maxBytesPerChar = 2;
|
|
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
|
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
|
myConverter->subCharLen = 2;
|
|
myConverter->toUnicodeStatus = 0;
|
|
myConverter->fromUnicodeStatus = 0;
|
|
myConverter->sharedData->platform = UCNV_IBM;
|
|
myConverter->sharedData->codepage = 1200;
|
|
uprv_strcpy(myConverter->sharedData->name, "UTF_16LE");
|
|
uprv_memcpy (myConverter->subChar, UTF16LE_subChar, 2);
|
|
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16LE_subChar, 2);
|
|
break;
|
|
}
|
|
case UCNV_EUC:
|
|
{
|
|
myConverter->sharedData->minBytesPerChar = 1;
|
|
myConverter->sharedData->maxBytesPerChar = 2;
|
|
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
|
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
|
myConverter->subCharLen = 2;
|
|
myConverter->toUnicodeStatus = 0;
|
|
uprv_memcpy (myConverter->subChar, EUC_subChar, 2);
|
|
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, EUC_subChar, 2);
|
|
break;
|
|
}
|
|
case UCNV_ISO_2022:
|
|
{
|
|
myConverter->charErrorBuffer[0] = 0x1b;
|
|
myConverter->charErrorBuffer[1] = 0x25;
|
|
myConverter->charErrorBuffer[2] = 0x42;
|
|
myConverter->charErrorBufferLength = 3;
|
|
myConverter->sharedData->minBytesPerChar = 1;
|
|
myConverter->sharedData->maxBytesPerChar = 3;
|
|
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
|
myConverter->sharedData->defaultConverterValues.subCharLen = 1;
|
|
myConverter->subCharLen = 1;
|
|
myConverter->toUnicodeStatus = 0;
|
|
myConverter->fromUnicodeStatus = 0; /* srl */
|
|
myConverter->sharedData->codepage = 2022;
|
|
uprv_strcpy(myConverter->sharedData->name, "ISO_2022");
|
|
*(myConverter->subChar) = LATIN1_subChar;
|
|
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
|
|
myConverter->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
|
|
((UConverterDataISO2022 *) myConverter->extraInfo)->currentConverter = NULL;
|
|
((UConverterDataISO2022 *) myConverter->extraInfo)->escSeq2022Length = 0;
|
|
break;
|
|
}
|
|
case UCNV_GB:
|
|
{
|
|
myConverter->sharedData->minBytesPerChar = 2;
|
|
myConverter->sharedData->maxBytesPerChar = 2;
|
|
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
|
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
|
myConverter->subCharLen = 2;
|
|
myConverter->toUnicodeStatus = 0;
|
|
uprv_memcpy (myConverter->subChar, GB_subChar, 2);
|
|
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, GB_subChar, 2);
|
|
break;
|
|
}
|
|
case UCNV_JIS:
|
|
{
|
|
myConverter->sharedData->minBytesPerChar = 2;
|
|
myConverter->sharedData->maxBytesPerChar = 2;
|
|
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
|
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
|
myConverter->subCharLen = 2;
|
|
myConverter->toUnicodeStatus = 0;
|
|
uprv_memcpy (myConverter->subChar, JIS_subChar, 2);
|
|
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, JIS_subChar, 2);
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
};
|
|
|
|
myConverter->toUnicodeStatus = myConverter->sharedData->defaultConverterValues.toUnicodeStatus;
|
|
}
|
|
|
|
|
|
/*This function creates an algorithmic converter
|
|
*Note That even algorithmic converters are shared
|
|
* (The UConverterSharedData->table == NULL since
|
|
* there are no tables)
|
|
*for uniformity of design and control flow
|
|
*/
|
|
UConverter *
|
|
createConverterFromAlgorithmicType (const char *actualName, UErrorCode * err)
|
|
{
|
|
int32_t i = 0;
|
|
UConverter *myConverter = NULL;
|
|
UConverterSharedData *mySharedData = NULL;
|
|
UConverterType myType = getAlgorithmicTypeFromName (actualName);
|
|
|
|
if (U_FAILURE (*err))
|
|
return NULL;
|
|
|
|
myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
|
if (myConverter == NULL)
|
|
{
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
return NULL;
|
|
}
|
|
|
|
myConverter->sharedData = NULL;
|
|
mySharedData = (UConverterSharedData *) uprv_malloc (sizeof (UConverterSharedData));
|
|
if (mySharedData == NULL)
|
|
{
|
|
*err = U_MEMORY_ALLOCATION_ERROR;
|
|
uprv_free (myConverter);
|
|
return NULL;
|
|
}
|
|
mySharedData->structSize = sizeof(UConverterSharedData);
|
|
mySharedData->table = NULL;
|
|
mySharedData->dataMemory = NULL;
|
|
uprv_strcpy (mySharedData->name, actualName);
|
|
/*Initializes the referenceCounter to 1 */
|
|
mySharedData->referenceCounter = 1;
|
|
mySharedData->platform = UCNV_UNKNOWN;
|
|
mySharedData->codepage = 0;
|
|
mySharedData->conversionType = myType;
|
|
myConverter->sharedData = mySharedData;
|
|
|
|
initializeAlgorithmicConverter (myConverter);
|
|
return myConverter;
|
|
}
|
|
|
|
|
|
/*
 * Un-flattens shared converter data: makes a heap copy of the
 * UConverterSharedData header at 'source' and builds its conversion table
 * from the bytes that follow the header.
 *
 * Layout read after the header, by conversion type:
 *   SBCS                   256 UChars (referenced in place), then a ucmp8
 *   DBCS, EBCDIC_STATEFUL  a ucmp16 (toUnicode), padding up to a multiple
 *                          of 4 bytes, a ucmp16 (fromUnicode)
 *   MBCS                   256 bool_t starters (referenced in place), then
 *                          the same two ucmp16 arrays as for DBCS
 *
 * source - flattened shared data
 * status - in/out error code; nothing is done if it already signals
 *          failure.  Set to U_INVALID_TABLE_FORMAT when structSize does not
 *          match or the conversion type is not one of the above, and to
 *          U_MEMORY_ALLOCATION_ERROR when an allocation fails.
 *
 * Returns the clone, or NULL for the errors listed above.  If one of the
 * ucmp*_cloneFromData() calls reports an error, the clone is still
 * returned and the caller has to check 'status'.
 *
 * The header and the table are allocated with uprv_malloc() (they are
 * released with uprv_free() in deleteSharedConverterData()), allocation
 * results are checked, and the header is freed again on the error paths.
 */
UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *source, UErrorCode *status)
{
    const uint8_t *raw, *oldraw;
    UConverterSharedData *data = NULL;

    if(U_FAILURE(*status))
        return NULL;

    if(source->structSize != sizeof(UConverterSharedData))
    {
        *status = U_INVALID_TABLE_FORMAT;
        return NULL;
    }

    data = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData));
    if(data == NULL)
    {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(data,source,sizeof(UConverterSharedData));

    /* the table data starts right behind the header */
    raw = (const uint8_t*)source;
    raw += data->structSize;

    switch (data->conversionType)
    {
    case UCNV_SBCS:
        data->table = uprv_malloc(sizeof(UConverterSBCSTable));
        if(data->table == NULL)
            goto allocationFailed;

        data->table->sbcs.toUnicode = (UChar*)raw;
        raw += sizeof(UChar)*256;

        data->table->sbcs.fromUnicode = ucmp8_cloneFromData(&raw, status);
        break;

    case UCNV_EBCDIC_STATEFUL:
    case UCNV_DBCS:
        data->table = uprv_malloc(sizeof(UConverterDBCSTable));
        if(data->table == NULL)
            goto allocationFailed;

        oldraw = raw;

        data->table->dbcs.toUnicode=ucmp16_cloneFromData(&raw, status);

        while((raw-oldraw)%4) /* pad to 4 */
            raw++;

        data->table->dbcs.fromUnicode =ucmp16_cloneFromData(&raw, status);
        break;

    case UCNV_MBCS:
        data->table = uprv_malloc(sizeof(UConverterMBCSTable));
        if(data->table == NULL)
            goto allocationFailed;

        data->table->mbcs.starters = (bool_t*)raw;
        raw += sizeof(bool_t)*256;

        oldraw = raw;

        data->table->mbcs.toUnicode = ucmp16_cloneFromData(&raw, status);

        while((raw-oldraw)%4) /* pad to 4 */
            raw++;

        data->table->mbcs.fromUnicode = ucmp16_cloneFromData(&raw, status);
        break;

    default:
        /* unknown type: do not leak the header copy */
        uprv_free(data);
        *status = U_INVALID_TABLE_FORMAT;
        return NULL;
    }

    return data;

allocationFailed:
    uprv_free(data);
    *status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|