diff --git a/icu4c/source/common/ucmp16.c b/icu4c/source/common/ucmp16.c index 67364f1807..6011eb5a63 100644 --- a/icu4c/source/common/ucmp16.c +++ b/icu4c/source/common/ucmp16.c @@ -85,6 +85,7 @@ CompactShortArray* ucmp16_open(int16_t defaultValue) this_obj->fAlias = FALSE; this_obj->fIndex = NULL; this_obj->fHashes = NULL; + this_obj->fIAmOwned = FALSE; this_obj->fDefaultValue = defaultValue; this_obj->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t)); @@ -129,6 +130,62 @@ CompactShortArray* ucmp16_open(int16_t defaultValue) return this_obj; } + +void ucmp16_init(CompactShortArray *this_obj, int16_t defaultValue) +{ + int32_t i; + + this_obj->fStructSize = sizeof(CompactShortArray); + this_obj->fCount = UCMP16_kUnicodeCount; + this_obj->fCompact = FALSE; + this_obj->fBogus = FALSE; + this_obj->fArray = NULL; + this_obj->fAlias = FALSE; + this_obj->fIndex = NULL; + this_obj->fHashes = NULL; + this_obj->fIAmOwned = TRUE; + this_obj->fDefaultValue = defaultValue; + + this_obj->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t)); + if (this_obj->fArray == NULL) + { + this_obj->fBogus = TRUE; + return; + } + + this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t)); + if (this_obj->fIndex == NULL) + { + uprv_free(this_obj->fArray); + this_obj->fArray = NULL; + + this_obj->fBogus = TRUE; + return; + } + + this_obj->kBlockShift = UCMP16_kBlockShift; + this_obj->kBlockMask = UCMP16_kBlockMask; + for (i = 0; i < UCMP16_kUnicodeCount; i += 1) + { + this_obj->fArray[i] = defaultValue; + } + + this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t)); + if (this_obj->fHashes == NULL) + { + uprv_free(this_obj->fArray); this_obj->fArray = NULL; /* clear freed pointers so a later ucmp16_close() cannot free them again */ + uprv_free(this_obj->fIndex); this_obj->fIndex = NULL; + this_obj->fBogus = TRUE; + return; + } + + for (i = 0; i < UCMP16_kIndexCount; i += 1) + { + this_obj->fIndex[i] = (uint16_t)(i << UCMP16_kBlockShift); + this_obj->fHashes[i] = 0; + } +} + CompactShortArray* ucmp16_openAdopt(uint16_t 
*indexArray, int16_t *newValues, int32_t count, @@ -147,6 +204,8 @@ CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray, this_obj->kBlockShift = UCMP16_kBlockShift; this_obj->kBlockMask = UCMP16_kBlockMask; this_obj->fAlias = FALSE; + this_obj->fIAmOwned = FALSE; + return this_obj; } @@ -188,6 +247,8 @@ CompactShortArray* ucmp16_openAlias(uint16_t *indexArray, this_obj->kBlockShift = UCMP16_kBlockShift; this_obj->kBlockMask = UCMP16_kBlockMask; this_obj->fAlias = TRUE; + this_obj->fIAmOwned = FALSE; + return this_obj; } @@ -207,7 +268,10 @@ void ucmp16_close(CompactShortArray* this_obj) if(this_obj->fHashes != NULL) { uprv_free(this_obj->fHashes); } - uprv_free(this_obj); + if(!this_obj->fIAmOwned) + { + uprv_free(this_obj); + } } } @@ -432,41 +496,51 @@ const uint16_t* ucmp16_getIndex(const CompactShortArray* this_obj) possible between the ucmpX_ family. Check lines marked 'SIZE'. */ -U_CAPI CompactShortArray * U_EXPORT2 ucmp16_cloneFromData(const uint8_t **source, UErrorCode *status) +U_CAPI void U_EXPORT2 ucmp16_initFromData(CompactShortArray *this_obj, const uint8_t **source, UErrorCode *status) { - CompactShortArray *array; - const CompactShortArray *oldArray; + uint32_t i; + const uint8_t *oldSource = *source; if(U_FAILURE(*status)) - return NULL; + return; - oldArray= (const CompactShortArray*)*source; - - if(oldArray->fStructSize != sizeof(*oldArray)) - { - *status = U_INVALID_TABLE_FORMAT; /* ? 
*/ - return NULL; - } - array = (CompactShortArray*)malloc(sizeof(*array)); + this_obj->fArray = NULL; + this_obj->fIndex = NULL; + this_obj->fBogus = FALSE; + this_obj->fStructSize = sizeof(CompactShortArray); + this_obj->fCompact = TRUE; + this_obj->fAlias = TRUE; + this_obj->fIAmOwned = TRUE; + this_obj->fHashes = NULL; + this_obj->fDefaultValue = 0x0000; /* not used */ - uprv_memcpy(array,*source, sizeof(*array)); + i = * ((const uint32_t*) *source); + (*source) += 4; - *source += array->fStructSize; - - array->fArray = (int16_t*)*source; /* SIZE */ - *source += (sizeof(int16_t)*array->fCount); /* SIZE */ - - array->fIndex = (uint16_t*)*source; - *source += (sizeof(uint16_t)*UCMP16_kIndexCount); /* SIZE*/ + if(i != ICU_UCMP16_VERSION) + { + *status = U_INVALID_FORMAT_ERROR; + return; + } - array->fAlias = TRUE; - - /* eat up padding */ - while((*source-((uint8_t*)oldArray))%4) - (*source)++; + this_obj->fCount = * ((const uint32_t*)*source); + (*source) += 4; - return array; + this_obj->kBlockShift = * ((const uint32_t*)*source); + (*source) += 4; + this_obj->kBlockMask = * ((const uint32_t*)*source); + (*source) += 4; + + this_obj->fIndex = (uint16_t*) *source; + (*source) += sizeof(this_obj->fIndex[0])*UCMP16_kIndexCount; + + this_obj->fArray = (int16_t*) *source; /* fArray holds int16_t values; the cast must match its declared type */ + (*source) += sizeof(this_obj->fArray[0])*this_obj->fCount; + + /* eat up padding */ + while((*source-(oldSource))%4) + (*source)++; } diff --git a/icu4c/source/common/ucmp16.h b/icu4c/source/common/ucmp16.h index 3c08bc788c..913b7b7706 100644 --- a/icu4c/source/common/ucmp16.h +++ b/icu4c/source/common/ucmp16.h @@ -21,6 +21,10 @@ #include "unicode/utypes.h" +/* 32-bits. + Bump this whenever the internal structure changes. 
+*/ +#define ICU_UCMP16_VERSION 0x01270000 /** @@ -66,7 +70,7 @@ * @see CompactIntArray * @see CompactCharArray * @see CompactStringArray - * @version $Revision: 1.8 $ 8/25/98 + * @version $Revision: 1.9 $ 8/25/98 * @author Helena Shih */ @@ -82,6 +86,7 @@ typedef struct CompactShortArray { bool_t fAlias; int32_t kBlockShift; int32_t kBlockMask; + bool_t fIAmOwned; /* don't free CSA on close */ } CompactShortArray; @@ -93,6 +98,7 @@ U_CAPI int32_t U_EXPORT2 ucmp16_getkBlockCount(void); * @param defaultValue the default value for all characters not explicitly in the array */ U_CAPI CompactShortArray* U_EXPORT2 ucmp16_open(int16_t defaultValue); +U_CAPI void U_EXPORT2 ucmp16_init(CompactShortArray* array, int16_t defaultValue); /** * Construct a CompactShortArray from a pre-computed index and values array. The values @@ -203,7 +209,7 @@ U_CAPI const uint16_t* U_EXPORT2 ucmp16_getIndex(const CompactShortArray* array /** INTERNAL USE ONLY **/ -U_CAPI CompactShortArray * U_EXPORT2 ucmp16_cloneFromData(const uint8_t **source, UErrorCode *status); +U_CAPI void U_EXPORT2 ucmp16_initFromData(CompactShortArray* array, const uint8_t **source, UErrorCode *status); #endif diff --git a/icu4c/source/common/ucmp8.c b/icu4c/source/common/ucmp8.c index 68a7818118..54eb169b15 100644 --- a/icu4c/source/common/ucmp8.c +++ b/icu4c/source/common/ucmp8.c @@ -31,6 +31,66 @@ int32_t ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount;} int32_t ucmp8_getkBlockCount() { return UCMP8_kBlockCount;} /* debug flags*/ /*=======================================================*/ +void ucmp8_init(CompactByteArray* array, int8_t defaultValue) +{ +/* set up the index array and the data array. + * the index array always points into particular parts of the data array + * it is initially set up to point at regular block boundaries + * The following example uses blocks of 4 for simplicity + * Example: Expanded + * INDEX# 0 1 2 3 4 + * INDEX 0 4 8 12 16 ... + * ARRAY abcdeababcedzyabcdea... 
+ * | | | | | |... + * whenever you set an element in the array, it unpacks to this state + * After compression, the index will point to various places in the data array + * wherever there is a runs of the same elements as in the original + * Example: Compressed + * INDEX# 0 1 2 3 4 + * INDEX 0 4 1 8 2 ... + * ARRAY abcdeabazyabc... + * If you look at the example, index# 2 in the expanded version points + * to data position number 8, which has elements "bced". In the compressed + * version, index# 2 points to data position 1, which also has "bced" + */ + CompactByteArray* this_obj = array; + int32_t i; + + if (this_obj == NULL) return; + + this_obj->fStructSize = sizeof(CompactByteArray); + this_obj->fArray = NULL; + this_obj->fIndex = NULL; + this_obj->fCount = UCMP8_kUnicodeCount; + this_obj->fCompact = FALSE; + this_obj->fBogus = FALSE; + this_obj->fAlias = FALSE; + this_obj->fIAmOwned = TRUE; + + + this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount); + if (!this_obj->fArray) + { + this_obj->fBogus = TRUE; + return; + } + this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount); + if (!this_obj->fIndex) + { + uprv_free(this_obj->fArray); + this_obj->fArray = NULL; + this_obj->fBogus = TRUE; + return; + } + for (i = 0; i < UCMP8_kUnicodeCount; ++i) + { + this_obj->fArray[i] = defaultValue; + } + for (i = 0; i < UCMP8_kIndexCount; ++i) + { + this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift); + } +} CompactByteArray* ucmp8_open(int8_t defaultValue) { @@ -66,6 +126,8 @@ CompactByteArray* ucmp8_open(int8_t defaultValue) this_obj->fCompact = FALSE; this_obj->fBogus = FALSE; this_obj->fAlias = FALSE; + this_obj->fIAmOwned = FALSE; + this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount); @@ -111,6 +173,8 @@ CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray, this_obj->fIndex = indexArray; this_obj->fCompact = (count < UCMP8_kUnicodeCount) ? 
TRUE : FALSE; this_obj->fAlias = FALSE; + this_obj->fIAmOwned = FALSE; + return this_obj; } @@ -131,6 +195,8 @@ CompactByteArray* ucmp8_openAlias(uint16_t *indexArray, this_obj->fIndex = indexArray; this_obj->fCompact = (count < UCMP8_kUnicodeCount) ? TRUE : FALSE; this_obj->fAlias = TRUE; + this_obj->fIAmOwned = FALSE; + return this_obj; } @@ -147,7 +213,10 @@ void ucmp8_close(CompactByteArray* this_obj) uprv_free(this_obj->fIndex); } } - uprv_free(this_obj); + if(!this_obj->fIAmOwned) /* Free the struct only when an 'open' call allocated it; after 'init' the caller owns its storage. */ + { + uprv_free(this_obj); + } } } @@ -191,6 +260,8 @@ void ucmp8_expand(CompactByteArray* this_obj) this_obj->fArray = tempArray; this_obj->fCompact = FALSE; this_obj->fAlias = FALSE; + /* fIAmOwned is left untouched: expanding replaces the data array, not the storage of the struct itself */ + } } @@ -390,45 +461,41 @@ ucmp8_compact(CompactByteArray* this_obj, possible between the ucmpX_ family. */ -U_CAPI CompactByteArray * U_EXPORT2 ucmp8_cloneFromData(const uint8_t **source, UErrorCode *status) +U_CAPI void U_EXPORT2 ucmp8_initFromData(CompactByteArray *this_obj, const uint8_t **source, UErrorCode *status) { - CompactByteArray *array; - const CompactByteArray *oldArray; - - if(U_FAILURE(*status)) - return NULL; - - oldArray= (const CompactByteArray*)*source; - - if(oldArray->fStructSize != sizeof(*oldArray)) - { - *status = U_INVALID_TABLE_FORMAT; /* ? 
*/ - return NULL; - } - array = (CompactByteArray*)malloc(sizeof(*array)); - - uprv_memcpy(array,*source, sizeof(*array)); + uint32_t i; + const uint8_t *oldSource = *source; - array->fAlias = TRUE; - - *source += array->fStructSize; + if(U_FAILURE(*status)) + return; - array->fArray = (int8_t*)*source; - *source += (sizeof(int8_t)*array->fCount); + this_obj->fArray = NULL; + this_obj->fIndex = NULL; + this_obj->fBogus = FALSE; + this_obj->fStructSize = sizeof(CompactByteArray); + this_obj->fCompact = TRUE; + this_obj->fAlias = TRUE; + this_obj->fIAmOwned = TRUE; + + i = * ((const uint32_t*) *source); + (*source) += 4; - if(((*source)-((const uint8_t*)oldArray)) & 1 ) - { - (*source)++; - } + if(i != ICU_UCMP8_VERSION) + { + *status = U_INVALID_FORMAT_ERROR; + return; + } + + this_obj->fCount = * ((const uint32_t*)*source); + (*source) += 4; - array->fIndex = (uint16_t*)*source; - *source += (sizeof(uint16_t)*UCMP8_kIndexCount); + this_obj->fIndex = (uint16_t*) *source; + (*source) += sizeof(this_obj->fIndex[0])*UCMP8_kIndexCount; + this_obj->fArray = (int8_t*) *source; /* fArray holds int8_t values; the cast must match its declared type */ + (*source) += sizeof(this_obj->fArray[0])*this_obj->fCount; - /* eat up padding */ - while((*source-((uint8_t*)oldArray))%4) - (*source)++; - - - return array; + /* eat up padding */ + while((*source-(oldSource))%4) + (*source)++; } diff --git a/icu4c/source/common/ucmp8.h b/icu4c/source/common/ucmp8.h index 0ab8e1093b..2393739c07 100644 --- a/icu4c/source/common/ucmp8.h +++ b/icu4c/source/common/ucmp8.h @@ -11,6 +11,10 @@ #ifndef UCMP8_H #define UCMP8_H +/* 32-bits. + Bump this whenever the internal structure changes. 
+*/ +#define ICU_UCMP8_VERSION 0x01260000 #include "unicode/utypes.h" @@ -33,6 +37,7 @@ typedef struct CompactByteArray { bool_t fCompact; bool_t fBogus; bool_t fAlias; + bool_t fIAmOwned; /* don't free CBA on close */ } CompactByteArray; #define UCMP8_kUnicodeCount 65536 @@ -44,6 +49,9 @@ typedef struct CompactByteArray { U_CAPI CompactByteArray* U_EXPORT2 ucmp8_open(int8_t defaultValue); + +U_CAPI void U_EXPORT2 ucmp8_init(CompactByteArray* array, int8_t defaultValue); + U_CAPI CompactByteArray* U_EXPORT2 ucmp8_openAdopt(uint16_t* indexArray, int8_t* newValues, int32_t count); @@ -84,8 +92,9 @@ U_CAPI void U_EXPORT2 ucmp8_compact(CompactByteArray* array, /* Expanded takes the array back to a 65536 element array*/ U_CAPI void U_EXPORT2 ucmp8_expand(CompactByteArray* array); -/** INTERNAL USE ONLY **/ -U_CAPI CompactByteArray * U_EXPORT2 ucmp8_cloneFromData(const uint8_t **source, UErrorCode *status); +/** (more) INTERNAL USE ONLY **/ +/* initializes an existing CBA from memory. Will cause ucmp8_close() to not deallocate anything. 
*/ +U_CAPI void U_EXPORT2 ucmp8_initFromData(CompactByteArray* array, const uint8_t **source, UErrorCode *status); #endif diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c index 37aae8fd24..e9793f6684 100644 --- a/icu4c/source/common/ucnv.c +++ b/icu4c/source/common/ucnv.c @@ -239,8 +239,8 @@ void ucnv_setSubstChars (UConverter * converter, return; /*Makes sure that the subChar is within the codepages char length boundaries */ - if ((len > converter->sharedData->maxBytesPerChar) - || (len < converter->sharedData->minBytesPerChar)) + if ((len > converter->sharedData->staticData->maxBytesPerChar) + || (len < converter->sharedData->staticData->minBytesPerChar)) { *err = U_ILLEGAL_ARGUMENT_ERROR; return; @@ -273,7 +273,7 @@ int32_t ucnv_getDisplayName (const UConverter * converter, rb = ures_open (NULL, displayLocale, err); stringToWrite = ures_get (rb, - converter->sharedData->name, + converter->sharedData->staticData->name, err); if (rb) @@ -289,8 +289,8 @@ int32_t ucnv_getDisplayName (const UConverter * converter, *sets stringToWriteLength (which accounts for a NULL terminator) *and stringToWrite */ - stringToWriteLength = uprv_strlen (converter->sharedData->name) + 1; - stringToWrite = u_uastrcpy (stringToWriteBuffer, converter->sharedData->name); + stringToWriteLength = uprv_strlen (converter->sharedData->staticData->name) + 1; + stringToWrite = u_uastrcpy (stringToWriteBuffer, converter->sharedData->staticData->name); /*Hides the fallback to the internal name from the user */ if (*err == U_MISSING_RESOURCE_ERROR) @@ -332,7 +332,7 @@ int32_t ucnv_getDisplayName (const UConverter * converter, */ void ucnv_reset (UConverter * converter) { - converter->toUnicodeStatus = converter->sharedData->defaultConverterValues.toUnicodeStatus; + converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; converter->fromUnicodeStatus = 0; converter->UCharErrorBufferLength = 0; converter->charErrorBufferLength = 0; @@ -347,13 +347,13 @@ void ucnv_reset 
(UConverter * converter) int8_t ucnv_getMaxCharSize (const UConverter * converter) { - return converter->sharedData->maxBytesPerChar; + return converter->sharedData->staticData->maxBytesPerChar; } int8_t ucnv_getMinCharSize (const UConverter * converter) { - return converter->sharedData->minBytesPerChar; + return converter->sharedData->staticData->minBytesPerChar; } const char* ucnv_getName (const UConverter * converter, UErrorCode * err) @@ -362,7 +362,7 @@ const char* ucnv_getName (const UConverter * converter, UErrorCode * err) if (U_FAILURE (*err)) return NULL; - return converter->sharedData->name; + return converter->sharedData->staticData->name; } int32_t ucnv_getCCSID (const UConverter * converter, @@ -371,7 +371,7 @@ int32_t ucnv_getCCSID (const UConverter * converter, if (U_FAILURE (*err)) return -1; - return converter->sharedData->codepage; + return converter->sharedData->staticData->codepage; } @@ -381,7 +381,7 @@ UConverterPlatform ucnv_getPlatform (const UConverter * converter, if (U_FAILURE (*err)) return UCNV_UNKNOWN; - return converter->sharedData->platform; + return converter->sharedData->staticData->platform; } UConverterToUCallback ucnv_getToUCallBack (const UConverter * converter) @@ -473,7 +473,7 @@ void ucnv_fromUnicode (UConverter * _this, } else { /* all code points are of the same length */ int32_t targetSize = targetLimit - *target; - int32_t i, bytesPerChar = _this->sharedData->maxBytesPerChar; + int32_t i, bytesPerChar = _this->sharedData->staticData->maxBytesPerChar; if(bytesPerChar == 1) { for (i=0; isharedData->maxBytesPerChar; + int32_t i, bytesPerChar = _this->sharedData->staticData->maxBytesPerChar; if(bytesPerChar == 1) { for (i=0; isharedData->conversionType; + return converter->sharedData->staticData->conversionType; } void ucnv_getStarters(const UConverter* converter, diff --git a/icu4c/source/common/ucnv2022.c b/icu4c/source/common/ucnv2022.c index 186ab49441..f9a32e98eb 100644 --- a/icu4c/source/common/ucnv2022.c +++ 
b/icu4c/source/common/ucnv2022.c @@ -687,11 +687,19 @@ static const UConverterImpl _ISO2022Impl={ NULL }; +const UConverterStaticData _ISO2022StaticData={ + sizeof(UConverterStaticData), + "ISO_2022", + 2022, UCNV_IBM, UCNV_ISO_2022, 1, 4, + 1, { 0x1a, 0, 0, 0 }, + { 0,0,0} /* reserved */ +}; + + const UConverterSharedData _ISO2022Data={ sizeof(UConverterSharedData), ~((uint32_t) 0), - NULL, NULL, &_ISO2022Impl, "ISO_2022", - 2022, UCNV_IBM, UCNV_ISO_2022, 1, 4, - { 0, 1, { 0x1a, 0, 0, 0 } } + NULL, NULL, &_ISO2022StaticData, FALSE, &_ISO2022Impl, + 0 }; /* EBCDICStateful ----------------------------------------------------------- */ @@ -723,7 +731,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverter * _this, int32_t myMode = _this->mode; - myToUnicode = _this->sharedData->table->dbcs.toUnicode; + myToUnicode = &_this->sharedData->table->dbcs.toUnicode; while (mySourceIndex < sourceLength) { @@ -841,7 +849,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this, int32_t* originalOffsets = offsets; - myToUnicode = _this->sharedData->table->dbcs.toUnicode; + myToUnicode = &_this->sharedData->table->dbcs.toUnicode; while (mySourceIndex < sourceLength) { @@ -968,7 +976,7 @@ void T_UConverter_fromUnicode_EBCDIC_STATEFUL (UConverter * _this, UChar mySourceChar = 0x0000; bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus; bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS; - myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; + myFromUnicode = &_this->sharedData->table->dbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -1090,7 +1098,7 @@ void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this, bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS; int32_t* originalOffsets = offsets; - myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; + myFromUnicode = &_this->sharedData->table->dbcs.fromUnicode; /*writing the char to the output stream */ while 
(mySourceIndex < sourceLength) @@ -1226,7 +1234,7 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverter* converter, if (converter->mode == UCNV_SI) { /*Not lead byte: we update the source ptr and get the codepoint*/ - myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, + myUChar = ucmp16_getu( (&converter->sharedData->table->dbcs.toUnicode), (UChar)(**source)); (*source)++; } @@ -1240,7 +1248,7 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverter* converter, return 0xFFFD; } - myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, + myUChar = ucmp16_getu( (&converter->sharedData->table->dbcs.toUnicode), ((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))); (*source) += 2; @@ -1293,9 +1301,16 @@ static const UConverterImpl _EBCDICStatefulImpl={ NULL }; +const UConverterStaticData _EBCDICStatefulStaticData={ + sizeof(UConverterStaticData), + "EBCDICStateful", + 0, UCNV_IBM, UCNV_EBCDIC_STATEFUL, 1, 1, + 1, { 0, 0, 0, 0 }, + { 0,0,0} /* reserved */ +}; + const UConverterSharedData _EBCDICStatefulData={ sizeof(UConverterSharedData), 1, - NULL, NULL, &_EBCDICStatefulImpl, "EBCDICStateful", - 0, UCNV_IBM, UCNV_EBCDIC_STATEFUL, 1, 1, - { 0, 1, 0, 0, 0, 0 } + NULL, NULL, &_EBCDICStatefulStaticData, FALSE, &_EBCDICStatefulImpl, + 0 }; diff --git a/icu4c/source/common/ucnv_bld.c b/icu4c/source/common/ucnv_bld.c index 0006cbca85..ab939aa4e0 100644 --- a/icu4c/source/common/ucnv_bld.c +++ b/icu4c/source/common/ucnv_bld.c @@ -128,7 +128,7 @@ isCnvAcceptable(void *context, pInfo->dataFormat[1]==0x6e && pInfo->dataFormat[2]==0x76 && pInfo->dataFormat[3]==0x74 && - pInfo->formatVersion[0]==2; + pInfo->formatVersion[0]==3; } #define DATA_TYPE "cnv" @@ -213,7 +213,7 @@ void shareConverterData (UConverterSharedData * data) umtx_lock (NULL); /* ### check to see if the element is not already there! 
*/ uhash_put(SHARED_DATA_HASHTABLE, - (void*) data->name, /* Okay to cast away const as long as + (void*) data->staticData->name, /* Okay to cast away const as long as keyDeleter == NULL */ data, &err); @@ -355,9 +355,9 @@ UConverter * myUConverter->mode = UCNV_SI; myUConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE; myUConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE; - myUConverter->toUnicodeStatus = myUConverter->sharedData->defaultConverterValues.toUnicodeStatus; - myUConverter->subCharLen = myUConverter->sharedData->defaultConverterValues.subCharLen; - uprv_memcpy (myUConverter->subChar, myUConverter->sharedData->defaultConverterValues.subChar, myUConverter->subCharLen); + myUConverter->toUnicodeStatus = myUConverter->sharedData->toUnicodeStatus; + myUConverter->subCharLen = myUConverter->sharedData->staticData->subCharLen; + uprv_memcpy (myUConverter->subChar, myUConverter->sharedData->staticData->subChar, myUConverter->subCharLen); if(myUConverter != NULL && myUConverter->sharedData->impl->open != NULL) { myUConverter->sharedData->impl->open(myUConverter, realName, NULL, err); @@ -374,7 +374,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s { const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); /* version 1.0 of .cnv files directly contains a UConverterSharedData_1_4 structure */ - const UConverterSharedData_1_4 *source = (const UConverterSharedData_1_4 *) raw; + const UConverterStaticData *source = (const UConverterStaticData *) raw; UConverterSharedData *data; UConverterType type = source->conversionType; @@ -383,7 +383,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || converterData[type]->referenceCounter != 1 || - source->structSize != sizeof(UConverterSharedData_1_4)) + source->structSize != sizeof(UConverterStaticData)) { 
*status = U_INVALID_TABLE_FORMAT; return NULL; @@ -405,20 +405,11 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s *status = U_MEMORY_ALLOCATION_ERROR; return NULL; } + + data->staticData = source; /* fill in fields from the loaded data */ data->dataMemory = (void*)pData; /* for future use */ - data->name = source->name; /* ### this could/should come from the caller - should be the same as the canonical name?!! */ - data->codepage = source->codepage; - data->platform = source->platform; - data->minBytesPerChar = source->minBytesPerChar; - data->maxBytesPerChar = source->maxBytesPerChar; - - /* version 1.0 of .cnv files does not store valid toUnicodeStatus - do not copy the whole defaultConverterValues */ - data->defaultConverterValues.subCharLen = source->defaultConverterValues.subCharLen; - uprv_memcpy(&data->defaultConverterValues.subChar, - &source->defaultConverterValues.subChar, - data->defaultConverterValues.subCharLen); if(data->impl->load != NULL) { data->impl->load(data, raw + source->structSize, status); diff --git a/icu4c/source/common/ucnv_cnv.h b/icu4c/source/common/ucnv_cnv.h index d96409d478..5c85012c6d 100644 --- a/icu4c/source/common/ucnv_cnv.h +++ b/icu4c/source/common/ucnv_cnv.h @@ -15,6 +15,40 @@ #include "unicode/utypes.h" #include "unicode/ucnv_bld.h" +#include "ucmp8.h" +#include "ucmp16.h" + +/*Table Node Definitions */ +typedef struct + { + UChar *toUnicode; /* [256]; */ + CompactByteArray fromUnicode; + } +UConverterSBCSTable; + +typedef struct + { + CompactShortArray toUnicode; + CompactShortArray fromUnicode; + } +UConverterDBCSTable; + +typedef struct + { + bool_t *starters; /* [256]; */ + CompactShortArray toUnicode; + CompactShortArray fromUnicode; + } +UConverterMBCSTable; + +union UConverterTable + { + UConverterSBCSTable sbcs; + UConverterDBCSTable dbcs; + UConverterMBCSTable mbcs; + }; + + U_CDECL_BEGIN #define missingCharMarker 0xFFFF diff --git a/icu4c/source/common/ucnv_lmb.c 
b/icu4c/source/common/ucnv_lmb.c index 3cf8256a8b..5a2cda4f03 100644 --- a/icu4c/source/common/ucnv_lmb.c +++ b/icu4c/source/common/ucnv_lmb.c @@ -627,7 +627,7 @@ UChar32 _LMBCSGetNextUChar(UConverter* _this, /* check for LMBCS doubled-group-byte case */ mbChar = (HighCh == group) ? LowCh : (HighCh<<8) | LowCh; - MyCArray = cnv->sharedData->table->mbcs.toUnicode; + MyCArray = &cnv->sharedData->table->mbcs.toUnicode; uniChar = (UChar) ucmp16_getu (MyCArray, mbChar); } @@ -648,7 +648,7 @@ UChar32 _LMBCSGetNextUChar(UConverter* _this, /* Lookup value must include opt group */ mbChar = (UChar)(group << 8) | (UChar) CurByte; - MyCArray = cnv->sharedData->table->mbcs.toUnicode; + MyCArray = &cnv->sharedData->table->mbcs.toUnicode; uniChar = (UChar) ucmp16_getu(MyCArray, mbChar); } @@ -670,7 +670,7 @@ UChar32 _LMBCSGetNextUChar(UConverter* _this, LowCh = *(*source)++; mbChar = (HighCh<<8) | LowCh; - MyCArray = cnv->sharedData->table->mbcs.toUnicode; + MyCArray = &cnv->sharedData->table->mbcs.toUnicode; uniChar = (UChar) ucmp16_getu (MyCArray, mbChar); (*source) += sizeof(UChar); } @@ -933,11 +933,16 @@ DEFINE_LMBCS_OPEN(19) _LMBCSGetNextUChar,\ NULL\ };\ +const UConverterStaticData _LMBCSStaticData##n={\ + sizeof(UConverterStaticData),\ +"LMBCS_" ## #n,\ + 0, UCNV_IBM, UCNV_LMBCS_1, 1, 1,\ + 1, { 0x3f, 0, 0, 0 } \ +};\ const UConverterSharedData _LMBCSData##n={\ sizeof(UConverterSharedData), ~0,\ - NULL, NULL, &_LMBCSImpl##n, "LMBCS_" ## #n,\ - 0, UCNV_IBM, UCNV_LMBCS_1, 1, 1,\ - { 0, 1, { 0x3f, 0, 0, 0 } }\ + NULL, NULL, &_LMBCSStaticData##n, FALSE, &_LMBCSImpl##n, \ + 0 \ }; DECLARE_LMBCS_DATA(1) diff --git a/icu4c/source/common/ucnv_utf.c b/icu4c/source/common/ucnv_utf.c index a0d5b5652c..ee982c6310 100644 --- a/icu4c/source/common/ucnv_utf.c +++ b/icu4c/source/common/ucnv_utf.c @@ -690,11 +690,19 @@ static const UConverterImpl _UTF8Impl={ NULL }; + +const UConverterStaticData _UTF8StaticData={ + sizeof(UConverterStaticData), +"UTF8", + 1208, UCNV_IBM, UCNV_UTF8, 1, 4, 
+ 3, { 0xef, 0xbf, 0xbd, 0 } +}; + + const UConverterSharedData _UTF8Data={ sizeof(UConverterSharedData), ~((uint32_t) 0), - NULL, NULL, &_UTF8Impl, "UTF8", - 1208, UCNV_IBM, UCNV_UTF8, 1, 4, - { 0, 3, { 0xef, 0xbf, 0xbd, 0 } } + NULL, NULL, &_UTF8StaticData, FALSE, &_UTF8Impl, + 0 }; /* UTF-16BE ----------------------------------------------------------------- */ @@ -884,11 +892,18 @@ static const UConverterImpl _UTF16BEImpl={ NULL }; +const UConverterStaticData _UTF16BEStaticData={ + sizeof(UConverterStaticData), +"UTF16_BigEndian", + 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, + 2, { 0xff, 0xfd, 0, 0 } +}; + + const UConverterSharedData _UTF16BEData={ sizeof(UConverterSharedData), ~((uint32_t) 0), - NULL, NULL, &_UTF16BEImpl, "UTF16_BigEndian", - 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, - { 0, 2, { 0xff, 0xfd, 0, 0 } } + NULL, NULL, &_UTF16BEStaticData, FALSE, &_UTF16BEImpl, + 0 }; /* UTF-16LE ----------------------------------------------------------------- */ @@ -1080,9 +1095,17 @@ static const UConverterImpl _UTF16LEImpl={ NULL }; + +const UConverterStaticData _UTF16LEStaticData={ + sizeof(UConverterStaticData), + "UTF16_LittleEndian", + 1200, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, + 2, { 0xfd, 0xff, 0, 0 } +}; + + const UConverterSharedData _UTF16LEData={ sizeof(UConverterSharedData), ~((uint32_t) 0), - NULL, NULL, &_UTF16LEImpl, "UTF16_LittleEndian", - 1200, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, - { 0, 2, { 0xfd, 0xff, 0, 0 } } + NULL, NULL, &_UTF16LEStaticData, FALSE, &_UTF16LEImpl, + 0 }; diff --git a/icu4c/source/common/ucnvlat1.c b/icu4c/source/common/ucnvlat1.c index df5ca8a58a..50c9787e2c 100644 --- a/icu4c/source/common/ucnvlat1.c +++ b/icu4c/source/common/ucnvlat1.c @@ -160,9 +160,16 @@ static const UConverterImpl _Latin1Impl={ NULL }; +const UConverterStaticData _Latin1StaticData={ + sizeof(UConverterStaticData), + "LATIN_1", + 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, + 1, { 0x1a, 0, 0, 0 } +}; + + const UConverterSharedData _Latin1Data={ 
sizeof(UConverterSharedData), ~((uint32_t) 0), - NULL, NULL, &_Latin1Impl, "LATIN_1", - 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, - { 0, 1, { 0x1a, 0, 0, 0 } } + NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl, + 0 }; diff --git a/icu4c/source/common/ucnvmbcs.c b/icu4c/source/common/ucnvmbcs.c index d0ed928aea..0f1df36c43 100644 --- a/icu4c/source/common/ucnvmbcs.c +++ b/icu4c/source/common/ucnvmbcs.c @@ -29,17 +29,17 @@ _MBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErr sharedData->table->mbcs.starters = (bool_t*)raw; oldraw = raw += sizeof(bool_t)*256; - sharedData->table->mbcs.toUnicode = ucmp16_cloneFromData(&raw, pErrorCode); + ucmp16_initFromData(&sharedData->table->mbcs.toUnicode, &raw, pErrorCode); if(((raw-oldraw)&3)!=0) { raw+=4-((raw-oldraw)&3); /* pad to 4 */ } - sharedData->table->mbcs.fromUnicode = ucmp16_cloneFromData(&raw, pErrorCode); + ucmp16_initFromData(&sharedData->table->mbcs.fromUnicode, &raw, pErrorCode); } static void _MBCSUnload(UConverterSharedData *sharedData) { - ucmp16_close (sharedData->table->mbcs.fromUnicode); - ucmp16_close (sharedData->table->mbcs.toUnicode); + ucmp16_close (&sharedData->table->mbcs.fromUnicode); + ucmp16_close (&sharedData->table->mbcs.toUnicode); uprv_free (sharedData->table); } @@ -66,7 +66,7 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this, - myToUnicode = _this->sharedData->table->mbcs.toUnicode; + myToUnicode = &_this->sharedData->table->mbcs.toUnicode; myStarters = _this->sharedData->table->mbcs.starters; while (mySourceIndex < sourceLength) @@ -184,7 +184,7 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, UChar oldMySourceChar = 0x0000; bool_t *myStarters = NULL; - myToUnicode = _this->sharedData->table->mbcs.toUnicode; + myToUnicode = &_this->sharedData->table->mbcs.toUnicode; myStarters = _this->sharedData->table->mbcs.starters; while (mySourceIndex < sourceLength) @@ -316,7 +316,7 @@ static void T_UConverter_fromUnicode_MBCS 
(UConverter * _this, UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; - myFromUnicode = _this->sharedData->table->mbcs.fromUnicode; + myFromUnicode = &_this->sharedData->table->mbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -405,7 +405,7 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; - myFromUnicode = _this->sharedData->table->mbcs.fromUnicode; + myFromUnicode = &_this->sharedData->table->mbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -499,7 +499,7 @@ static UChar32 T_UConverter_getNextUChar_MBCS(UConverter* converter, if (converter->sharedData->table->mbcs.starters[(uint8_t)**source] == FALSE) { /*Not lead byte: we update the source ptr and get the codepoint*/ - myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode, + myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicode), (UChar)(**source)); (*source)++; } @@ -513,7 +513,7 @@ static UChar32 T_UConverter_getNextUChar_MBCS(UConverter* converter, return 0xFFFD; } - myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode, + myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicode), (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)))); (*source) += 2; @@ -572,9 +572,17 @@ static const UConverterImpl _MBCSImpl={ _MBCSGetStarters }; + +const UConverterStaticData _MBCSStaticData={ + sizeof(UConverterStaticData), + "MBCS", + 0, UCNV_IBM, UCNV_MBCS, 1, 1, + 1, { 0, 0, 0, 0 } +}; + + const UConverterSharedData _MBCSData={ sizeof(UConverterSharedData), 1, - NULL, NULL, &_MBCSImpl, "MBCS", - 0, UCNV_IBM, UCNV_MBCS, 1, 1, - { 0, 1, { 0, 0, 0, 0 } } + NULL, NULL, &_MBCSStaticData, FALSE, &_MBCSImpl, + 0 }; diff --git a/icu4c/source/common/ucnvsbcs.c b/icu4c/source/common/ucnvsbcs.c index 1887268b14..6173c3d609 100644 --- a/icu4c/source/common/ucnvsbcs.c +++ 
b/icu4c/source/common/ucnvsbcs.c @@ -24,14 +24,14 @@ static void _SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) { - sharedData->table->sbcs.toUnicode = (UChar*)raw; - raw += sizeof(UChar)*256; - sharedData->table->sbcs.fromUnicode = ucmp8_cloneFromData(&raw, pErrorCode); + sharedData->table->sbcs.toUnicode = (uint16_t*)raw; + raw += sizeof(uint16_t)*256; + ucmp8_initFromData(&sharedData->table->sbcs.fromUnicode, &raw, pErrorCode); } static void _SBCSUnload(UConverterSharedData *sharedData) { - ucmp8_close (sharedData->table->sbcs.fromUnicode); + ucmp8_close (&sharedData->table->sbcs.fromUnicode); uprv_free (sharedData->table); } @@ -121,7 +121,7 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this, CompactByteArray *myFromUnicode; unsigned char targetChar = 0x00; - myFromUnicode = _this->sharedData->table->sbcs.fromUnicode; + myFromUnicode = &_this->sharedData->table->sbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -241,11 +241,17 @@ static const UConverterImpl _SBCSImpl={ NULL }; +const UConverterStaticData _SBCSStaticData={ + sizeof(UConverterStaticData), + "SBCS", + 0, UCNV_IBM, UCNV_SBCS, 1, 1, + 1, { 0, 0, 0, 0 } +}; + const UConverterSharedData _SBCSData={ sizeof(UConverterSharedData), 1, - NULL, NULL, &_SBCSImpl, "SBCS", - 0, UCNV_IBM, UCNV_SBCS, 1, 1, - { 0, 1, { 0, 0, 0, 0 } } + NULL, NULL, &_SBCSStaticData, FALSE, &_SBCSImpl, + 0 }; /* DBCS --------------------------------------------------------------------- */ @@ -253,18 +259,18 @@ const UConverterSharedData _SBCSData={ U_CFUNC void _DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) { const uint8_t *oldraw = raw; - sharedData->table->dbcs.toUnicode=ucmp16_cloneFromData(&raw, pErrorCode); + ucmp16_initFromData(&sharedData->table->dbcs.toUnicode,&raw, pErrorCode); if(((raw-oldraw)&3)!=0) { raw+=4-((raw-oldraw)&3); /* pad to 4 */ } - sharedData->table->dbcs.fromUnicode 
=ucmp16_cloneFromData(&raw, pErrorCode); + ucmp16_initFromData(&sharedData->table->dbcs.fromUnicode, &raw, pErrorCode); } U_CFUNC void _DBCSUnload(UConverterSharedData *sharedData) { - ucmp16_close (sharedData->table->dbcs.fromUnicode); - ucmp16_close (sharedData->table->dbcs.toUnicode); - uprv_free (sharedData->table); + ucmp16_close (&sharedData->table->dbcs.fromUnicode); + ucmp16_close (&sharedData->table->dbcs.toUnicode); + uprv_free (sharedData->table); } void T_UConverter_toUnicode_DBCS (UConverter * _this, @@ -286,7 +292,7 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this, UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; - myToUnicode = _this->sharedData->table->dbcs.toUnicode; + myToUnicode = &_this->sharedData->table->dbcs.toUnicode; while (mySourceIndex < sourceLength) { @@ -386,7 +392,7 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this, UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; - myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; + myFromUnicode = &_this->sharedData->table->dbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -475,7 +481,7 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverter* converter, } /*Gets the corresponding codepoint*/ - myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, + myUChar = ucmp16_getu((&converter->sharedData->table->dbcs.toUnicode), (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)))); /*update the input pointer*/ @@ -527,9 +533,15 @@ static const UConverterImpl _DBCSImpl={ NULL }; +const UConverterStaticData _DBCSStaticData={ + sizeof(UConverterStaticData), + "DBCS", + 0, UCNV_IBM, UCNV_DBCS, 2, 2, + 1, { 0, 0, 0, 0 } /* subchar */ +}; + const UConverterSharedData _DBCSData={ sizeof(UConverterSharedData), 1, - NULL, NULL, &_DBCSImpl, "DBCS", - 0, UCNV_IBM, UCNV_DBCS, 2, 2, - { 0, 1, { 0, 0, 0, 0 } } + NULL, NULL, &_DBCSStaticData, FALSE, &_DBCSImpl, + 0, /* tounicodestatus */ }; diff --git 
a/icu4c/source/common/unicode/ucnv_bld.h b/icu4c/source/common/unicode/ucnv_bld.h index a354a73126..c87bf69685 100644 --- a/icu4c/source/common/unicode/ucnv_bld.h +++ b/icu4c/source/common/unicode/ucnv_bld.h @@ -26,14 +26,6 @@ #define UCNV_ERROR_BUFFER_LENGTH 20 #define UCNV_MAX_AMBIGUOUSCCSIDS 5 -#ifndef UCMP16_H -typedef struct _CompactShortArray CompactShortArray; -#endif - -#ifndef UCMP8_H -typedef struct _CompactByteArray CompactByteArray; -#endif - #define UCNV_IMPLEMENTED_CONVERSION_TYPES 9 /*Sentinel Value used to check the integrity of the binary data files */ @@ -102,44 +94,14 @@ typedef enum { UCNV_IBM = 0 } UConverterPlatform; - -/*Table Node Definitions */ -typedef struct - { - UChar *toUnicode; /* [256]; */ - CompactByteArray *fromUnicode; - } -UConverterSBCSTable; - -typedef struct - { - CompactShortArray *toUnicode; - CompactShortArray *fromUnicode; - } -UConverterDBCSTable; - -typedef struct - { - bool_t *starters; /* [256]; */ - CompactShortArray *toUnicode; - CompactShortArray *fromUnicode; - } -UConverterMBCSTable; - -typedef union - { - UConverterSBCSTable sbcs; - UConverterDBCSTable dbcs; - UConverterMBCSTable mbcs; - } -UConverterTable; - - U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv itself is compiled under C++, the linkage of the funcptrs will work. */ +union UConverterTable; +typedef union UConverterTable UConverterTable; + struct UConverterImpl; typedef struct UConverterImpl UConverterImpl; @@ -175,6 +137,27 @@ typedef struct UConverterImpl UConverterImpl; * it is in UConverterImpl and hardly used. 
*/ +typedef struct { + uint32_t structSize; /* Size of this structure */ + + const char name [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* internal name of the converter- invariant chars */ + + int32_t codepage; /* codepage # (now IBM-$codepage) */ + + int8_t platform; /* platform of the converter (only IBM now) */ + int8_t conversionType; /* conversion type */ + + int8_t minBytesPerChar; /* Minimum # bytes per char in this codepage */ + int8_t maxBytesPerChar; /* Maximum # bytes per char in this codepage */ + + int8_t subCharLen; + + uint8_t reserved[3]; /* to round out the structure */ + + uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; + +} UConverterStaticData; + /* * Defines the UConverterSharedData struct, * the immutable, shared part of UConverter. @@ -185,23 +168,13 @@ typedef struct { const void *dataMemory; /* from udata_openChoice() */ UConverterTable *table; /* Pointer to conversion data */ + + const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */ + bool_t staticDataOwned; /* T if we own the staticData */ const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */ - const char *name; /* internal name of the converter */ - int32_t codepage; /* codepage # (now IBM-$codepage) */ - - int8_t platform; /* platform of the converter (only IBM now) */ - int8_t conversionType; /* conversion type */ - - int8_t minBytesPerChar; /* Minimum # bytes per char in this codepage */ - int8_t maxBytesPerChar; /* Maximum # bytes per char in this codepage */ - - /*initial values of some members of the mutable part of object */ - struct { - uint32_t toUnicodeStatus; - int8_t subCharLen; - uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; - } defaultConverterValues; + /*initial values of some members of the mutable part of object */ + uint32_t toUnicodeStatus; } UConverterSharedData; @@ -286,3 +259,8 @@ UConverterDataLMBCS; #define CONVERTER_FILE_EXTENSION ".cnv" #endif /* _UCNV_BLD */ + + + + + diff --git a/icu4c/source/tools/makeconv/makeconv.c 
b/icu4c/source/tools/makeconv/makeconv.c index 23ffb116f0..6f6114bdd8 100644 --- a/icu4c/source/tools/makeconv/makeconv.c +++ b/icu4c/source/tools/makeconv/makeconv.c @@ -19,6 +19,7 @@ #include "unicode/ucnv_bld.h" #include "unicode/ucnv_err.h" #include "ucnv_imp.h" +#include "ucnv_cnv.h" #include "cstring.h" #include "cmemory.h" #include "filestrm.h" @@ -29,133 +30,58 @@ #include "unewdata.h" #include "ucmpwrit.h" -/*Defines the struct of a UConverterSharedData the immutable, shared part of - *UConverter - - * This is the definition from ICU 1.4, necessary to read converter data - * version 1 because the structure is directly embedded in the data. - * See udata.html for why this is bad (pointers, enums, padding...). + +/* + * Global - verbosity */ -typedef struct - { - uint32_t structSize; /* Size of this structure */ - void *dataMemory; - uint32_t referenceCounter; /*used to count number of clients */ - char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */ - UConverterPlatform platform; /*platform of the converter (only IBM now) */ - int32_t codepage; /*codepage # (now IBM-$codepage) */ - UConverterType conversionType; /*conversion type */ - int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */ - int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */ - struct - { /*initial values of some members of the mutable part of object */ - uint32_t toUnicodeStatus; - int8_t subCharLen; - unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; - } - defaultConverterValues; - UConverterTable *table; /*Pointer to conversion data */ - } -UConverterSharedData_1_4; +bool_t VERBOSE = FALSE; -struct UConverter_1_4 - { - int32_t toUnicodeStatus; /*Used to internalize stream status information */ - int32_t fromUnicodeStatus; - int8_t invalidCharLength; - int8_t invalidUCharLength; - int8_t pad; - int32_t mode; - int8_t subCharLen; /*length of the codepage specific character sequence */ - unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; 
/*codepage specific character sequence */ - UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for - *output stream by the Error function pointers - */ - unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for - * output stream by the Error function pointers - */ - int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars - *in charErrorBuffer - */ - int8_t charErrorBufferLength; /*used to indicate the number of valid bytes - *in charErrorBuffer - */ - - UChar invalidUCharBuffer[3]; - char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN]; - /*Error function pointer called when conversion issues - *occur during a T_UConverter_fromUnicode call - */ - void (*fromUCharErrorBehaviour) (struct UConverter_1_4 *, - char **, - const char *, - const UChar **, - const UChar *, - int32_t* offsets, - bool_t, - UErrorCode *); - /*Error function pointer called when conversion issues - *occur during a T_UConverter_toUnicode call - */ - void (*fromCharErrorBehaviour) (struct UConverter_1_4 *, - UChar **, - const UChar *, - const char **, - const char *, - int32_t* offsets, - bool_t, - UErrorCode *); - - UConverterSharedData_1_4 *sharedData; /*Pointer to the shared immutable part of the - *converter object - */ - void *extraInfo; /*currently only used to point to a struct containing UConverter_1_4 used by iso 2022 - Could be used by clients writing their own call back function to - pass context to them - */ - }; - -typedef struct UConverter_1_4 UConverter_1_4; /*Reads the header of the table file and fills in basic knowledge about the converter *in "converter" */ -static void readHeaderFromFile(UConverter_1_4* myConverter, FileStream* convFile, const char* converterName, UErrorCode* err); +static void readHeaderFromFile(UConverterStaticData* myConverter, FileStream* convFile, const char* converterName, UErrorCode* err); -/*Reads the rest of the file, and fills up the shared objects if necessary*/ 
-static void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err); +/*Reads the rest of the file, and fills up the shared objects if necessary +Returns the UConverterTable. */ +static UConverterTable* loadMBCSTableFromFile(FileStream* convFile, UConverterStaticData* staticData, UErrorCode* err); -/*Reads the rest of the file, and fills up the shared objects if necessary*/ -static void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err); +/*Reads the rest of the file, and fills up the shared objects if necessary +Returns the UConverterTable. */ +static UConverterTable* loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverterStaticData* staticData, UErrorCode* err); -/*Reads the rest of the file, and fills up the shared objects if necessary*/ -static void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err); +/*Reads the rest of the file, and fills up the shared objects if necessary +Returns the UConverterTable. */ +static UConverterTable* loadSBCSTableFromFile(FileStream* convFile, UConverterStaticData* staticData, UErrorCode* err); -/*Reads the rest of the file, and fills up the shared objects if necessary*/ -static void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err); +/*Reads the rest of the file, and fills up the shared objects if necessary +Returns the UConverterTable. */ +static UConverterTable* loadDBCSTableFromFile(FileStream* convFile, UConverterStaticData* staticData, UErrorCode* err); -/* creates a UConverterSharedData_1_4 from a mapping file, fills in necessary links to it the - * appropriate function pointers - * if the data tables are already in memory +/* creates a UConverterSharedData from a mapping file. + * Fills in: *staticData, *table. Converter is NOT otherwise useful. 
*/ -static UConverterSharedData_1_4* createConverterFromTableFile(const char* realName, UErrorCode* err); +static UConverterSharedData* createConverterFromTableFile(const char* realName, UErrorCode* err); -/*writes a CompactShortArray to a file*/ -static void writeCompactShortArrayToFile(FileStream* outfile, const CompactShortArray* myArray); +/* + * Set up the UNewData and write the converter.. + */ +void writeConverterData(UConverterSharedData *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status); -/*writes a CompactByteArray to a file*/ -static void writeCompactByteArrayToFile(FileStream* outfile, const CompactByteArray* myArray); +/* + * Writes the StaticData followed by the Table to the udata + */ +static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData* data); -/*writes a binary to a file*/ -static void writeUConverterSharedDataToFile(const char* filename, - UConverterSharedData_1_4* mySharedData, - UErrorCode* err); - - -static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData_1_4* data); - -bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedData); +/* + * Deletes the static data, table. Ignores any other options in the shareddata. 
+ */ +bool_t makeconv_deleteSharedConverterData(UConverterSharedData* deadSharedData); +/* + * Utility functions + */ static UConverterPlatform getPlatformFromName(char* name); static int32_t getCodepageNumberFromName(char* name); @@ -245,36 +171,59 @@ static const UDataInfo dataInfo={ 0, 0x63, 0x6e, 0x76, 0x74, /* dataFormat="cnvt" */ - 2, 0, 0, 0, /* formatVersion */ - 1, 3, 1, 0 /* dataVersion */ + 3, 0, 0, 0, /* formatVersion */ + 1, 4, 2, 0 /* dataVersion */ }; -void writeConverterData(UConverterSharedData_1_4 *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status) +void writeConverterData(UConverterSharedData *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status) { UNewDataMemory *mem; uint32_t sz2; + + if(U_FAILURE(*status)) + { + return; + } mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); + + if(U_FAILURE(*status)) + { + fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", + cnvName, + "cnv", + u_errorName(*status)); + return; + } + + if(VERBOSE) + { + fprintf(stderr, "- Opened udata %s.%s\n", cnvName, "cnv"); + } WriteConverterSharedData(mem, mySharedData); sz2 = udata_finish(mem, status); -/* printf("Done. Wrote %d bytes.\n", sz2); */ + if(VERBOSE) + { + fprintf(stderr, "- Wrote %d bytes to the udata.\n", sz2); + } } static UOption options[]={ - UOPTION_HELP_H, - UOPTION_HELP_QUESTION_MARK, - UOPTION_COPYRIGHT, - UOPTION_VERSION, - UOPTION_DESTDIR + UOPTION_HELP_H, /* 0 Numbers for those who*/ + UOPTION_HELP_QUESTION_MARK, /* 1 can't count. 
*/ + UOPTION_COPYRIGHT, /* 2 */ + UOPTION_VERSION, /* 3 */ + UOPTION_DESTDIR, /* 4 */ + UOPTION_VERBOSE /* 5 */ }; int main(int argc, const char *argv[]) { - UConverterSharedData_1_4* mySharedData = NULL; + UConverterSharedData* mySharedData = NULL; UErrorCode err = U_ZERO_ERROR; char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; const char *pname = *argv; @@ -303,22 +252,24 @@ int main(int argc, const char *argv[]) "\t\t-h or -? or --help this usage text\n" "\t\t-V or --version show a version message\n" "\t\t-c or --copyright include a copyright notice\n" - "\t\t-d or --destdir destination directory, followed by the path\n", + "\t\t-d or --destdir destination directory, followed by the path\n" + "\t\t-v or --verbose Turn on verbose output\n", argv[0]); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } if(options[3].doesOccur) { - printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", + fprintf(stderr,"makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n", dataInfo.formatVersion[0], dataInfo.formatVersion[1]); - printf("Copyright (C) 1998-2000, International Business Machines\n"); - printf("Corporation and others. All Rights Reserved.\n"); + fprintf(stderr, "Copyright (C) 1998-2000, International Business Machines\n"); + fprintf(stderr,"Corporation and others. 
All Rights Reserved.\n"); exit(0); } /* get the options values */ haveCopyright = options[2].doesOccur; destdir = options[4].value; + VERBOSE = options[5].doesOccur; if (destdir != NULL && *destdir != 0) { uprv_strcpy(outFileName, destdir); @@ -332,8 +283,9 @@ int main(int argc, const char *argv[]) destdirlen = 0; outBasename = outFileName; } + - for (++argv; *argv; ++argv) + for (++argv; --argc; ++argv) { err = U_ZERO_ERROR; arg = getLongPathname(*argv); @@ -375,7 +327,7 @@ int main(int argc, const char *argv[]) if (U_FAILURE(err) || (mySharedData == NULL)) { /* if an error is found, print out an error msg and keep going */ - printf("Error creating \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err, + fprintf(stderr, "Error creating \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err, u_errorName(err)); err = U_ZERO_ERROR; } @@ -387,12 +339,12 @@ int main(int argc, const char *argv[]) if(U_FAILURE(err)) { /* in an error is found, print out a error msg and keep going*/ - printf("Error writing \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err, - u_errorName(err)); + fprintf(stderr, "Error writing \"%s\" file for \"%s\" (error code %d - %s)\n", outFileName, arg, err, + u_errorName(err)); } else { - puts(outFileName); + puts(outFileName); } } @@ -439,7 +391,7 @@ int32_t getCodepageNumberFromName(char* name) } /*Reads the header of the table file and fills in basic knowledge about the converter in "converter"*/ -void readHeaderFromFile(UConverter_1_4* myConverter, +void readHeaderFromFile(UConverterStaticData* myConverter, FileStream* convFile, const char* converterName, UErrorCode* err) @@ -483,12 +435,12 @@ void readHeaderFromFile(UConverter_1_4* myConverter, { if (uprv_strlen(value) != 0) { - uprv_strcpy(myConverter->sharedData->name, value); - myConverter->sharedData->platform = getPlatformFromName(value); - myConverter->sharedData->codepage = getCodepageNumberFromName(value); + 
uprv_strcpy((char*)myConverter->name, value); + myConverter->platform = getPlatformFromName(value); + myConverter->codepage = getCodepageNumberFromName(value); } else { - uprv_strcpy(myConverter->sharedData->name, converterName); - myConverter->sharedData->platform = UCNV_IBM; + uprv_strcpy((char*)myConverter->name, converterName); + myConverter->platform = UCNV_IBM; } } @@ -500,19 +452,19 @@ void readHeaderFromFile(UConverter_1_4* myConverter, hasConvClass = TRUE; if (uprv_strcmp(value, "DBCS") == 0) { - myConverter->sharedData->conversionType = UCNV_DBCS; + myConverter->conversionType = UCNV_DBCS; } else if (uprv_strcmp(value, "SBCS") == 0) { - myConverter->sharedData->conversionType = UCNV_SBCS; + myConverter->conversionType = UCNV_SBCS; } else if (uprv_strcmp(value, "MBCS") == 0) { - myConverter->sharedData->conversionType = UCNV_MBCS; + myConverter->conversionType = UCNV_MBCS; } else if (uprv_strcmp(value, "EBCDIC_STATEFUL") == 0) { - myConverter->sharedData->conversionType = UCNV_EBCDIC_STATEFUL; + myConverter->conversionType = UCNV_EBCDIC_STATEFUL; } else { @@ -524,24 +476,24 @@ void readHeaderFromFile(UConverter_1_4* myConverter, /*get mb_cur_max amount*/ else if (uprv_strcmp(key, "mb_cur_max") == 0) - myConverter->sharedData->maxBytesPerChar = (int8_t)T_CString_stringToInteger(value, 10); + myConverter->maxBytesPerChar = (int8_t)T_CString_stringToInteger(value, 10); /*get mb_cur_max amount*/ else if (uprv_strcmp(key, "mb_cur_min") == 0) - myConverter->sharedData->minBytesPerChar = (int8_t)T_CString_stringToInteger(value, 10); + myConverter->minBytesPerChar = (int8_t)T_CString_stringToInteger(value, 10); else if (uprv_strcmp(key, "subchar") == 0) { hasSubChar = TRUE; - myConverter->sharedData->defaultConverterValues.subCharLen = 0; + myConverter->subCharLen = 0; /*readies value for tokenizing, we want to break each byte of the codepoint into single tokens*/ line = value; while (*line) { line = getToken(codepointByte, line, CODEPOINT_SEPARATORS); - 
myConverter->sharedData->defaultConverterValues.subChar[(myConverter->sharedData->defaultConverterValues.subCharLen++)] = + myConverter->subChar[(myConverter->subCharLen++)] = (unsigned char)T_CString_stringToInteger(codepointByte, 16); } @@ -553,23 +505,13 @@ void readHeaderFromFile(UConverter_1_4* myConverter, line = storeLine; } - if (!hasSubChar) {myConverter->subCharLen = myConverter->sharedData->defaultConverterValues.subCharLen = 0;} - else - { - myConverter->subCharLen = myConverter->sharedData->defaultConverterValues.subCharLen; - uprv_memcpy(myConverter->subChar, - myConverter->sharedData->defaultConverterValues.subChar, - myConverter->subCharLen); - } - - if (!endOfHeader || !hasConvClass) *err = U_INVALID_TABLE_FORMAT; return; } -void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err) +UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticData* myConverter, UErrorCode* err) { char storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; @@ -592,7 +534,8 @@ void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UE } /*create a compact array with replacement chars as default chars*/ - myFromUnicode = ucmp8_open(0); + ucmp8_init(&myUConverterTable->sbcs.fromUnicode, 0); + myFromUnicode = &myUConverterTable->sbcs.fromUnicode; if (myFromUnicode == NULL) { uprv_free(myUConverterTable); @@ -628,15 +571,12 @@ void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UE } } ucmp8_compact(myFromUnicode, 1); - myUConverterTable->sbcs.fromUnicode = myFromUnicode; /*Initially sets the referenceCounter to 1*/ - myConverter->sharedData->referenceCounter = 1; - myConverter->sharedData->table = myUConverterTable; - return; + return myUConverterTable; } -void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err) +UConverterTable *loadMBCSTableFromFile(FileStream* convFile, UConverterStaticData* myConverter, UErrorCode* err) { char 
storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; @@ -674,8 +614,11 @@ void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UE myUConverterTable->mbcs.starters[i] = FALSE; } - myFromUnicode = ucmp16_open((uint16_t)replacementChar); - myToUnicode = ucmp16_open((int16_t)0xFFFD); + myFromUnicode = &myUConverterTable->mbcs.fromUnicode; + ucmp16_init(myFromUnicode, (uint16_t)replacementChar); + + myToUnicode = &myUConverterTable->mbcs.toUnicode; + ucmp16_init(myToUnicode, (int16_t)0xFFFD); while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT)) { @@ -703,21 +646,17 @@ void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UE ucmp16_compact(myFromUnicode); ucmp16_compact(myToUnicode); - myUConverterTable->mbcs.fromUnicode = myFromUnicode; - myUConverterTable->mbcs.toUnicode = myToUnicode; - myConverter->sharedData->referenceCounter = 1; - myConverter->sharedData->table = myUConverterTable; /* if the default subCharLen is > 1 we need to insert it in the data structure so that we know how to transition */ if (myConverter->subCharLen > 1) { - myConverter->sharedData->table->mbcs.starters[(uint8_t)(myConverter->subChar[0])] = TRUE; + myUConverterTable->mbcs.starters[(uint8_t)(myConverter->subChar[0])] = TRUE; } - return; + return myUConverterTable; } -void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err) +UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverterStaticData* myConverter, UErrorCode* err) { char storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; @@ -741,8 +680,11 @@ void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* myCo } - myFromUnicode = ucmp16_open((uint16_t)replacementChar); - myToUnicode = ucmp16_open((int16_t)0xFFFD); + myFromUnicode = &myUConverterTable->dbcs.fromUnicode; + ucmp16_init(myFromUnicode, (uint16_t)replacementChar); + + myToUnicode = &myUConverterTable->dbcs.toUnicode; + 
ucmp16_init(myToUnicode, (int16_t)0xFFFD); while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT)) { @@ -769,16 +711,12 @@ void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* myCo ucmp16_compact(myFromUnicode); ucmp16_compact(myToUnicode); - myUConverterTable->dbcs.fromUnicode = myFromUnicode; - myUConverterTable->dbcs.toUnicode = myToUnicode; - myConverter->sharedData->referenceCounter = 1; - myConverter->sharedData->table = myUConverterTable; - return; + return myUConverterTable; } -void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err) +UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticData* myConverter, UErrorCode* err) { char storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; @@ -798,11 +736,14 @@ void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UE if (myUConverterTable == NULL) { *err = U_MEMORY_ALLOCATION_ERROR; - return; + return NULL; } - - myFromUnicode = ucmp16_open((int16_t)replacementChar); - myToUnicode = ucmp16_open((int16_t)0xFFFD); + + myFromUnicode = &(myUConverterTable->dbcs.fromUnicode); + ucmp16_init(myFromUnicode, (int16_t)replacementChar); + + myToUnicode = &(myUConverterTable->dbcs.toUnicode); + ucmp16_init(myToUnicode, (int16_t)0xFFFD); while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT)) { @@ -828,41 +769,39 @@ void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UE ucmp16_compact(myFromUnicode); ucmp16_compact(myToUnicode); - myUConverterTable->dbcs.fromUnicode = myFromUnicode; - myUConverterTable->dbcs.toUnicode = myToUnicode; - - - myConverter->sharedData->referenceCounter = 1; - myConverter->sharedData->table = myUConverterTable; - return; + return myUConverterTable; } /*deletes the "shared" type object*/ -bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedData) +bool_t makeconv_deleteSharedConverterData(UConverterSharedData* 
deadSharedData) { - if (deadSharedData->conversionType == UCNV_SBCS) + if (deadSharedData->staticData->conversionType == UCNV_SBCS) { - ucmp8_close(deadSharedData->table->sbcs.fromUnicode); + ucmp8_close(&(deadSharedData->table->sbcs.fromUnicode)); uprv_free(deadSharedData->table); uprv_free(deadSharedData); } - else if (deadSharedData->conversionType == UCNV_MBCS) + else if (deadSharedData->staticData->conversionType == UCNV_MBCS) { - ucmp16_close(deadSharedData->table->mbcs.fromUnicode); - ucmp16_close(deadSharedData->table->mbcs.toUnicode); + ucmp16_close(&(deadSharedData->table->mbcs.fromUnicode)); + ucmp16_close(&(deadSharedData->table->mbcs.toUnicode)); uprv_free(deadSharedData->table); + uprv_free((UConverterStaticData*)deadSharedData->staticData); uprv_free(deadSharedData); } - else if ((deadSharedData->conversionType == UCNV_DBCS) || (deadSharedData->conversionType == UCNV_EBCDIC_STATEFUL)) + else if ((deadSharedData->staticData->conversionType == UCNV_DBCS) || (deadSharedData->staticData->conversionType == UCNV_EBCDIC_STATEFUL)) { - ucmp16_close(deadSharedData->table->dbcs.fromUnicode); - ucmp16_close(deadSharedData->table->dbcs.toUnicode); + ucmp16_close(&(deadSharedData->table->dbcs.fromUnicode)); + ucmp16_close(&(deadSharedData->table->dbcs.toUnicode)); uprv_free(deadSharedData->table); + uprv_free((UConverterStaticData*)deadSharedData->staticData); uprv_free(deadSharedData); } else - { + { /* ? 
*/ + uprv_free(deadSharedData->table); + uprv_free((UConverterStaticData*)deadSharedData->staticData); uprv_free(deadSharedData); } return TRUE; @@ -870,14 +809,13 @@ bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedDa -/*creates a UConverter_1_4, fills in necessary links to it the appropriate function pointers*/ -UConverterSharedData_1_4* createConverterFromTableFile(const char* converterName, UErrorCode* err) +/*creates a UConverterStaticData, fills in necessary links to it the appropriate function pointers*/ +UConverterSharedData* createConverterFromTableFile(const char* converterName, UErrorCode* err) { FileStream* convFile = NULL; int32_t i = 0; - UConverterSharedData_1_4* mySharedData = NULL; - UConverter_1_4 myConverter; - + UConverterSharedData* mySharedData = NULL; + UConverterStaticData* myStaticData = NULL; if (U_FAILURE(*err)) return NULL; @@ -889,84 +827,99 @@ UConverterSharedData_1_4* createConverterFromTableFile(const char* converterName } - mySharedData = (UConverterSharedData_1_4*) uprv_malloc(sizeof(UConverterSharedData_1_4)); + mySharedData = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData)); if (mySharedData == NULL) { *err = U_MEMORY_ALLOCATION_ERROR; T_FileStream_close(convFile); + return; } - mySharedData->structSize = sizeof(UConverterSharedData_1_4); + mySharedData->structSize = sizeof(UConverterSharedData); + + myStaticData = (UConverterStaticData*) uprv_malloc(sizeof(UConverterStaticData)); + mySharedData->staticData = myStaticData; + if (myStaticData == NULL) + { + *err = U_MEMORY_ALLOCATION_ERROR; + T_FileStream_close(convFile); + return; + } + myStaticData->structSize = sizeof(UConverterStaticData); + mySharedData->staticDataOwned = TRUE; + + mySharedData->dataMemory = NULL; /* for init */ - myConverter.sharedData = mySharedData; - readHeaderFromFile(&myConverter, convFile, converterName, err); + readHeaderFromFile(myStaticData, convFile, converterName, err); if (U_FAILURE(*err)) return 
NULL; - switch (mySharedData->conversionType) + switch (myStaticData->conversionType) { case UCNV_SBCS: { - loadSBCSTableFromFile(convFile, &myConverter, err); + mySharedData->table = loadSBCSTableFromFile(convFile, myStaticData, err); break; } case UCNV_MBCS: { - loadMBCSTableFromFile(convFile, &myConverter, err); + mySharedData->table = loadMBCSTableFromFile(convFile, myStaticData, err); break; } case UCNV_EBCDIC_STATEFUL: { - loadEBCDIC_STATEFULTableFromFile(convFile, &myConverter, err); + mySharedData->table = loadEBCDIC_STATEFULTableFromFile(convFile, myStaticData, err); break; } case UCNV_DBCS: { - loadDBCSTableFromFile(convFile, &myConverter, err); + mySharedData->table = loadDBCSTableFromFile(convFile, myStaticData, err); break; } - default : break; + default : + mySharedData->table = NULL; + break; }; T_FileStream_close(convFile); - return mySharedData; } -static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData_1_4* data) +static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData* data) { uint32_t size = 0; + + /* all read only, clean, platform independent data. Mmmm. :) */ + udata_writeBlock(pData, data->staticData, sizeof(UConverterStaticData)); + size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ - udata_writeBlock(pData, data, sizeof(UConverterSharedData_1_4)); - - size += sizeof(UConverterSharedData_1_4); /* Is 4-aligned- it ends with a pointer */ - - switch (data->conversionType) + /* Now, write the table .. 
Please note, the size of this table is + * */ + switch (data->staticData->conversionType) { - case UCNV_SBCS: - { - udata_writeBlock(pData, (void*)data->table->sbcs.toUnicode, sizeof(UChar)*256); - size += udata_write_ucmp8(pData, data->table->sbcs.fromUnicode); - size += sizeof(UChar)*256; - /* don't care aboutalignment */ + case UCNV_SBCS: { + udata_writeBlock(pData, (void*)data->table->sbcs.toUnicode, sizeof(uint16_t)*256); + size += sizeof(uint16_t)*256; + size += udata_write_ucmp8(pData, &data->table->sbcs.fromUnicode); + /* don't care about alignment anymore */ } break; case UCNV_DBCS: case UCNV_EBCDIC_STATEFUL: { - size += udata_write_ucmp16(pData,data->table->dbcs.toUnicode); + size += udata_write_ucmp16(pData,&data->table->dbcs.toUnicode); if(size%4) { udata_writePadding(pData, 4-(size%4) ); size+= 4-(size%4); } - size += udata_write_ucmp16(pData,data->table->dbcs.fromUnicode); + size += udata_write_ucmp16(pData,&data->table->dbcs.fromUnicode); } break; @@ -974,13 +927,13 @@ static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterShar { udata_writeBlock(pData, data->table->mbcs.starters, 256*sizeof(bool_t)); size += 256*sizeof(bool_t); - size += udata_write_ucmp16(pData,data->table->mbcs.toUnicode); + size += udata_write_ucmp16(pData,&data->table->mbcs.toUnicode); if(size%4) { udata_writePadding(pData, 4-(size%4) ); size+= 4-(size%4); } - size += udata_write_ucmp16(pData,data->table->mbcs.fromUnicode); + size += udata_write_ucmp16(pData,&data->table->mbcs.fromUnicode); } break; diff --git a/icu4c/source/tools/toolutil/ucmpwrit.c b/icu4c/source/tools/toolutil/ucmpwrit.c index cb816d1a83..a02dbd8125 100644 --- a/icu4c/source/tools/toolutil/ucmpwrit.c +++ b/icu4c/source/tools/toolutil/ucmpwrit.c @@ -5,24 +5,39 @@ #include "ucmpwrit.h" #include +/* + UCMP8 format: + + offset size what + --------------------------------------------- + 0 4 ICU_UCMP8_VERSION + 4 4 count + 8 512*2 = 1024 fIndex [uint16's] (UCMP8_kIndexCount*2) + 1032 1*fCount 
fArray [int8_t's] + + padding (to extend fCount to the nearest multiple of 4) +*/ + +/* Sanity check. */ +#if (UCMP8_kIndexCount != 512) +# error UCMP8_kIndexCount - changed size. Check to see if different pading needed. +#endif + U_CAPI uint32_t U_EXPORT2 udata_write_ucmp8 (UNewDataMemory *pData, const CompactByteArray* array) { - uint32_t size = sizeof(*array); + int32_t size; + + udata_write32(pData, ICU_UCMP8_VERSION); + size += 4; + + udata_write32(pData, array->fCount); + size += 4; + + udata_writeBlock(pData, array->fIndex, sizeof(array->fIndex[0])*UCMP8_kIndexCount); + size += sizeof(array->fIndex[0])*UCMP8_kIndexCount; - udata_writeBlock(pData, array, sizeof(*array)); udata_writeBlock(pData, array->fArray, sizeof(array->fArray[0])*array->fCount); size += sizeof(array->fArray[0])*array->fCount; - if((sizeof(*array)+(sizeof(array->fArray[0])*array->fCount))&1) - { - udata_writePadding(pData, 1); /* Pad total so far to even size */ - size += 1; - } - - udata_writeBlock(pData, array->fIndex, sizeof(array->fIndex[0])*UCMP8_kIndexCount); - size += sizeof(array->fIndex[0])*UCMP8_kIndexCount; - - while(size%4) /* end padding */ { udata_writePadding(pData, 1); /* Pad total so far to even size */ @@ -52,31 +67,54 @@ const int32_t UCMP16_kIndexShift = UCMP16_kIndexShift_int; const int32_t UCMP16_kIndexCount = UCMP16_kIndexCount_int; const uint32_t UCMP16_kBlockMask = UCMP16_kBlockMask_int; +/* + UCMP16 format: + + offset size what + --------------------------------------------- + 0 4 ICU_UCMP16_VERSION + 4 4 count + 8 4 blockShift + 12 4 blockMask + 16 512*2 = 1024 fIndex [uint16's] (UCMP16_kIndexCount*2) + 1032 1*fCount fArray [int16's] + + padding (to extend fCount to the nearest multiple of 4) + + */ + +#if (UCMP16_kIndexCount_int != 512) +# error UCMP16_kIndexCount - changed size. Check to see if different pading needed. 
+#endif U_CAPI uint32_t U_EXPORT2 udata_write_ucmp16 (UNewDataMemory *pData, const CompactShortArray* array) { - uint32_t size = sizeof(*array); + uint32_t size = 0; /* running count of bytes written; must start at 0 because it is accumulated with +=, drives the 4-byte padding loop, and is the return value */ - udata_writeBlock(pData, array, sizeof(*array)); - - if(sizeof(*array)&1) + udata_write32(pData, ICU_UCMP16_VERSION); + size += 4; + + udata_write32(pData, array->fCount); + size += 4; + + udata_write32(pData, array->kBlockShift); + size += 4; + + udata_write32(pData, array->kBlockMask); + size += 4; + + udata_writeBlock(pData, array->fIndex, sizeof(array->fIndex[0])*UCMP16_kIndexCount); + size += sizeof(array->fIndex[0])*UCMP16_kIndexCount; + + udata_writeBlock(pData, array->fArray, sizeof(array->fArray[0])*array->fCount); + size += sizeof(array->fArray[0])*array->fCount; + + while(size%4) /* end padding */ { - udata_writePadding(pData, 1); /* Pad to even size */ - size++; - } - - udata_writeBlock(pData, array->fArray, sizeof(array->fArray[0])*array->fCount); - size += sizeof(array->fArray[0])*array->fCount; - udata_writeBlock(pData, array->fIndex, sizeof(array->fIndex[0])*UCMP16_kIndexCount); - size += sizeof(array->fIndex[0])*UCMP16_kIndexCount; - - while(size%4) /* end padding */ - { - udata_writePadding(pData, 1); /* Pad total so far to even size */ - size += 1; + udata_writePadding(pData, 1); /* Pad total so far to even size */ + size += 1; } - return size; + return size; }