scuffed-code/icu4c/source/common/ucnvsbcs.c
2000-05-20 00:17:47 +00:00

662 lines
22 KiB
C

/*
**********************************************************************
* Copyright (C) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnvsbcs.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2000feb03
* created by: Markus W. Scherer
*
* Change history:
*
* 05/09/00 helena Added implementation to handle fallback mappings.
*/
#include "unicode/utypes.h"
#include "cmemory.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
/* SBCS --------------------------------------------------------------------- */
static void
_SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
const uint8_t *oldraw = raw;
sharedData->table->sbcs.toUnicode = (uint16_t*)raw;
raw += sizeof(uint16_t)*256; oldraw = raw;
ucmp8_initFromData(&sharedData->table->sbcs.fromUnicode, &raw, pErrorCode);
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
ucmp8_initFromData(&sharedData->table->sbcs.fromUnicodeFallback, &raw, pErrorCode);
}
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
sharedData->table->sbcs.toUnicodeFallback = (uint16_t*)raw;
}
}
static void
_SBCSUnload(UConverterSharedData *sharedData) {
ucmp8_close (&sharedData->table->sbcs.fromUnicode);
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
ucmp8_close (&sharedData->table->sbcs.fromUnicodeFallback);
uprv_free (sharedData->table);
}
void T_UConverter_toUnicode_SBCS (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
UErrorCode * err)
{
char *mySource = (char *) *source;
UChar *myTarget = *target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
UChar *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
myToUnicode = _this->sharedData->table->sbcs.toUnicode;
myToUnicodeFallback = _this->sharedData->table->sbcs.toUnicodeFallback;
while (mySourceIndex < sourceLength)
{
/*writing the UniChar to the output stream */
if (myTargetIndex < targetLength)
{
/*gets the corresponding UniChar */
targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];
if (targetUniChar != missingUCharMarker)
{
/* writes the UniChar to the output stream */
myTarget[myTargetIndex++] = targetUniChar;
}
else
{
if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
/* Look up in the fallback table first */
targetUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
if (targetUniChar != missingUCharMarker)
{
myTarget[myTargetIndex++] = targetUniChar;
}
}
if (targetUniChar == missingUCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
_this->invalidCharLength = 1;
ToU_CALLBACK_MACRO(_this,
myTarget,
myTargetIndex,
targetLimit,
mySource,
mySourceIndex,
sourceLimit,
offsets,
flush,
err);
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
}
}
}
else
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
break;
}
}
*target += myTargetIndex;
*source += mySourceIndex;
return;
}
void T_UConverter_fromUnicode_SBCS (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
unsigned char targetChar = 0x00;
myFromUnicode = &_this->sharedData->table->sbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->sbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
{
targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]);
if (myTargetIndex < targetLength)
{
mySourceIndex++;
if (targetChar != 0 || !mySource[mySourceIndex - 1])
{
/*writes the char to the output stream */
myTarget[myTargetIndex++] = targetChar;
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
/* Look up in the fallback table first */
targetChar = ucmp8_getu (myFromUnicodeFallback, mySource[mySourceIndex-1]);
if (targetChar != 0 || !mySource[mySourceIndex - 1])
{
/*writes the char to the output stream */
myTarget[myTargetIndex++] = targetChar;
}
}
if (targetChar == 0 && mySource[mySourceIndex-1] != 0)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
_this->invalidUCharLength = 1;
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
FromU_CALLBACK_MACRO(_this,
(char *)myTarget,
myTargetIndex,
targetLimit,
mySource,
mySourceIndex,
sourceLimit,
offsets,
flush,
err);
if (U_FAILURE (*err))
{
break;
}
_this->invalidUCharLength = 0;
}
}
else
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
break;
}
}
*target += myTargetIndex;
*source += mySourceIndex;
return;
}
UChar32 T_UConverter_getNextUChar_SBCS(UConverter* converter,
const char** source,
const char* sourceLimit,
UErrorCode* err)
{
UChar myUChar;
if ((*source)+1 > sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
}
/*Gets the corresponding codepoint*/
myUChar = converter->sharedData->table->sbcs.toUnicode[(unsigned char)*((*source)++)];
if (myUChar != 0xFFFD) return myUChar;
else
{
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = *source;
/* Do the fallback stuff */
if ((converter->useFallback == TRUE)&&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
myUChar = converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*((*source)-1)];
if (myUChar != 0xFFFD) return myUChar;
}
*err = U_INVALID_CHAR_FOUND;
/*Calls the ErrorFunctor after rewinding the input buffer*/
(*source)--;
/*It's is very likely that the ErrorFunctor will write to the
*internal buffers */
converter->fromCharErrorBehaviour(converter,
&myUCharPtr,
myUCharPtr + 1,
&sourceFinal,
sourceLimit,
NULL,
TRUE,
err);
/*makes the internal caching transparent to the user*/
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
return myUChar;
}
}
static const UConverterImpl _SBCSImpl={
UCNV_SBCS,
_SBCSLoad,
_SBCSUnload,
NULL,
NULL,
NULL,
T_UConverter_toUnicode_SBCS,
NULL,
T_UConverter_fromUnicode_SBCS,
NULL,
T_UConverter_getNextUChar_SBCS,
NULL
};
/* Static data is in tools/makeconv/ucnvstat.c for data-based
* converters. Be sure to update it as well.
*/
const UConverterSharedData _SBCSData={
sizeof(UConverterSharedData), 1,
NULL, NULL, NULL, FALSE, &_SBCSImpl,
0
};
/* DBCS --------------------------------------------------------------------- */
U_CFUNC void
_DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
const uint8_t *oldraw = raw;
ucmp16_initFromData(&sharedData->table->dbcs.toUnicode,&raw, pErrorCode);
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
oldraw = raw;
ucmp16_initFromData(&sharedData->table->dbcs.fromUnicode, &raw, pErrorCode);
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
ucmp16_initFromData(&sharedData->table->dbcs.fromUnicodeFallback, &raw, pErrorCode);
oldraw = raw;
}
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
ucmp16_initFromData(&sharedData->table->dbcs.toUnicodeFallback, &raw, pErrorCode);
}
}
U_CFUNC void
_DBCSUnload(UConverterSharedData *sharedData) {
ucmp16_close (&sharedData->table->dbcs.fromUnicode);
ucmp16_close (&sharedData->table->dbcs.toUnicode);
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
ucmp16_close (&sharedData->table->dbcs.fromUnicodeFallback);
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
ucmp16_close (&sharedData->table->dbcs.toUnicodeFallback);
uprv_free (sharedData->table);
}
void T_UConverter_toUnicode_DBCS (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
UErrorCode * err)
{
const char *mySource = ( char *) *source;
UChar *myTarget = *target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
myToUnicode = &_this->sharedData->table->dbcs.toUnicode;
myToUnicodeFallback = &_this->sharedData->table->dbcs.toUnicodeFallback;
while (mySourceIndex < sourceLength)
{
if (myTargetIndex < targetLength)
{
/*gets the corresponding UniChar */
mySourceChar = (unsigned char) mySource[mySourceIndex++];
/*We have no internal state, we should */
if (_this->toUnicodeStatus == 0x00)
{
_this->toUnicodeStatus = (unsigned char) mySourceChar;
}
else
{
if (_this->toUnicodeStatus != 0x00)
{
mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | (mySourceChar & 0x00FF));
_this->toUnicodeStatus = 0x00;
}
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
/*writing the UniChar to the output stream */
if (targetUniChar != missingUCharMarker)
{
/*writes the UniChar to the output stream */
myTarget[myTargetIndex++] = targetUniChar;
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
if (targetUniChar != missingUCharMarker)
{
myTarget[myTargetIndex++] = targetUniChar;
}
}
if (targetUniChar == missingUCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
_this->invalidCharBuffer[1] = (char) mySourceChar;
_this->invalidCharLength = 2;
ToU_CALLBACK_MACRO(_this,
myTarget,
myTargetIndex,
targetLimit,
mySource,
mySourceIndex,
sourceLimit,
offsets,
flush,
err);
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
}
}
}
else
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
break;
}
}
/*If at the end of conversion we are still carrying state information
*flush is TRUE, we can deduce that the input stream is truncated
*/
if ((flush == TRUE)
&& (mySourceIndex == sourceLength)
&& (_this->toUnicodeStatus != 0x00))
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
_this->toUnicodeStatus = 0x00;
}
}
*target += myTargetIndex;
*source += mySourceIndex;
return;
}
void T_UConverter_fromUnicode_DBCS (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
myFromUnicode = &_this->sharedData->table->dbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->dbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
{
if (myTargetIndex < targetLength)
{
mySourceChar = (UChar) mySource[mySourceIndex++];
/*Gets the corresponding codepoint */
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
if (targetUniChar != missingCharMarker)
{
/*writes the char to the output stream */
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
if (myTargetIndex < targetLength)
{
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
if (targetUniChar != missingCharMarker)
{
/*writes the char to the output stream */
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
if (myTargetIndex < targetLength)
{
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
if (targetUniChar == missingCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
_this->invalidUCharLength = 1;
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
FromU_CALLBACK_MACRO(_this,
(char *)myTarget,
myTargetIndex,
targetLimit,
mySource,
mySourceIndex,
sourceLimit,
offsets,
flush,
err);
if (U_FAILURE (*err)) break;
_this->invalidUCharLength = 0;
}
}
else
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
break;
}
}
*target += myTargetIndex;
*source += mySourceIndex;;
return;
}
UChar32 T_UConverter_getNextUChar_DBCS(UConverter* converter,
const char** source,
const char* sourceLimit,
UErrorCode* err)
{
UChar myUChar;
/*Checks boundaries and set appropriate error codes*/
if ((*source)+2 > sourceLimit)
{
if ((*source) >= sourceLimit)
{
/*Either caller has reached the end of the byte stream*/
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
else if (((*source)+1) == sourceLimit)
{
/* a character was cut in half*/
*err = U_TRUNCATED_CHAR_FOUND;
}
return 0xFFFD;
}
/*Gets the corresponding codepoint*/
myUChar = ucmp16_getu((&converter->sharedData->table->dbcs.toUnicode),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))));
/*update the input pointer*/
*source += 2;
if (myUChar != 0xFFFD) return myUChar;
else
{
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = *source;
/* rewinding the input buffer*/
(*source) -= 2;
/* Do the fallback stuff */
if ((converter->useFallback == TRUE) &&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
myUChar = ucmp16_getu((&converter->sharedData->table->dbcs.toUnicodeFallback),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)-1))));
if (myUChar != 0xFFFD)
{
*source += 2;
return myUChar;
}
}
*err = U_INVALID_CHAR_FOUND;
/*It's is very likely that the ErrorFunctor will write to the
*internal buffers */
converter->fromCharErrorBehaviour(converter,
&myUCharPtr,
myUCharPtr + 1,
&sourceFinal,
sourceLimit,
NULL,
TRUE,
err);
/*makes the internal caching transparent to the user*/
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
return myUChar;
}
}
static const UConverterImpl _DBCSImpl={
UCNV_DBCS,
_DBCSLoad,
_DBCSUnload,
NULL,
NULL,
NULL,
T_UConverter_toUnicode_DBCS,
NULL,
T_UConverter_fromUnicode_DBCS,
NULL,
T_UConverter_getNextUChar_DBCS,
NULL
};
/* Static data is in tools/makeconv/ucnvstat.c for data-based
* converters. Be sure to update it as well.
*/
const UConverterSharedData _DBCSData={
sizeof(UConverterSharedData), 1,
NULL, NULL, NULL, FALSE, &_DBCSImpl,
0, /* tounicodestatus */
};