250331389c
X-SVN-Rev: 2230
780 lines
28 KiB
C
780 lines
28 KiB
C
/*
**********************************************************************
*   Copyright (C) 2000, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnvsbcs.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000feb03
*   created by: Markus W. Scherer
*
*   Change history:
*
*   05/09/00    helena      Added implementation to handle fallback mappings.
*   06/20/2000  helena      OS/400 port changes; mostly typecast.
*   06/29/2000  helena      Major rewrite of the callback APIs.
*/
|
|
|
|
#include "unicode/utypes.h"
|
|
#include "cmemory.h"
|
|
#include "ucmp16.h"
|
|
#include "ucmp8.h"
|
|
#include "unicode/ucnv_err.h"
|
|
#include "ucnv_bld.h"
|
|
#include "unicode/ucnv.h"
|
|
#include "ucnv_cnv.h"
|
|
|
|
/* SBCS --------------------------------------------------------------------- */
|
|
|
|
static void
|
|
_SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
|
|
const uint8_t *oldraw = raw;
|
|
sharedData->table->sbcs.toUnicode = (UChar *)raw;
|
|
raw += sizeof(uint16_t)*256; oldraw = raw;
|
|
ucmp8_initFromData(&sharedData->table->sbcs.fromUnicode, &raw, pErrorCode);
|
|
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
|
|
{
|
|
if(((raw-oldraw)&3)!=0) {
|
|
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
|
}
|
|
ucmp8_initFromData(&sharedData->table->sbcs.fromUnicodeFallback, &raw, pErrorCode);
|
|
}
|
|
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
|
|
{
|
|
if(((raw-oldraw)&3)!=0) {
|
|
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
|
}
|
|
sharedData->table->sbcs.toUnicodeFallback = (UChar *)raw;
|
|
}
|
|
}
|
|
|
|
static void
|
|
_SBCSUnload(UConverterSharedData *sharedData) {
|
|
ucmp8_close (&sharedData->table->sbcs.fromUnicode);
|
|
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
|
|
ucmp8_close (&sharedData->table->sbcs.fromUnicodeFallback);
|
|
}
|
|
|
|
U_CFUNC void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
|
|
UErrorCode * err)
|
|
{
|
|
char *mySource = (char *) args->source;
|
|
UChar *myTarget = args->target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = args->targetLimit - myTarget;
|
|
int32_t sourceLength = args->sourceLimit - (char *) mySource;
|
|
UChar *myToUnicode = NULL, *myToUnicodeFallback = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
|
|
myToUnicode = args->converter->sharedData->table->sbcs.toUnicode;
|
|
myToUnicodeFallback = args->converter->sharedData->table->sbcs.toUnicodeFallback;
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
|
|
/*writing the UniChar to the output stream */
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];
|
|
|
|
if (targetUniChar < 0xfffe)
|
|
{
|
|
/* writes the UniChar to the output stream */
|
|
myTarget[myTargetIndex++] = targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
if ((args->converter->useFallback == TRUE) &&
|
|
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
|
|
{
|
|
/* Look up in the fallback table first */
|
|
UChar fallbackUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
|
|
if (fallbackUniChar < 0xfffe)
|
|
{
|
|
myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
|
|
}
|
|
}
|
|
if (targetUniChar >= 0xfffe)
|
|
{
|
|
const char *saveSource = args->source;
|
|
UChar *saveTarget = args->target;
|
|
int32_t *saveOffsets = args->offsets;
|
|
UConverterCallbackReason reason;
|
|
|
|
if (targetUniChar == 0xfffe)
|
|
{
|
|
reason = UCNV_UNASSIGNED;
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
}
|
|
else
|
|
{
|
|
reason = UCNV_ILLEGAL;
|
|
*err = U_ILLEGAL_CHAR_FOUND;
|
|
}
|
|
|
|
args->converter->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
|
|
args->converter->invalidCharLength = 1;
|
|
|
|
args->target = myTarget + myTargetIndex;
|
|
args->source = mySource + mySourceIndex;
|
|
|
|
/* to do hsys: add more smarts to the codeUnits and length later */
|
|
ToU_CALLBACK_MACRO(args->converter->toUContext,
|
|
args,
|
|
args->converter->invalidCharBuffer,
|
|
args->converter->invalidCharLength,
|
|
reason,
|
|
err);
|
|
/* Hsys: calculate the source and target advancement */
|
|
args->source = saveSource;
|
|
args->target = saveTarget;
|
|
args->offsets = saveOffsets;
|
|
if (U_FAILURE (*err)) break;
|
|
args->converter->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
args->target += myTargetIndex;
|
|
args->source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
U_CFUNC void T_UConverter_fromUnicode_SBCS (UConverterFromUnicodeArgs * args,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = args->source;
|
|
unsigned char *myTarget = (unsigned char *) args->target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = args->targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = args->sourceLimit - mySource;
|
|
CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
|
|
unsigned char targetChar = 0x00;
|
|
UConverterCallbackReason reason;
|
|
|
|
myFromUnicode = &args->converter->sharedData->table->sbcs.fromUnicode;
|
|
myFromUnicodeFallback = &args->converter->sharedData->table->sbcs.fromUnicodeFallback;
|
|
/*writing the char to the output stream */
|
|
/* HSYS : to do : finish the combining of the surrogate characters later */
|
|
/*
|
|
if (args->converter->fromUSurrogateLead != 0 && UTF_IS_TRAIL(mySource[mySourceIndex]))
|
|
{
|
|
}
|
|
*/
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]);
|
|
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceIndex++;
|
|
if (targetChar != 0 || !mySource[mySourceIndex - 1])
|
|
{
|
|
/*writes the char to the output stream */
|
|
myTarget[myTargetIndex++] = targetChar;
|
|
}
|
|
else if ((args->converter->useFallback == TRUE) &&
|
|
(args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
|
|
{
|
|
/* Look up in the fallback table first */
|
|
targetChar = ucmp8_getu (myFromUnicodeFallback, mySource[mySourceIndex-1]);
|
|
if (targetChar != 0 || !mySource[mySourceIndex - 1])
|
|
{
|
|
/*writes the char to the output stream */
|
|
myTarget[myTargetIndex++] = targetChar;
|
|
}
|
|
}
|
|
if (targetChar == 0 && mySource[mySourceIndex-1] != 0)
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
reason = UCNV_UNASSIGNED;
|
|
|
|
args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
|
|
args->converter->invalidUCharLength = 1;
|
|
if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
|
|
{
|
|
/*if (mySource < args->sourceLimit)*/
|
|
if(mySourceIndex < sourceLength)
|
|
{
|
|
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
|
|
{
|
|
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
|
|
args->converter->invalidUCharLength++;
|
|
mySourceIndex++;
|
|
}
|
|
else
|
|
{
|
|
reason = UCNV_ILLEGAL;
|
|
}
|
|
}
|
|
else if (args->flush == TRUE)
|
|
{
|
|
reason = UCNV_ILLEGAL;
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
}
|
|
else
|
|
{
|
|
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
|
|
/* do not call the callback */
|
|
}
|
|
}
|
|
if (args->converter->fromUSurrogateLead == 0)
|
|
{
|
|
const UChar *saveSource = args->source;
|
|
char *saveTarget = args->target;
|
|
int32_t *saveOffsets = args->offsets;
|
|
args->target = (char *)myTarget+myTargetIndex;
|
|
args->source = mySource+mySourceIndex;
|
|
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
|
/* Check if we have encountered a surrogate pair. If first UChar is lead byte
|
|
and second UChar is trail byte, it's a surrogate char. If UChar is lead byte
|
|
but second UChar is not trail byte, it's illegal sequence. If neither, it's
|
|
plain unassigned code point.*/
|
|
FromU_CALLBACK_MACRO(args->converter->fromUContext,
|
|
args,
|
|
args->converter->invalidUCharBuffer,
|
|
args->converter->invalidUCharLength,
|
|
(UChar32) (args->converter->invalidUCharLength == 2 ?
|
|
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
|
|
args->converter->invalidUCharBuffer[1])
|
|
: args->converter->invalidUCharBuffer[0]),
|
|
reason,
|
|
err);
|
|
args->source = saveSource;
|
|
args->target = saveTarget;
|
|
args->offsets = saveOffsets;
|
|
if (U_FAILURE (*err))
|
|
{
|
|
break;
|
|
}
|
|
args->converter->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
args->target += myTargetIndex;
|
|
args->source += mySourceIndex;
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
/*
 * Reads one byte from args->source and returns its Unicode mapping.
 *
 * Returns 0xffff without consuming input when *err already indicates a
 * failure, or (setting U_INDEX_OUTOFBOUNDS_ERROR) when no input is left.
 * The byte is consumed before the lookup. If the regular table has no
 * mapping (value >= 0xfffe) the fallback table is tried when fallbacks are
 * enabled and present. Otherwise *err is set (0xfffe: U_INVALID_CHAR_FOUND,
 * 0xffff: U_ILLEGAL_CHAR_FOUND) and the converter's fromCharErrorBehaviour
 * callback is called with a one-UChar target that aliases the return value,
 * so whatever the callback writes there is what gets returned.
 * A U_BUFFER_OVERFLOW_ERROR left by the callback is reset to U_ZERO_ERROR.
 */
U_CFUNC UChar32 T_UConverter_getNextUChar_SBCS(UConverterToUnicodeArgs* args,
                                               UErrorCode* err)
{
    UChar result;
    UConverterCallbackReason reason;

    if (U_FAILURE(*err))
    {
        return 0xffff;
    }

    if (args->source+1 > args->sourceLimit)
    {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    /* look the byte up and step past it */
    result = args->converter->sharedData->table->sbcs.toUnicode[(unsigned char)*(args->source++)];
    if (result < 0xfffe)
    {
        return result;
    }

    /* no regular mapping: a fallback mapping wins if one is available */
    if ((args->converter->useFallback == TRUE) &&
        (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
    {
        UChar fallbackResult = args->converter->sharedData->table->sbcs.toUnicodeFallback[(unsigned char)*(args->source-1)];
        if (fallbackResult < 0xfffe)
        {
            return fallbackResult;
        }
    }

    if (result == 0xfffe)
    {
        reason = UCNV_UNASSIGNED;
        *err = U_INVALID_CHAR_FOUND;
    }
    else
    {
        reason = UCNV_ILLEGAL;
        *err = U_ILLEGAL_CHAR_FOUND;
    }

    /* the callback gets exactly one UChar of output space: result itself */
    args->target = &result;
    args->targetLimit = &result + 1;
    args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                            args,
                                            args->source - 1,
                                            1,
                                            reason,
                                            err);

    /* makes the internal caching transparent to the user */
    if (*err == U_BUFFER_OVERFLOW_ERROR)
    {
        *err = U_ZERO_ERROR;
    }

    return result;
}
|
|
|
|
static const UConverterImpl _SBCSImpl={
|
|
UCNV_SBCS,
|
|
|
|
_SBCSLoad,
|
|
_SBCSUnload,
|
|
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
|
|
T_UConverter_toUnicode_SBCS,
|
|
NULL,
|
|
T_UConverter_fromUnicode_SBCS,
|
|
NULL,
|
|
T_UConverter_getNextUChar_SBCS,
|
|
|
|
NULL
|
|
};
|
|
|
|
|
|
/* Static data is in tools/makeconv/ucnvstat.c for data-based
|
|
* converters. Be sure to update it as well.
|
|
*/
|
|
|
|
const UConverterSharedData _SBCSData={
|
|
sizeof(UConverterSharedData), 1,
|
|
NULL, NULL, NULL, FALSE, &_SBCSImpl,
|
|
0
|
|
};
|
|
|
|
/* DBCS --------------------------------------------------------------------- */
|
|
|
|
U_CFUNC void
|
|
_DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
|
|
const uint8_t *oldraw = raw;
|
|
ucmp16_initFromData(&sharedData->table->dbcs.toUnicode,&raw, pErrorCode);
|
|
if(((raw-oldraw)&3)!=0) {
|
|
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
|
}
|
|
oldraw = raw;
|
|
ucmp16_initFromData(&sharedData->table->dbcs.fromUnicode, &raw, pErrorCode);
|
|
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
|
|
{
|
|
if(((raw-oldraw)&3)!=0) {
|
|
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
|
}
|
|
ucmp16_initFromData(&sharedData->table->dbcs.fromUnicodeFallback, &raw, pErrorCode);
|
|
oldraw = raw;
|
|
}
|
|
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
|
|
{
|
|
if(((raw-oldraw)&3)!=0) {
|
|
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
|
}
|
|
ucmp16_initFromData(&sharedData->table->dbcs.toUnicodeFallback, &raw, pErrorCode);
|
|
}
|
|
}
|
|
|
|
U_CFUNC void
|
|
_DBCSUnload(UConverterSharedData *sharedData) {
|
|
ucmp16_close (&sharedData->table->dbcs.fromUnicode);
|
|
ucmp16_close (&sharedData->table->dbcs.toUnicode);
|
|
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
|
|
ucmp16_close (&sharedData->table->dbcs.fromUnicodeFallback);
|
|
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
|
|
ucmp16_close (&sharedData->table->dbcs.toUnicodeFallback);
|
|
}
|
|
|
|
U_CFUNC void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
|
|
UErrorCode * err)
|
|
{
|
|
const char *mySource = ( char *) args->source;
|
|
UChar *myTarget = args->target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = args->targetLimit - myTarget;
|
|
int32_t sourceLength = args->sourceLimit - (char *) mySource;
|
|
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
|
|
myToUnicode = &args->converter->sharedData->table->dbcs.toUnicode;
|
|
myToUnicodeFallback = &args->converter->sharedData->table->dbcs.toUnicodeFallback;
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
mySourceChar = (unsigned char) mySource[mySourceIndex++];
|
|
|
|
/*We have no internal state, we should */
|
|
if (args->converter->toUnicodeStatus == 0x00)
|
|
{
|
|
args->converter->toUnicodeStatus = (unsigned char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
if (args->converter->toUnicodeStatus != 0x00)
|
|
{
|
|
mySourceChar = (UChar) ((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00FF));
|
|
args->converter->toUnicodeStatus = 0x00;
|
|
}
|
|
|
|
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
|
|
|
/*writing the UniChar to the output stream */
|
|
if (targetUniChar < 0xfffe)
|
|
{
|
|
/*writes the UniChar to the output stream */
|
|
myTarget[myTargetIndex++] = targetUniChar;
|
|
}
|
|
else if ((args->converter->useFallback == TRUE) &&
|
|
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
|
|
{
|
|
UChar fallbackUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
|
|
if (fallbackUniChar < 0xfffe)
|
|
{
|
|
myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
|
|
}
|
|
}
|
|
if (targetUniChar >= 0xfffe)
|
|
{
|
|
const char *saveSource = args->source;
|
|
UChar *saveTarget = args->target;
|
|
int32_t *saveOffsets = args->offsets;
|
|
UConverterCallbackReason reason;
|
|
|
|
if (targetUniChar == 0xfffe)
|
|
{
|
|
reason = UCNV_UNASSIGNED;
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
}
|
|
else
|
|
{
|
|
reason = UCNV_ILLEGAL;
|
|
*err = U_ILLEGAL_CHAR_FOUND;
|
|
}
|
|
|
|
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
|
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
|
|
args->converter->invalidCharLength = 2;
|
|
|
|
args->target = myTarget + myTargetIndex;
|
|
args->source = mySource + mySourceIndex;
|
|
|
|
/* to do hsys: add more smarts to the codeUnits and length later */
|
|
ToU_CALLBACK_MACRO(args->converter->toUContext,
|
|
args,
|
|
args->converter->invalidCharBuffer,
|
|
args->converter->invalidCharLength,
|
|
reason,
|
|
err);
|
|
/* Hsys: calculate the source and target advancement */
|
|
args->source = saveSource;
|
|
args->target = saveTarget;
|
|
args->offsets = saveOffsets;
|
|
if (U_FAILURE (*err)) break;
|
|
args->converter->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*If at the end of conversion we are still carrying state information
|
|
*flush is TRUE, we can deduce that the input stream is truncated
|
|
*/
|
|
if ((args->flush == TRUE)
|
|
&& (mySourceIndex == sourceLength)
|
|
&& (args->converter->toUnicodeStatus != 0x00))
|
|
{
|
|
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
args->converter->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
|
|
args->target += myTargetIndex;
|
|
args->source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
U_CFUNC void T_UConverter_fromUnicode_DBCS (UConverterFromUnicodeArgs * args,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = args->source;
|
|
unsigned char *myTarget = (unsigned char *) args->target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = args->targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = args->sourceLimit - mySource;
|
|
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
UConverterCallbackReason reason;
|
|
|
|
myFromUnicode = &args->converter->sharedData->table->dbcs.fromUnicode;
|
|
myFromUnicodeFallback = &args->converter->sharedData->table->dbcs.fromUnicodeFallback;
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceChar = (UChar) mySource[mySourceIndex++];
|
|
|
|
/*Gets the corresponding codepoint */
|
|
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
|
if (targetUniChar != missingCharMarker)
|
|
{
|
|
/*writes the char to the output stream */
|
|
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
args->converter->charErrorBuffer[0] = (char) targetUniChar;
|
|
args->converter->charErrorBufferLength = 1;
|
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
|
}
|
|
}
|
|
else if ((args->converter->useFallback == TRUE) &&
|
|
(args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
|
|
{
|
|
|
|
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
|
|
if (targetUniChar != missingCharMarker)
|
|
{
|
|
/*writes the char to the output stream */
|
|
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
args->converter->charErrorBuffer[0] = (char) targetUniChar;
|
|
args->converter->charErrorBufferLength = 1;
|
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
|
}
|
|
}
|
|
}
|
|
if (targetUniChar == missingCharMarker)
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
reason = UCNV_UNASSIGNED;
|
|
|
|
args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
|
|
args->converter->invalidUCharLength = 1;
|
|
if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
|
|
{
|
|
/*if (mySource < args->sourceLimit) */
|
|
if(mySourceIndex < sourceLength)
|
|
{
|
|
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
|
|
{
|
|
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
|
|
args->converter->invalidUCharLength++;
|
|
mySourceIndex++;
|
|
}
|
|
else
|
|
{
|
|
reason = UCNV_ILLEGAL;
|
|
}
|
|
}
|
|
else if (args->flush == TRUE)
|
|
{
|
|
reason = UCNV_ILLEGAL;
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
}
|
|
else
|
|
{
|
|
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
|
|
/* do not call the callback */
|
|
}
|
|
}
|
|
if (args->converter->fromUSurrogateLead == 0)
|
|
{
|
|
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
|
/* Check if we have encountered a surrogate pair. If first UChar is lead byte
|
|
and second UChar is trail byte, it's a surrogate char. If UChar is lead byte
|
|
but second UChar is not trail byte, it's illegal sequence. If neither, it's
|
|
plain unassigned code point.*/
|
|
const UChar *saveSource = args->source;
|
|
char *saveTarget = args->target;
|
|
int32_t *saveOffsets = args->offsets;
|
|
args->target = (char*)myTarget + myTargetIndex;
|
|
args->source = mySource + mySourceIndex;
|
|
FromU_CALLBACK_MACRO(args->converter->fromUContext,
|
|
args,
|
|
args->converter->invalidUCharBuffer,
|
|
args->converter->invalidUCharLength,
|
|
(UChar32) (args->converter->invalidUCharLength == 2 ?
|
|
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
|
|
args->converter->invalidUCharBuffer[1])
|
|
: args->converter->invalidUCharBuffer[0]),
|
|
reason,
|
|
err);
|
|
args->source = saveSource;
|
|
args->target = saveTarget;
|
|
args->offsets = saveOffsets;
|
|
if (U_FAILURE (*err))
|
|
{
|
|
break;
|
|
}
|
|
args->converter->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_BUFFER_OVERFLOW_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
args->target += myTargetIndex;
|
|
args->source += mySourceIndex;;
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
/*
 * Reads one double-byte character from args->source and returns its
 * Unicode mapping.
 *
 * Returns 0xffff without consuming input when
 *   - *err already indicates a failure;
 *   - no input is left (*err = U_INDEX_OUTOFBOUNDS_ERROR);
 *   - only one byte is left (*err = U_TRUNCATED_CHAR_FOUND).
 * Otherwise exactly two bytes are consumed. The 16-bit code (first byte in
 * the high half) is looked up in the toUnicode array, then in the fallback
 * array when the result is >= 0xfffe and fallbacks are enabled and present.
 * If neither maps it, *err is set (0xfffe: U_INVALID_CHAR_FOUND, 0xffff:
 * U_ILLEGAL_CHAR_FOUND) and the converter's fromCharErrorBehaviour callback
 * is called with a one-UChar target that aliases the return value, so
 * whatever the callback writes there is what gets returned.
 * A U_BUFFER_OVERFLOW_ERROR left by the callback is reset to U_ZERO_ERROR.
 *
 * Fixed here: the fallback lookup used to run after the source pointer was
 * advanced but indexed *(source) and *(source-1), i.e. the wrong bytes
 * (possibly one past sourceLimit), and on success advanced the source by
 * another two bytes, skipping the following character. The code is now
 * computed once, before advancing, and used for both lookups.
 */
U_CFUNC UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
                                               UErrorCode* err)
{
    UChar myUChar;
    uint16_t dbcsCode;

    if (U_FAILURE(*err)) return 0xffff;

    /*Checks boundaries and set appropriate error codes*/
    if (args->source+2 > args->sourceLimit)
    {
        if (args->source >= args->sourceLimit)
        {
            /*Either caller has reached the end of the byte stream*/
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
        }
        else if ((args->source+1) == args->sourceLimit)
        {
            /* a character was cut in half*/
            *err = U_TRUNCATED_CHAR_FOUND;
        }

        return 0xffff;
    }

    /* Build the lookup code from the two bytes; going through uint8_t
       keeps a negative plain char from sign-extending into the high bits. */
    dbcsCode = (uint16_t)(((uint8_t)*(args->source) << 8) | (uint8_t)*(args->source+1));

    /*Gets the corresponding codepoint*/
    myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicode),
                          dbcsCode);

    /*update the input pointer (once, for both the regular and fallback path)*/
    args->source += 2;
    if (myUChar < 0xfffe) return myUChar;
    else
    {
        UChar* myUCharPtr = &myUChar;
        UConverterCallbackReason reason;

        /* Do the fallback stuff: same code, fallback table */
        if ((args->converter->useFallback == TRUE) &&
            (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
        {
            UChar fallbackUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
                                              dbcsCode);
            if (fallbackUChar < 0xfffe)
            {
                return fallbackUChar;
            }
        }

        if (myUChar == 0xfffe)
        {
            reason = UCNV_UNASSIGNED;
            *err = U_INVALID_CHAR_FOUND;
        }
        else
        {
            reason = UCNV_ILLEGAL;
            *err = U_ILLEGAL_CHAR_FOUND;
        }

        /* the callback gets exactly one UChar of output space: myUChar */
        args->target = myUCharPtr;
        args->targetLimit = myUCharPtr + 1;
        /*It's is very likely that the ErrorFunctor will write to the
         *internal buffers */
        args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                                args,
                                                args->source - 2,
                                                2,
                                                reason,
                                                err);
        /*makes the internal caching transparent to the user*/
        if (*err == U_BUFFER_OVERFLOW_ERROR) *err = U_ZERO_ERROR;

        return myUChar;
    }
}
|
|
|
|
static const UConverterImpl _DBCSImpl={
|
|
UCNV_DBCS,
|
|
|
|
_DBCSLoad,
|
|
_DBCSUnload,
|
|
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
|
|
T_UConverter_toUnicode_DBCS,
|
|
NULL,
|
|
T_UConverter_fromUnicode_DBCS,
|
|
NULL,
|
|
T_UConverter_getNextUChar_DBCS,
|
|
|
|
NULL
|
|
};
|
|
|
|
|
|
/* Static data is in tools/makeconv/ucnvstat.c for data-based
|
|
* converters. Be sure to update it as well.
|
|
*/
|
|
|
|
const UConverterSharedData _DBCSData={
|
|
sizeof(UConverterSharedData), 1,
|
|
NULL, NULL, NULL, FALSE, &_DBCSImpl,
|
|
0, /* tounicodestatus */
|
|
};
|