ICU-502 clean up 'unassigned' handling and callback calls
X-SVN-Rev: 1871
This commit is contained in:
parent
2c9d62de49
commit
7d721ba16c
@ -591,7 +591,7 @@ static UChar32 T_UConverter_getNextUChar_ISO_2022(UConverterToUnicodeArgs* args,
|
||||
if (args->sourceLimit < args->source)
|
||||
{
|
||||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
@ -621,7 +621,7 @@ static UChar32 T_UConverter_getNextUChar_ISO_2022(UConverterToUnicodeArgs* args,
|
||||
args->source++;
|
||||
}
|
||||
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
static const UConverterImpl _ISO2022Impl={
|
||||
@ -711,7 +711,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
if (targetUniChar < 0xfffe)
|
||||
{
|
||||
/*writes the UniChar to the output stream */
|
||||
args->target[myTargetIndex++] = targetUniChar;
|
||||
@ -722,8 +722,19 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
|
||||
const char* saveSource = args->source;
|
||||
UChar* saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (targetUniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
if (mySourceChar > 0xff)
|
||||
{
|
||||
args->converter->invalidCharLength = 2;
|
||||
@ -740,9 +751,9 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
|
||||
args->source += mySourceIndex;
|
||||
ToU_CALLBACK_MACRO(args->converter->toUContext,
|
||||
args,
|
||||
args->source,
|
||||
1,
|
||||
UCNV_UNASSIGNED,
|
||||
args->converter->invalidCharBuffer,
|
||||
args->converter->invalidCharLength,
|
||||
reason,
|
||||
err);
|
||||
|
||||
args->source = saveSource;
|
||||
@ -828,7 +839,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
if (targetUniChar < 0xfffe)
|
||||
{
|
||||
/*writes the UniChar to the output stream */
|
||||
{
|
||||
@ -846,8 +857,19 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
|
||||
const char* saveSource = args->source;
|
||||
UChar* saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (targetUniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
if (mySourceChar > 0xFF)
|
||||
{
|
||||
args->converter->invalidCharLength = 2;
|
||||
@ -869,7 +891,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
|
||||
args,
|
||||
args->source,
|
||||
1,
|
||||
UCNV_UNASSIGNED,
|
||||
reason,
|
||||
err);
|
||||
|
||||
args->source = saveSource;
|
||||
@ -1160,24 +1182,25 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
|
||||
/*safe keeps a ptr to the beginning in case we need to step back*/
|
||||
|
||||
/*Input boundary check*/
|
||||
if (args->source+1 > args->sourceLimit)
|
||||
if (args->source >= args->sourceLimit)
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/*Checks to see if with have SI/SO shifters
|
||||
if we do we change the mode appropriately and we consume the byte*/
|
||||
if ((*(args->source) == UCNV_SI) || (*(args->source) == UCNV_SO))
|
||||
while ((*(args->source) == UCNV_SI) || (*(args->source) == UCNV_SO))
|
||||
{
|
||||
args->converter->mode = *(args->source);
|
||||
args->source++;
|
||||
sourceInitial = args->source;
|
||||
|
||||
/*Rechecks boundary after consuming the shift sequence*/
|
||||
if (args->source+1 > args->sourceLimit)
|
||||
if (args->source >= args->sourceLimit)
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1195,7 +1218,7 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
|
||||
if ((args->source + 2) > args->sourceLimit)
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
myUChar = ucmp16_getu( (&(args->converter->sharedData->table->dbcs.toUnicode)),
|
||||
@ -1204,28 +1227,34 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
|
||||
args->source += 2;
|
||||
}
|
||||
|
||||
if (myUChar != 0xFFFD) return myUChar;
|
||||
if (myUChar < 0xfffe) return myUChar;
|
||||
else
|
||||
{
|
||||
/*rewinds source*/
|
||||
/* HSYS: Check logic here */
|
||||
const char* sourceFinal = args->source;
|
||||
UChar* myUCharPtr = &myUChar;
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
args->source = sourceInitial;
|
||||
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (myUChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
/*It's is very likely that the ErrorFunctor will write to the
|
||||
*internal buffers */
|
||||
args->target = myUCharPtr;
|
||||
args->targetLimit = myUCharPtr + 1;
|
||||
args->source = sourceFinal;
|
||||
|
||||
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
|
||||
args,
|
||||
sourceFinal,
|
||||
1,
|
||||
UCNV_UNASSIGNED,
|
||||
sourceInitial,
|
||||
args->source - sourceInitial,
|
||||
reason,
|
||||
err);
|
||||
|
||||
/*makes the internal caching transparent to the user*/
|
||||
|
@ -106,7 +106,7 @@ isCnvAcceptable(void *context,
|
||||
pInfo->dataFormat[1]==0x6e &&
|
||||
pInfo->dataFormat[2]==0x76 &&
|
||||
pInfo->dataFormat[3]==0x74 &&
|
||||
(pInfo->formatVersion[0]==4 || pInfo->formatVersion[0]==5);
|
||||
pInfo->formatVersion[0]==5;
|
||||
}
|
||||
|
||||
#define DATA_TYPE "cnv"
|
||||
@ -418,7 +418,7 @@ UConverter *
|
||||
|
||||
UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
|
||||
{
|
||||
UDataInfo info;
|
||||
/* UDataInfo info; -- necessary only if some converters have different formatVersion */
|
||||
const uint8_t *raw = (const uint8_t *)udata_getMemory(pData);
|
||||
const UConverterStaticData *source = (const UConverterStaticData *) raw;
|
||||
UConverterSharedData *data;
|
||||
@ -435,6 +435,8 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* necessary only if some converters have different formatVersion; now everything is at version 5 */
|
||||
/* test for the format version: MBCS is at version 5, the rest still at 4 */
|
||||
info.size=sizeof(UDataInfo);
|
||||
udata_getInfo(pData, &info);
|
||||
@ -442,6 +444,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s
|
||||
*status = U_INVALID_TABLE_FORMAT;
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
|
||||
if(data == NULL) {
|
||||
|
@ -54,8 +54,16 @@ union UConverterTable
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/* this is used in fromUnicode DBCS tables as an "unassigned" marker */
|
||||
#define missingCharMarker 0xFFFF
|
||||
#define missingUCharMarker 0xFFFD
|
||||
|
||||
/*
|
||||
* #define missingUCharMarker 0xfffe
|
||||
*
|
||||
* there are actually two values used in toUnicode tables:
|
||||
* U+fffe "unassigned"
|
||||
* U+ffff "illegal"
|
||||
*/
|
||||
|
||||
#define FromU_CALLBACK_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
|
||||
if (args->converter->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
|
||||
|
@ -967,7 +967,7 @@ GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode
|
||||
if (args->source+index > args->sourceLimit){\
|
||||
*err = U_TRUNCATED_CHAR_FOUND;\
|
||||
args->source = saveSource;\
|
||||
return missingUCharMarker;}
|
||||
return 0xffff;}
|
||||
|
||||
|
||||
/* Return the Unicode representation for the current LMBCS character
|
||||
@ -990,7 +990,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
|
||||
if (args->source >= args->sourceLimit)
|
||||
{
|
||||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return missingUCharMarker;
|
||||
return 0xffff;
|
||||
}
|
||||
/* Grab first byte & save address for error recovery */
|
||||
CurByte = *((ulmbcs_byte_t *) (saveSource = args->source++));
|
||||
@ -1133,7 +1133,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (((uint32_t)uniChar - 0xfffd) <= 2) /* 0xfffd<=uniChar<=0xffff, was: uniChar == missingUCharMarker */
|
||||
if (((uint32_t)uniChar - 0xfffe) <= 1) /* 0xfffe<=uniChar<=0xffff */
|
||||
{
|
||||
/*It is very likely that the ErrorFunctor will write to the
|
||||
*internal buffers */
|
||||
@ -1141,10 +1141,21 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
|
||||
/* This code needs updating when new error callbacks are installed */
|
||||
|
||||
UChar * pUniChar = (UChar *)&uniChar;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (uniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
args->target = pUniChar;
|
||||
args->targetLimit = pUniChar + 1;
|
||||
args->source = saveSource;
|
||||
args->flush = TRUE;
|
||||
args->offsets = NULL;
|
||||
args->size = sizeof(args);
|
||||
@ -1152,7 +1163,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
|
||||
args,
|
||||
saveSource,
|
||||
args->sourceLimit - saveSource,
|
||||
UCNV_UNASSIGNED,
|
||||
reason,
|
||||
err);
|
||||
args->source = saveSource;
|
||||
}
|
||||
@ -1237,7 +1248,7 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
|
||||
}
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
if (uniChar != missingUCharMarker)
|
||||
if (uniChar < 0xfffe)
|
||||
{
|
||||
*(args->target)++ = uniChar;
|
||||
if(args->offsets)
|
||||
@ -1245,10 +1256,14 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
|
||||
*(args->offsets)++ = saveSource - pStartLMBCS;
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (uniChar == 0xfffe)
|
||||
{
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else /* if (uniChar == 0xffff) */
|
||||
{
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* if target ran out before source, return U_INDEX_OUTOFBOUNDS_ERROR */
|
||||
|
@ -178,7 +178,7 @@ void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
||||
args,
|
||||
args->converter->invalidCharBuffer,
|
||||
args->converter->invalidCharLength,
|
||||
UCNV_UNASSIGNED,
|
||||
UCNV_ILLEGAL,
|
||||
err);
|
||||
|
||||
args->source = saveSource;
|
||||
@ -569,7 +569,7 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
|
||||
if (args->source >= args->sourceLimit)
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
myByte = (uint8_t)*(args->source++);
|
||||
@ -587,7 +587,7 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
|
||||
if ((args->source + extraBytesToWrite - 1) > args->sourceLimit)
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -635,24 +635,20 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
|
||||
|
||||
CALL_ERROR_FUNCTION:
|
||||
{
|
||||
/*rewinds source*/
|
||||
const char* sourceFinal = args->source;
|
||||
UChar myUChar = (UChar)ch; /* ### TODO: this is a hack until we prepare the callbacks for code points */
|
||||
UChar* myUCharPtr = &myUChar;
|
||||
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
args->source = sourceInitial;
|
||||
|
||||
/*It is very likely that the ErrorFunctor will write to the
|
||||
*internal buffers */
|
||||
args->target = myUCharPtr;
|
||||
args->targetLimit = myUCharPtr + 1;
|
||||
args->source = sourceFinal;
|
||||
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
|
||||
args,
|
||||
sourceFinal,
|
||||
args->sourceLimit-sourceFinal,
|
||||
UCNV_UNASSIGNED,
|
||||
sourceInitial,
|
||||
args->source-sourceInitial,
|
||||
UCNV_ILLEGAL,
|
||||
err);
|
||||
|
||||
|
||||
@ -820,7 +816,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
|
||||
@ -834,7 +830,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,
|
||||
|
||||
if (args->source+2 > args->sourceLimit) {
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/* get the second surrogate and assemble the code point */
|
||||
@ -1009,7 +1005,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
|
||||
@ -1023,7 +1019,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,
|
||||
|
||||
if (args->source+2 > args->sourceLimit) {
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/* get the second surrogate and assemble the code point */
|
||||
|
@ -128,7 +128,7 @@ static void T_UConverter_fromUnicode_LATIN_1 (UConverterFromUnicodeArgs * args
|
||||
args->converter->invalidUCharLength,
|
||||
(UChar32) (args->converter->invalidUCharLength == 2 ?
|
||||
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
|
||||
args->converter->invalidUCharBuffer[2])
|
||||
args->converter->invalidUCharBuffer[1])
|
||||
: args->converter->invalidUCharBuffer[0]),
|
||||
reason,
|
||||
err);
|
||||
@ -168,7 +168,7 @@ static UChar32 T_UConverter_getNextUChar_LATIN_1(UConverterToUnicodeArgs* args,
|
||||
if (args->source+1 > args->sourceLimit)
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/* make sure that we zero-extend, not sign-extend, the byte */
|
||||
|
@ -82,7 +82,7 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
|
||||
/*gets the corresponding UniChar */
|
||||
targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];
|
||||
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
if (targetUniChar < 0xfffe)
|
||||
{
|
||||
/* writes the UniChar to the output stream */
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
@ -93,19 +93,30 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
|
||||
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
|
||||
{
|
||||
/* Look up in the fallback table first */
|
||||
targetUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
UChar fallbackUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
|
||||
if (fallbackUniChar < 0xfffe)
|
||||
{
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
|
||||
}
|
||||
}
|
||||
if (targetUniChar == missingUCharMarker)
|
||||
if (targetUniChar >= 0xfffe)
|
||||
{
|
||||
const char *saveSource = args->source;
|
||||
UChar *saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (targetUniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
args->converter->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
|
||||
args->converter->invalidCharLength = 1;
|
||||
|
||||
@ -117,7 +128,7 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
|
||||
args,
|
||||
args->converter->invalidCharBuffer,
|
||||
args->converter->invalidCharLength,
|
||||
UCNV_UNASSIGNED,
|
||||
reason,
|
||||
err);
|
||||
/* Hsys: calculate the source and target advancement */
|
||||
args->source = saveSource;
|
||||
@ -238,7 +249,7 @@ void T_UConverter_fromUnicode_SBCS (UConverterFromUnicodeArgs * args,
|
||||
args->converter->invalidUCharLength,
|
||||
(UChar32) (args->converter->invalidUCharLength == 2 ?
|
||||
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
|
||||
args->converter->invalidUCharBuffer[2])
|
||||
args->converter->invalidUCharBuffer[1])
|
||||
: args->converter->invalidUCharBuffer[0]),
|
||||
reason,
|
||||
err);
|
||||
@ -273,45 +284,52 @@ UChar32 T_UConverter_getNextUChar_SBCS(UConverterToUnicodeArgs* args,
|
||||
{
|
||||
UChar myUChar;
|
||||
|
||||
if (U_FAILURE(*err)) return 0xFFFD;
|
||||
if (U_FAILURE(*err)) return 0xffff;
|
||||
|
||||
if (args->source+1 > args->sourceLimit)
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/*Gets the corresponding codepoint*/
|
||||
myUChar = args->converter->sharedData->table->sbcs.toUnicode[(unsigned char)*(args->source++)];
|
||||
|
||||
if (myUChar != 0xFFFD) return myUChar;
|
||||
if (myUChar < 0xfffe) return myUChar;
|
||||
else
|
||||
{
|
||||
UChar* myUCharPtr = &myUChar;
|
||||
const char* sourceFinal = args->source;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
/* Do the fallback stuff */
|
||||
if ((args->converter->useFallback == TRUE)&&
|
||||
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
|
||||
{
|
||||
myUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)];
|
||||
if (myUChar != 0xFFFD) return myUChar;
|
||||
UChar fallbackUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)];
|
||||
if (fallbackUChar < 0xfffe) return fallbackUChar;
|
||||
}
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
|
||||
/*Calls the ErrorFunctor after rewinding the input buffer*/
|
||||
args->source--;
|
||||
if (myUChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
/*Calls the ErrorFunctor */
|
||||
/*It's is very likely that the ErrorFunctor will write to the
|
||||
*internal buffers */
|
||||
args->target = myUCharPtr;
|
||||
args->targetLimit = myUCharPtr + 1;
|
||||
args->source = sourceFinal;
|
||||
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
|
||||
args,
|
||||
sourceFinal,
|
||||
args->source - 1,
|
||||
1,
|
||||
UCNV_UNASSIGNED,
|
||||
reason,
|
||||
err);
|
||||
|
||||
/*makes the internal caching transparent to the user*/
|
||||
@ -429,7 +447,7 @@ void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
if (targetUniChar < 0xfffe)
|
||||
{
|
||||
/*writes the UniChar to the output stream */
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
@ -437,19 +455,30 @@ void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
|
||||
else if ((args->converter->useFallback == TRUE) &&
|
||||
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
|
||||
{
|
||||
targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
UChar fallbackUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
|
||||
if (fallbackUniChar < 0xfffe)
|
||||
{
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
|
||||
}
|
||||
}
|
||||
if (targetUniChar == missingUCharMarker)
|
||||
if (targetUniChar >= 0xfffe)
|
||||
{
|
||||
const char *saveSource = args->source;
|
||||
UChar *saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (targetUniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
||||
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
|
||||
args->converter->invalidCharLength = 2;
|
||||
@ -462,7 +491,7 @@ void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
|
||||
args,
|
||||
args->converter->invalidCharBuffer,
|
||||
args->converter->invalidCharLength,
|
||||
UCNV_UNASSIGNED,
|
||||
reason,
|
||||
err);
|
||||
/* Hsys: calculate the source and target advancement */
|
||||
args->source = saveSource;
|
||||
@ -616,7 +645,7 @@ void T_UConverter_fromUnicode_DBCS (UConverterFromUnicodeArgs * args,
|
||||
args->converter->invalidUCharLength,
|
||||
(UChar32) (args->converter->invalidUCharLength == 2 ?
|
||||
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
|
||||
args->converter->invalidUCharBuffer[2])
|
||||
args->converter->invalidUCharBuffer[1])
|
||||
: args->converter->invalidUCharBuffer[0]),
|
||||
reason,
|
||||
err);
|
||||
@ -650,7 +679,7 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
|
||||
{
|
||||
UChar myUChar;
|
||||
|
||||
if (U_FAILURE(*err)) return 0xFFFD;
|
||||
if (U_FAILURE(*err)) return 0xffff;
|
||||
/*Checks boundaries and set appropriate error codes*/
|
||||
if (args->source+2 > args->sourceLimit)
|
||||
{
|
||||
@ -665,7 +694,7 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
|
||||
return 0xFFFD;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/*Gets the corresponding codepoint*/
|
||||
@ -674,39 +703,45 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
|
||||
|
||||
/*update the input pointer*/
|
||||
args->source += 2;
|
||||
if (myUChar != 0xFFFD) return myUChar;
|
||||
if (myUChar < 0xfffe) return myUChar;
|
||||
else
|
||||
{
|
||||
UChar* myUCharPtr = &myUChar;
|
||||
const char* sourceFinal = args->source;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
/* rewinding the input buffer*/
|
||||
args->source -= 2;
|
||||
/* Do the fallback stuff */
|
||||
if ((args->converter->useFallback == TRUE) &&
|
||||
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
|
||||
{
|
||||
myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
|
||||
UChar fallbackUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
|
||||
(uint16_t)(((UChar)((*(args->source))) << 8) |((uint8_t)*(args->source-1))));
|
||||
if (myUChar != 0xFFFD)
|
||||
if (fallbackUChar < 0xfffe)
|
||||
{
|
||||
args->source += 2;
|
||||
return myUChar;
|
||||
return fallbackUChar;
|
||||
}
|
||||
}
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
|
||||
if (myUChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
args->target = myUCharPtr;
|
||||
args->targetLimit = myUCharPtr + 1;
|
||||
args->source = sourceFinal;
|
||||
/*It's is very likely that the ErrorFunctor will write to the
|
||||
*internal buffers */
|
||||
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
|
||||
args,
|
||||
sourceFinal,
|
||||
args->source - 2,
|
||||
2,
|
||||
UCNV_UNASSIGNED,
|
||||
reason,
|
||||
err);
|
||||
/*makes the internal caching transparent to the user*/
|
||||
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
||||
|
@ -279,7 +279,7 @@ void TestToUnicodeErrorBehaviour()
|
||||
|
||||
}
|
||||
void TestGetNextErrorBehaviour(){
|
||||
/*Test for Illegal character*/
|
||||
/*Test for unassigned character*/
|
||||
static const char input1[]={ (char)0x70 };
|
||||
const char* source=(const char*)input1;
|
||||
UErrorCode err=U_ZERO_ERROR;
|
||||
@ -287,10 +287,11 @@ void TestGetNextErrorBehaviour(){
|
||||
UConverter *cnv=ucnv_open("ibm-1159", &err);
|
||||
if(U_FAILURE(err)) {
|
||||
log_err("Unable to open a SBCS(ibm-1159) converter: %s\n", u_errorName(err));
|
||||
return;
|
||||
}
|
||||
c=ucnv_getNextUChar(cnv, &source, source+sizeof(source), &err);
|
||||
if(err != U_INVALID_CHAR_FOUND && c!= 0xFFFD){
|
||||
log_err("FAIL: Expected: U_INVALID_CHAR_ERROR ----Got:%s\n Expected 0xFFFD Got %lx\n", myErrorName(err), c);
|
||||
if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
|
||||
log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c);
|
||||
}
|
||||
ucnv_close(cnv);
|
||||
|
||||
|
@ -1678,13 +1678,13 @@ TestLMBCS() {
|
||||
{
|
||||
log_err("Unexpected pointer move in 0 byte source request \n");
|
||||
}
|
||||
/*0 byte source request - GetNextUChar : error & value == FFFD */
|
||||
/*0 byte source request - GetNextUChar : error & value == fffe or ffff */
|
||||
uniChar = ucnv_getNextUChar(cnv, &pLIn, pLIn, &errorCode);
|
||||
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
|
||||
{
|
||||
log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
|
||||
}
|
||||
if (uniChar != 0xFFFD) /* would like to use an exported define here */
|
||||
if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
|
||||
{
|
||||
log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
|
||||
}
|
||||
|
@ -193,7 +193,7 @@ static UDataInfo dataInfo={
|
||||
0,
|
||||
|
||||
0x63, 0x6e, 0x76, 0x74, /* dataFormat="cnvt" */
|
||||
4, 0, 0, 0, /* formatVersion -- the new MBCS format needs at least 5.0.0.0 */
|
||||
5, 0, 0, 0, /* formatVersion */
|
||||
1, 6, 0, 0 /* dataVersion */
|
||||
};
|
||||
|
||||
@ -203,7 +203,6 @@ void writeConverterData(UConverterSharedData *mySharedData,
|
||||
const char *cnvDir,
|
||||
UErrorCode *status)
|
||||
{
|
||||
UVersionInfo generalFormatVersion;
|
||||
UNewDataMemory *mem = NULL;
|
||||
uint32_t sz2;
|
||||
|
||||
@ -212,16 +211,7 @@ void writeConverterData(UConverterSharedData *mySharedData,
|
||||
return;
|
||||
}
|
||||
|
||||
uprv_memcpy(&generalFormatVersion, &dataInfo.formatVersion, sizeof(UVersionInfo));
|
||||
if(mySharedData->staticData->conversionType==UCNV_MBCS && dataInfo.formatVersion[0]<5) {
|
||||
/* adjust the formatVersion for MBCS if necessary */
|
||||
dataInfo.formatVersion[0]=5;
|
||||
dataInfo.formatVersion[1]=0;
|
||||
dataInfo.formatVersion[2]=0;
|
||||
dataInfo.formatVersion[3]=0;
|
||||
}
|
||||
mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
|
||||
uprv_memcpy(&dataInfo.formatVersion, &generalFormatVersion, sizeof(UVersionInfo));
|
||||
|
||||
if(U_FAILURE(*status))
|
||||
{
|
||||
@ -639,17 +629,15 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat
|
||||
char storageLine[UCNV_MAX_LINE_TEXT];
|
||||
char* line = NULL;
|
||||
UConverterTable* myUConverterTable = NULL;
|
||||
UChar unicodeValue = 0xFFFF;
|
||||
UChar unicodeValue = 0xfffe;
|
||||
int32_t sbcsCodepageValue = 0, fallback = 0;
|
||||
UBool seenFallback = FALSE;
|
||||
char codepointBytes[5];
|
||||
unsigned char replacementChar = '\0';
|
||||
int32_t i = 0;
|
||||
CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
|
||||
|
||||
|
||||
if (U_FAILURE(*err)) return NULL;
|
||||
replacementChar = myConverter->subChar[0];
|
||||
myUConverterTable = (UConverterTable*)uprv_malloc(sizeof(UConverterSBCSTable));
|
||||
|
||||
if (myUConverterTable == NULL)
|
||||
@ -725,8 +713,8 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat
|
||||
seenFallback = FALSE;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((myUConverterTable->sbcs.toUnicode[i] == 0xFFFF) &&
|
||||
(myUConverterTable->sbcs.toUnicodeFallback[i] != 0xFFFF))
|
||||
if ((myUConverterTable->sbcs.toUnicode[i] >= 0xfffe) &&
|
||||
(myUConverterTable->sbcs.toUnicodeFallback[i] < 0xfffe))
|
||||
|
||||
{
|
||||
seenFallback = TRUE;
|
||||
@ -835,7 +823,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
|
||||
char storageLine[UCNV_MAX_LINE_TEXT];
|
||||
char* line = NULL;
|
||||
UConverterTable* myUConverterTable = NULL;
|
||||
UChar unicodeValue = 0xFFFF;
|
||||
UChar unicodeValue = 0xfffe;
|
||||
int32_t mbcsCodepageValue = '\0';
|
||||
char codepointBytes[6];
|
||||
int32_t replacementChar = 0x0000, fallback = 0;
|
||||
@ -862,7 +850,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
|
||||
myFromUnicode = &myUConverterTable->dbcs.fromUnicode;
|
||||
ucmp16_init(myFromUnicode, (uint16_t)replacementChar);
|
||||
myToUnicode = &myUConverterTable->dbcs.toUnicode;
|
||||
ucmp16_init(myToUnicode, (int16_t)0xFFFD);
|
||||
ucmp16_init(myToUnicode, (int16_t)0xfffe);
|
||||
|
||||
myFromUnicodeFallback = &myUConverterTable->dbcs.fromUnicodeFallback;
|
||||
ucmp16_initBogus(myFromUnicodeFallback);
|
||||
@ -907,7 +895,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
|
||||
{
|
||||
myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
|
||||
ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar);
|
||||
ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD);
|
||||
ucmp16_init(myToUnicodeFallback, (uint16_t)0xfffe);
|
||||
}
|
||||
ucmp16_set(myToUnicodeFallback, (int16_t)mbcsCodepageValue, unicodeValue);
|
||||
ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)mbcsCodepageValue);
|
||||
@ -919,8 +907,8 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
|
||||
{
|
||||
for (i = 0; i < (uint32_t)ucmp16_getkUnicodeCount(); i++)
|
||||
{
|
||||
if ((ucmp16_getu(myToUnicode, i) == 0xFFFD) &&
|
||||
(ucmp16_getu(myToUnicodeFallback, i) != 0xFFFD))
|
||||
if ((ucmp16_getu(myToUnicode, i) >= 0xfffe) &&
|
||||
(ucmp16_getu(myToUnicodeFallback, i) < 0xfffe))
|
||||
{
|
||||
seenFallback = TRUE;
|
||||
break;
|
||||
@ -950,7 +938,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
|
||||
char storageLine[UCNV_MAX_LINE_TEXT];
|
||||
char* line = NULL;
|
||||
UConverterTable* myUConverterTable = NULL;
|
||||
UChar unicodeValue = 0xFFFD;
|
||||
UChar unicodeValue = 0xfffe;
|
||||
int32_t dbcsCodepageValue = '\0';
|
||||
char codepointBytes[6];
|
||||
int32_t replacementChar = 0x0000, fallback = 0;
|
||||
@ -976,7 +964,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
|
||||
myFromUnicode = &(myUConverterTable->dbcs.fromUnicode);
|
||||
ucmp16_init(myFromUnicode, (int16_t)replacementChar);
|
||||
myToUnicode = &(myUConverterTable->dbcs.toUnicode);
|
||||
ucmp16_init(myToUnicode, (int16_t)0xFFFD);
|
||||
ucmp16_init(myToUnicode, (int16_t)0xfffe);
|
||||
|
||||
myFromUnicodeFallback = &(myUConverterTable->dbcs.fromUnicodeFallback);
|
||||
ucmp16_initBogus(myFromUnicodeFallback);
|
||||
@ -1020,7 +1008,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
|
||||
{
|
||||
myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
|
||||
ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar);
|
||||
ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD);
|
||||
ucmp16_init(myToUnicodeFallback, (uint16_t)0xfffe);
|
||||
}
|
||||
ucmp16_set(myToUnicodeFallback, (int16_t)dbcsCodepageValue, unicodeValue);
|
||||
ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)dbcsCodepageValue);
|
||||
@ -1031,8 +1019,8 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
|
||||
{
|
||||
for (i = 0; i < (uint32_t)ucmp16_getkUnicodeCount(); i++)
|
||||
{
|
||||
if ((ucmp16_getu(myToUnicode, i) == 0xFFFD) &&
|
||||
(ucmp16_getu(myToUnicodeFallback, i) != 0xFFFD))
|
||||
if ((ucmp16_getu(myToUnicode, i) >= 0xfffe) &&
|
||||
(ucmp16_getu(myToUnicodeFallback, i) < 0xfffe))
|
||||
{
|
||||
seenFallback = TRUE;
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user