ICU-502 clean up 'unassigned' handling and callback calls

X-SVN-Rev: 1871
This commit is contained in:
Markus Scherer 2000-07-13 23:55:33 +00:00
parent 2c9d62de49
commit 7d721ba16c
10 changed files with 204 additions and 129 deletions

View File

@ -591,7 +591,7 @@ static UChar32 T_UConverter_getNextUChar_ISO_2022(UConverterToUnicodeArgs* args,
if (args->sourceLimit < args->source)
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return 0xFFFD;
return 0xffff;
}
for (;;)
@ -621,7 +621,7 @@ static UChar32 T_UConverter_getNextUChar_ISO_2022(UConverterToUnicodeArgs* args,
args->source++;
}
return 0xFFFD;
return 0xffff;
}
static const UConverterImpl _ISO2022Impl={
@ -711,7 +711,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
/*writing the UniChar to the output stream */
if (targetUniChar != missingUCharMarker)
if (targetUniChar < 0xfffe)
{
/*writes the UniChar to the output stream */
args->target[myTargetIndex++] = targetUniChar;
@ -722,8 +722,19 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
const char* saveSource = args->source;
UChar* saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
UConverterCallbackReason reason;
if (targetUniChar == 0xfffe)
{
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else
{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
*err = U_INVALID_CHAR_FOUND;
if (mySourceChar > 0xff)
{
args->converter->invalidCharLength = 2;
@ -740,9 +751,9 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
args->source += mySourceIndex;
ToU_CALLBACK_MACRO(args->converter->toUContext,
args,
args->source,
1,
UCNV_UNASSIGNED,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
reason,
err);
args->source = saveSource;
@ -828,7 +839,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
/*writing the UniChar to the output stream */
if (targetUniChar != missingUCharMarker)
if (targetUniChar < 0xfffe)
{
/*writes the UniChar to the output stream */
{
@ -846,8 +857,19 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
const char* saveSource = args->source;
UChar* saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
*err = U_INVALID_CHAR_FOUND;
UConverterCallbackReason reason;
if (targetUniChar == 0xfffe)
{
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else
{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
if (mySourceChar > 0xFF)
{
args->converter->invalidCharLength = 2;
@ -869,7 +891,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
args,
args->source,
1,
UCNV_UNASSIGNED,
reason,
err);
args->source = saveSource;
@ -1160,24 +1182,25 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
/*safe keeps a ptr to the beginning in case we need to step back*/
/*Input boundary check*/
if (args->source+1 > args->sourceLimit)
if (args->source >= args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
return 0xffff;
}
/*Checks to see if with have SI/SO shifters
if we do we change the mode appropriately and we consume the byte*/
if ((*(args->source) == UCNV_SI) || (*(args->source) == UCNV_SO))
while ((*(args->source) == UCNV_SI) || (*(args->source) == UCNV_SO))
{
args->converter->mode = *(args->source);
args->source++;
sourceInitial = args->source;
/*Rechecks boundary after consuming the shift sequence*/
if (args->source+1 > args->sourceLimit)
if (args->source >= args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
return 0xffff;
}
}
@ -1195,7 +1218,7 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
if ((args->source + 2) > args->sourceLimit)
{
*err = U_TRUNCATED_CHAR_FOUND;
return 0xFFFD;
return 0xffff;
}
myUChar = ucmp16_getu( (&(args->converter->sharedData->table->dbcs.toUnicode)),
@ -1204,28 +1227,34 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
args->source += 2;
}
if (myUChar != 0xFFFD) return myUChar;
if (myUChar < 0xfffe) return myUChar;
else
{
/*rewinds source*/
/* HSYS: Check logic here */
const char* sourceFinal = args->source;
UChar* myUCharPtr = &myUChar;
*err = U_INVALID_CHAR_FOUND;
args->source = sourceInitial;
UConverterCallbackReason reason;
if (myUChar == 0xfffe)
{
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else
{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
/*It's is very likely that the ErrorFunctor will write to the
*internal buffers */
args->target = myUCharPtr;
args->targetLimit = myUCharPtr + 1;
args->source = sourceFinal;
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
sourceFinal,
1,
UCNV_UNASSIGNED,
sourceInitial,
args->source - sourceInitial,
reason,
err);
/*makes the internal caching transparent to the user*/

View File

@ -106,7 +106,7 @@ isCnvAcceptable(void *context,
pInfo->dataFormat[1]==0x6e &&
pInfo->dataFormat[2]==0x76 &&
pInfo->dataFormat[3]==0x74 &&
(pInfo->formatVersion[0]==4 || pInfo->formatVersion[0]==5);
pInfo->formatVersion[0]==5;
}
#define DATA_TYPE "cnv"
@ -418,7 +418,7 @@ UConverter *
UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
{
UDataInfo info;
/* UDataInfo info; -- necessary only if some converters have different formatVersion */
const uint8_t *raw = (const uint8_t *)udata_getMemory(pData);
const UConverterStaticData *source = (const UConverterStaticData *) raw;
UConverterSharedData *data;
@ -435,6 +435,8 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s
return NULL;
}
#if 0
/* necessary only if some converters have different formatVersion; now everything is at version 5 */
/* test for the format version: MBCS is at version 5, the rest still at 4 */
info.size=sizeof(UDataInfo);
udata_getInfo(pData, &info);
@ -442,6 +444,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s
*status = U_INVALID_TABLE_FORMAT;
return NULL;
}
#endif
data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
if(data == NULL) {

View File

@ -54,8 +54,16 @@ union UConverterTable
U_CDECL_BEGIN
/* this is used in fromUnicode DBCS tables as an "unassigned" marker */
#define missingCharMarker 0xFFFF
#define missingUCharMarker 0xFFFD
/*
* #define missingUCharMarker 0xfffe
*
* there are actually two values used in toUnicode tables:
* U+fffe "unassigned"
* U+ffff "illegal"
*/
#define FromU_CALLBACK_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
if (args->converter->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\

View File

@ -967,7 +967,7 @@ GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode
if (args->source+index > args->sourceLimit){\
*err = U_TRUNCATED_CHAR_FOUND;\
args->source = saveSource;\
return missingUCharMarker;}
return 0xffff;}
/* Return the Unicode representation for the current LMBCS character
@ -990,7 +990,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
if (args->source >= args->sourceLimit)
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return missingUCharMarker;
return 0xffff;
}
/* Grab first byte & save address for error recovery */
CurByte = *((ulmbcs_byte_t *) (saveSource = args->source++));
@ -1133,7 +1133,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
}
}
}
if (((uint32_t)uniChar - 0xfffd) <= 2) /* 0xfffd<=uniChar<=0xffff, was: uniChar == missingUCharMarker */
if (((uint32_t)uniChar - 0xfffe) <= 1) /* 0xfffe<=uniChar<=0xffff */
{
/*It is very likely that the ErrorFunctor will write to the
*internal buffers */
@ -1141,10 +1141,21 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
/* This code needs updating when new error callbacks are installed */
UChar * pUniChar = (UChar *)&uniChar;
*err = U_INVALID_CHAR_FOUND;
UConverterCallbackReason reason;
if (uniChar == 0xfffe)
{
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else
{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
args->target = pUniChar;
args->targetLimit = pUniChar + 1;
args->source = saveSource;
args->flush = TRUE;
args->offsets = NULL;
args->size = sizeof(args);
@ -1152,7 +1163,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
args,
saveSource,
args->sourceLimit - saveSource,
UCNV_UNASSIGNED,
reason,
err);
args->source = saveSource;
}
@ -1237,7 +1248,7 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
}
if (U_SUCCESS(*err))
{
if (uniChar != missingUCharMarker)
if (uniChar < 0xfffe)
{
*(args->target)++ = uniChar;
if(args->offsets)
@ -1245,10 +1256,14 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
*(args->offsets)++ = saveSource - pStartLMBCS;
}
}
else
else if (uniChar == 0xfffe)
{
*err = U_INVALID_CHAR_FOUND;
}
else /* if (uniChar == 0xffff) */
{
*err = U_ILLEGAL_CHAR_FOUND;
}
}
}
/* if target ran out before source, return U_INDEX_OUTOFBOUNDS_ERROR */

View File

@ -178,7 +178,7 @@ void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
args,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
UCNV_ILLEGAL,
err);
args->source = saveSource;
@ -569,7 +569,7 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
if (args->source >= args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
return 0xffff;
}
myByte = (uint8_t)*(args->source++);
@ -587,7 +587,7 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
if ((args->source + extraBytesToWrite - 1) > args->sourceLimit)
{
*err = U_TRUNCATED_CHAR_FOUND;
return 0xFFFD;
return 0xffff;
}
else
{
@ -635,24 +635,20 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
CALL_ERROR_FUNCTION:
{
/*rewinds source*/
const char* sourceFinal = args->source;
UChar myUChar = (UChar)ch; /* ### TODO: this is a hack until we prepare the callbacks for code points */
UChar* myUCharPtr = &myUChar;
*err = U_ILLEGAL_CHAR_FOUND;
args->source = sourceInitial;
/*It is very likely that the ErrorFunctor will write to the
*internal buffers */
args->target = myUCharPtr;
args->targetLimit = myUCharPtr + 1;
args->source = sourceFinal;
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
sourceFinal,
args->sourceLimit-sourceFinal,
UCNV_UNASSIGNED,
sourceInitial,
args->source-sourceInitial,
UCNV_ILLEGAL,
err);
@ -820,7 +816,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,
*err = U_TRUNCATED_CHAR_FOUND;
}
return 0xFFFD;
return 0xffff;
}
@ -834,7 +830,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,
if (args->source+2 > args->sourceLimit) {
*err = U_TRUNCATED_CHAR_FOUND;
return 0xFFFD;
return 0xffff;
}
/* get the second surrogate and assemble the code point */
@ -1009,7 +1005,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,
*err = U_TRUNCATED_CHAR_FOUND;
}
return 0xFFFD;
return 0xffff;
}
@ -1023,7 +1019,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,
if (args->source+2 > args->sourceLimit) {
*err = U_TRUNCATED_CHAR_FOUND;
return 0xFFFD;
return 0xffff;
}
/* get the second surrogate and assemble the code point */

View File

@ -128,7 +128,7 @@ static void T_UConverter_fromUnicode_LATIN_1 (UConverterFromUnicodeArgs * args
args->converter->invalidUCharLength,
(UChar32) (args->converter->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
args->converter->invalidUCharBuffer[2])
args->converter->invalidUCharBuffer[1])
: args->converter->invalidUCharBuffer[0]),
reason,
err);
@ -168,7 +168,7 @@ static UChar32 T_UConverter_getNextUChar_LATIN_1(UConverterToUnicodeArgs* args,
if (args->source+1 > args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
return 0xffff;
}
/* make sure that we zero-extend, not sign-extend, the byte */

View File

@ -82,7 +82,7 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
/*gets the corresponding UniChar */
targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];
if (targetUniChar != missingUCharMarker)
if (targetUniChar < 0xfffe)
{
/* writes the UniChar to the output stream */
myTarget[myTargetIndex++] = targetUniChar;
@ -93,19 +93,30 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
/* Look up in the fallback table first */
targetUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
if (targetUniChar != missingUCharMarker)
UChar fallbackUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
if (fallbackUniChar < 0xfffe)
{
myTarget[myTargetIndex++] = targetUniChar;
myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
}
}
if (targetUniChar == missingUCharMarker)
if (targetUniChar >= 0xfffe)
{
const char *saveSource = args->source;
UChar *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
UConverterCallbackReason reason;
if (targetUniChar == 0xfffe)
{
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else
{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
*err = U_INVALID_CHAR_FOUND;
args->converter->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
args->converter->invalidCharLength = 1;
@ -117,7 +128,7 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
args,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
reason,
err);
/* Hsys: calculate the source and target advancement */
args->source = saveSource;
@ -238,7 +249,7 @@ void T_UConverter_fromUnicode_SBCS (UConverterFromUnicodeArgs * args,
args->converter->invalidUCharLength,
(UChar32) (args->converter->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
args->converter->invalidUCharBuffer[2])
args->converter->invalidUCharBuffer[1])
: args->converter->invalidUCharBuffer[0]),
reason,
err);
@ -273,45 +284,52 @@ UChar32 T_UConverter_getNextUChar_SBCS(UConverterToUnicodeArgs* args,
{
UChar myUChar;
if (U_FAILURE(*err)) return 0xFFFD;
if (U_FAILURE(*err)) return 0xffff;
if (args->source+1 > args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
return 0xffff;
}
/*Gets the corresponding codepoint*/
myUChar = args->converter->sharedData->table->sbcs.toUnicode[(unsigned char)*(args->source++)];
if (myUChar != 0xFFFD) return myUChar;
if (myUChar < 0xfffe) return myUChar;
else
{
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = args->source;
UConverterCallbackReason reason;
/* Do the fallback stuff */
if ((args->converter->useFallback == TRUE)&&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
myUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)];
if (myUChar != 0xFFFD) return myUChar;
UChar fallbackUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)];
if (fallbackUChar < 0xfffe) return fallbackUChar;
}
*err = U_INVALID_CHAR_FOUND;
/*Calls the ErrorFunctor after rewinding the input buffer*/
args->source--;
if (myUChar == 0xfffe)
{
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else
{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
/*Calls the ErrorFunctor */
/*It's is very likely that the ErrorFunctor will write to the
*internal buffers */
args->target = myUCharPtr;
args->targetLimit = myUCharPtr + 1;
args->source = sourceFinal;
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
sourceFinal,
args->source - 1,
1,
UCNV_UNASSIGNED,
reason,
err);
/*makes the internal caching transparent to the user*/
@ -429,7 +447,7 @@ void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
/*writing the UniChar to the output stream */
if (targetUniChar != missingUCharMarker)
if (targetUniChar < 0xfffe)
{
/*writes the UniChar to the output stream */
myTarget[myTargetIndex++] = targetUniChar;
@ -437,19 +455,30 @@ void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
else if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
if (targetUniChar != missingUCharMarker)
UChar fallbackUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
if (fallbackUniChar < 0xfffe)
{
myTarget[myTargetIndex++] = targetUniChar;
myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
}
}
if (targetUniChar == missingUCharMarker)
if (targetUniChar >= 0xfffe)
{
const char *saveSource = args->source;
UChar *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
UConverterCallbackReason reason;
if (targetUniChar == 0xfffe)
{
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else
{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
*err = U_INVALID_CHAR_FOUND;
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
args->converter->invalidCharLength = 2;
@ -462,7 +491,7 @@ void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
args,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
reason,
err);
/* Hsys: calculate the source and target advancement */
args->source = saveSource;
@ -616,7 +645,7 @@ void T_UConverter_fromUnicode_DBCS (UConverterFromUnicodeArgs * args,
args->converter->invalidUCharLength,
(UChar32) (args->converter->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
args->converter->invalidUCharBuffer[2])
args->converter->invalidUCharBuffer[1])
: args->converter->invalidUCharBuffer[0]),
reason,
err);
@ -650,7 +679,7 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
{
UChar myUChar;
if (U_FAILURE(*err)) return 0xFFFD;
if (U_FAILURE(*err)) return 0xffff;
/*Checks boundaries and set appropriate error codes*/
if (args->source+2 > args->sourceLimit)
{
@ -665,7 +694,7 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
*err = U_TRUNCATED_CHAR_FOUND;
}
return 0xFFFD;
return 0xffff;
}
/*Gets the corresponding codepoint*/
@ -674,39 +703,45 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
/*update the input pointer*/
args->source += 2;
if (myUChar != 0xFFFD) return myUChar;
if (myUChar < 0xfffe) return myUChar;
else
{
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = args->source;
UConverterCallbackReason reason;
/* rewinding the input buffer*/
args->source -= 2;
/* Do the fallback stuff */
if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
UChar fallbackUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
(uint16_t)(((UChar)((*(args->source))) << 8) |((uint8_t)*(args->source-1))));
if (myUChar != 0xFFFD)
if (fallbackUChar < 0xfffe)
{
args->source += 2;
return myUChar;
return fallbackUChar;
}
}
*err = U_INVALID_CHAR_FOUND;
if (myUChar == 0xfffe)
{
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else
{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
args->target = myUCharPtr;
args->targetLimit = myUCharPtr + 1;
args->source = sourceFinal;
/*It's is very likely that the ErrorFunctor will write to the
*internal buffers */
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
sourceFinal,
args->source - 2,
2,
UCNV_UNASSIGNED,
reason,
err);
/*makes the internal caching transparent to the user*/
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;

View File

@ -279,7 +279,7 @@ void TestToUnicodeErrorBehaviour()
}
void TestGetNextErrorBehaviour(){
/*Test for Illegal character*/
/*Test for unassigned character*/
static const char input1[]={ (char)0x70 };
const char* source=(const char*)input1;
UErrorCode err=U_ZERO_ERROR;
@ -287,10 +287,11 @@ void TestGetNextErrorBehaviour(){
UConverter *cnv=ucnv_open("ibm-1159", &err);
if(U_FAILURE(err)) {
log_err("Unable to open a SBCS(ibm-1159) converter: %s\n", u_errorName(err));
return;
}
c=ucnv_getNextUChar(cnv, &source, source+sizeof(source), &err);
if(err != U_INVALID_CHAR_FOUND && c!= 0xFFFD){
log_err("FAIL: Expected: U_INVALID_CHAR_ERROR ----Got:%s\n Expected 0xFFFD Got %lx\n", myErrorName(err), c);
if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c);
}
ucnv_close(cnv);

View File

@ -1678,13 +1678,13 @@ TestLMBCS() {
{
log_err("Unexpected pointer move in 0 byte source request \n");
}
/*0 byte source request - GetNextUChar : error & value == FFFD */
/*0 byte source request - GetNextUChar : error & value == fffe or ffff */
uniChar = ucnv_getNextUChar(cnv, &pLIn, pLIn, &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
}
if (uniChar != 0xFFFD) /* would like to use an exported define here */
if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
{
log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
}

View File

@ -193,7 +193,7 @@ static UDataInfo dataInfo={
0,
0x63, 0x6e, 0x76, 0x74, /* dataFormat="cnvt" */
4, 0, 0, 0, /* formatVersion -- the new MBCS format needs at least 5.0.0.0 */
5, 0, 0, 0, /* formatVersion */
1, 6, 0, 0 /* dataVersion */
};
@ -203,7 +203,6 @@ void writeConverterData(UConverterSharedData *mySharedData,
const char *cnvDir,
UErrorCode *status)
{
UVersionInfo generalFormatVersion;
UNewDataMemory *mem = NULL;
uint32_t sz2;
@ -212,16 +211,7 @@ void writeConverterData(UConverterSharedData *mySharedData,
return;
}
uprv_memcpy(&generalFormatVersion, &dataInfo.formatVersion, sizeof(UVersionInfo));
if(mySharedData->staticData->conversionType==UCNV_MBCS && dataInfo.formatVersion[0]<5) {
/* adjust the formatVersion for MBCS if necessary */
dataInfo.formatVersion[0]=5;
dataInfo.formatVersion[1]=0;
dataInfo.formatVersion[2]=0;
dataInfo.formatVersion[3]=0;
}
mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
uprv_memcpy(&dataInfo.formatVersion, &generalFormatVersion, sizeof(UVersionInfo));
if(U_FAILURE(*status))
{
@ -639,17 +629,15 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat
char storageLine[UCNV_MAX_LINE_TEXT];
char* line = NULL;
UConverterTable* myUConverterTable = NULL;
UChar unicodeValue = 0xFFFF;
UChar unicodeValue = 0xfffe;
int32_t sbcsCodepageValue = 0, fallback = 0;
UBool seenFallback = FALSE;
char codepointBytes[5];
unsigned char replacementChar = '\0';
int32_t i = 0;
CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
if (U_FAILURE(*err)) return NULL;
replacementChar = myConverter->subChar[0];
myUConverterTable = (UConverterTable*)uprv_malloc(sizeof(UConverterSBCSTable));
if (myUConverterTable == NULL)
@ -725,8 +713,8 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat
seenFallback = FALSE;
for (i = 0; i < 256; i++)
{
if ((myUConverterTable->sbcs.toUnicode[i] == 0xFFFF) &&
(myUConverterTable->sbcs.toUnicodeFallback[i] != 0xFFFF))
if ((myUConverterTable->sbcs.toUnicode[i] >= 0xfffe) &&
(myUConverterTable->sbcs.toUnicodeFallback[i] < 0xfffe))
{
seenFallback = TRUE;
@ -835,7 +823,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
char storageLine[UCNV_MAX_LINE_TEXT];
char* line = NULL;
UConverterTable* myUConverterTable = NULL;
UChar unicodeValue = 0xFFFF;
UChar unicodeValue = 0xfffe;
int32_t mbcsCodepageValue = '\0';
char codepointBytes[6];
int32_t replacementChar = 0x0000, fallback = 0;
@ -862,7 +850,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
myFromUnicode = &myUConverterTable->dbcs.fromUnicode;
ucmp16_init(myFromUnicode, (uint16_t)replacementChar);
myToUnicode = &myUConverterTable->dbcs.toUnicode;
ucmp16_init(myToUnicode, (int16_t)0xFFFD);
ucmp16_init(myToUnicode, (int16_t)0xfffe);
myFromUnicodeFallback = &myUConverterTable->dbcs.fromUnicodeFallback;
ucmp16_initBogus(myFromUnicodeFallback);
@ -907,7 +895,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
{
myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar);
ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD);
ucmp16_init(myToUnicodeFallback, (uint16_t)0xfffe);
}
ucmp16_set(myToUnicodeFallback, (int16_t)mbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)mbcsCodepageValue);
@ -919,8 +907,8 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
{
for (i = 0; i < (uint32_t)ucmp16_getkUnicodeCount(); i++)
{
if ((ucmp16_getu(myToUnicode, i) == 0xFFFD) &&
(ucmp16_getu(myToUnicodeFallback, i) != 0xFFFD))
if ((ucmp16_getu(myToUnicode, i) >= 0xfffe) &&
(ucmp16_getu(myToUnicodeFallback, i) < 0xfffe))
{
seenFallback = TRUE;
break;
@ -950,7 +938,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
char storageLine[UCNV_MAX_LINE_TEXT];
char* line = NULL;
UConverterTable* myUConverterTable = NULL;
UChar unicodeValue = 0xFFFD;
UChar unicodeValue = 0xfffe;
int32_t dbcsCodepageValue = '\0';
char codepointBytes[6];
int32_t replacementChar = 0x0000, fallback = 0;
@ -976,7 +964,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
myFromUnicode = &(myUConverterTable->dbcs.fromUnicode);
ucmp16_init(myFromUnicode, (int16_t)replacementChar);
myToUnicode = &(myUConverterTable->dbcs.toUnicode);
ucmp16_init(myToUnicode, (int16_t)0xFFFD);
ucmp16_init(myToUnicode, (int16_t)0xfffe);
myFromUnicodeFallback = &(myUConverterTable->dbcs.fromUnicodeFallback);
ucmp16_initBogus(myFromUnicodeFallback);
@ -1020,7 +1008,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
{
myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar);
ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD);
ucmp16_init(myToUnicodeFallback, (uint16_t)0xfffe);
}
ucmp16_set(myToUnicodeFallback, (int16_t)dbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)dbcsCodepageValue);
@ -1031,8 +1019,8 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
{
for (i = 0; i < (uint32_t)ucmp16_getkUnicodeCount(); i++)
{
if ((ucmp16_getu(myToUnicode, i) == 0xFFFD) &&
(ucmp16_getu(myToUnicodeFallback, i) != 0xFFFD))
if ((ucmp16_getu(myToUnicode, i) >= 0xfffe) &&
(ucmp16_getu(myToUnicodeFallback, i) < 0xfffe))
{
seenFallback = TRUE;
break;