ICU-2449 refactor conversion - call toUnicode and getNextUChar callbacks only from ucnv.c framework, fix/rewrite ISO 2022 toUnicode

X-SVN-Rev: 12729
This commit is contained in:
Markus Scherer 2003-08-01 14:42:29 +00:00
parent 19fb36e3bd
commit 03cf995d9e

View File

@ -124,16 +124,9 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
UErrorCode *pErrorCode);
static void
T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs * args,
UErrorCode * err);
static void
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
UErrorCode * err);
static UChar32
T_UConverter_getNextUChar_ISO_2022 (UConverterToUnicodeArgs * args,
UErrorCode * err);
/***************** ISO-2022-JP ********************************/
static void
@ -367,11 +360,11 @@ static const UConverterImpl _ISO2022Impl={
_ISO2022Close,
_ISO2022Reset,
T_UConverter_toUnicode_ISO_2022,
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
T_UConverter_fromUnicode_UTF8,
T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC,
T_UConverter_getNextUChar_ISO_2022,
NULL,
NULL,
_ISO2022getName,
@ -949,245 +942,110 @@ MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
*
*/
static UChar32
T_UConverter_getNextUChar_ISO_2022(UConverterToUnicodeArgs* args,
UErrorCode* err){
const char* mySourceLimit;
int plane=0; /*dummy variable*/
UConverterDataISO2022* myData =((UConverterDataISO2022*)(args->converter->extraInfo));
/*Arguments Check*/
if (args->sourceLimit < args->source){
*err = U_ILLEGAL_ARGUMENT_ERROR;
return 0xffff;
}
while(1){
mySourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, TRUE);
/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
if (args->converter->mode == UCNV_SO && mySourceLimit!=args->source)
/*Already doing some conversion*/{
return ucnv_getNextUChar(myData->currentConverter,
&(args->source),
mySourceLimit,
err);
}
/*-Done with buffer with entire buffer
*-Error while converting
*/
changeState_2022(args->converter,
&(args->source),
args->sourceLimit,
TRUE,
ISO_2022,
&plane,
err);
if(args->source >= args->sourceLimit){
*err = U_INDEX_OUTOFBOUNDS_ERROR;
break;
}
}
if( (args->source == args->sourceLimit) && args->flush){
_ISO2022Reset(args->converter,UCNV_RESET_TO_UNICODE);
}
return 0xffff;
}
static void
T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs *args,
UErrorCode* err){
const char *mySourceLimit;
char const* sourceStart;
UConverter *saveThis;
int plane =0; /*dummy variable*/
UConverterDataISO2022* myData;
if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
*err = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
myData= ((UConverterDataISO2022*)(args->converter->extraInfo));
while (args->source < args->sourceLimit) {
/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
mySourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
if (args->converter->mode == UCNV_SO) /*Already doing some conversion*/{
saveThis = args->converter;
args->offsets = NULL;
args->converter = myData->currentConverter;
/*
* ### TODO this does not maintain overflow and error buffers between
* the sub-converter and this one;
* idea: just copy those parts of the sub-UConverter into the 2022 UConverter
* after ucnv_toUnicode()
*/
ucnv_toUnicode(args->converter,
&args->target,
args->targetLimit,
&args->source,
mySourceLimit,
args->offsets,
args->flush,
err);
args->converter = saveThis;
myData->isFirstBuffer = FALSE;
}
if((myData->isFirstBuffer) && (args->source[0]!=(char)ESC_2022)
&& (myData->currentConverter==NULL)){
saveThis = args->converter;
args->offsets = NULL;
myData->currentConverter = ucnv_open("ASCII",err);
if(U_FAILURE(*err)){
break;
}
args->converter = myData->currentConverter;
ucnv_toUnicode(args->converter,
&args->target,
args->targetLimit,
&args->source,
mySourceLimit,
args->offsets,
args->flush,
err);
args->converter = saveThis;
args->converter->mode = UCNV_SO;
myData->isFirstBuffer=FALSE;
}
/*-Done with buffer with entire buffer
-Error while converting
*/
if (U_FAILURE(*err) || (args->source == args->sourceLimit))
return;
sourceStart = args->source;
changeState_2022(args->converter,
&(args->source),
args->sourceLimit,
TRUE,
ISO_2022,
&plane,
err);
/* args->source = sourceStart; */
}
myData->isFirstBuffer=FALSE;
}
static void
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
UErrorCode* err){
int32_t myOffset=0;
int32_t base = 0;
const char* mySourceLimit;
char const* sourceStart;
const char* mySourceLimit, *realSourceLimit;
const char* sourceStart;
const UChar* myTargetStart;
UConverter* saveThis;
int plane =0;/*dummy variable*/
UConverterDataISO2022* myData;
int8_t length;
if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){
*err = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
saveThis = args->converter;
myData=((UConverterDataISO2022*)(saveThis->extraInfo));
myData=((UConverterDataISO2022*)(args->converter->extraInfo));
while (args->source < args->sourceLimit) {
mySourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
realSourceLimit = args->sourceLimit;
while (args->source < realSourceLimit) {
if(myData->key == 0) { /* are we in the middle of an escape sequence? */
/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
if (args->converter->mode == UCNV_SO) /*Already doing some conversion*/{
const UChar* myTargetStart = args->target;
if(args->source < mySourceLimit) {
if(myData->currentConverter==NULL) {
myData->currentConverter = ucnv_open("ASCII",err);
if(U_FAILURE(*err)){
return;
}
saveThis = args->converter;
args->converter = myData->currentConverter;
ucnv_toUnicode(args->converter,
&(args->target),
args->targetLimit,
&(args->source),
mySourceLimit,
args->offsets,
args->flush,
err);
myData->isFirstBuffer = FALSE;
args->converter = saveThis;
{
int32_t lim = args->target - myTargetStart;
int32_t i = 0;
for (i=base; i < lim;i++){
args->offsets[i] += myOffset;
myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
saveThis->mode = UCNV_SO;
}
base += lim;
}
}
if(myData->isFirstBuffer && args->source[0]!=ESC_2022
&& (myData->currentConverter==NULL)){
/* convert to before the ESC or until the end of the buffer */
myData->isFirstBuffer=FALSE;
sourceStart = args->source;
myTargetStart = args->target;
args->converter = myData->currentConverter;
ucnv_toUnicode(args->converter,
&args->target,
args->targetLimit,
&args->source,
mySourceLimit,
args->offsets,
(UBool)(args->flush && mySourceLimit == realSourceLimit),
err);
args->converter = saveThis;
const UChar* myTargetStart = args->target;
saveThis = args->converter;
args->offsets = NULL;
myData->currentConverter = ucnv_open("ASCII",err);
if(U_FAILURE(*err)){
break;
}
args->converter = myData->currentConverter;
ucnv_toUnicode(args->converter,
&args->target,
args->targetLimit,
&args->source,
mySourceLimit,
args->offsets,
args->flush,
err);
args->converter = saveThis;
args->converter->mode = UCNV_SO;
myData->isFirstBuffer=FALSE;
/* args->converter = saveThis;*/
{
int32_t lim = args->target - myTargetStart;
int32_t i = 0;
for (i=base; i < lim;i++){
args->offsets[i] += myOffset;
if (*err == U_BUFFER_OVERFLOW_ERROR) {
/* move the overflow buffer */
length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
myData->currentConverter->UCharErrorBufferLength = 0;
if(length > 0) {
uprv_memcpy(saveThis->UCharErrorBuffer,
myData->currentConverter->UCharErrorBuffer,
length*U_SIZEOF_UCHAR);
}
return;
}
/*
* At least one of:
* -Error while converting
* -Done with entire buffer
* -Need to write offsets or update the current offset
* (leave that up to the code in ucnv.c)
*
* or else we just stopped at an ESC byte and continue with changeState_2022()
*/
if (U_FAILURE(*err) ||
(args->source == realSourceLimit) ||
(args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
(mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
) {
/* copy partial or error input for truncated detection and error handling */
if(U_FAILURE(*err)) {
length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
if(length > 0) {
uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
}
} else {
length = saveThis->toULength = myData->currentConverter->toULength;
if(length > 0) {
uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
if(args->source < mySourceLimit) {
*err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
}
}
}
return;
}
base += lim;
}
}
/*-Done with buffer with entire buffer
-Error while converting
*/
if (U_FAILURE(*err) || (args->source == args->sourceLimit))
return;
sourceStart = args->source;
changeState_2022(args->converter,
&(args->source),
args->sourceLimit,
realSourceLimit,
TRUE,
ISO_2022,
&plane,
err);
myOffset += args->source - sourceStart;
if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
/* let the ucnv.c code update its current offset */
return;
}
}
}
@ -1283,68 +1141,25 @@ getEndOfBuffer_2022(const char** source,
* To Unicode Callback helper function
*/
static void
toUnicodeCallback(UConverterToUnicodeArgs* args, const uint32_t sourceChar,const char** pSource,
const uint32_t targetUniChar,UChar** pTarget,UErrorCode* err){
const char *saveSource = args->source;
UChar *saveTarget = args->target;
const char* source = *pSource;
UChar* target = *pTarget;
int32_t *saveOffsets = NULL;
UConverterCallbackReason reason;
int32_t currentOffset;
int32_t saveIndex = target - args->target;
args->converter->invalidCharLength=0;
toUnicodeCallback(UConverter *cnv,
const uint32_t sourceChar, const uint32_t targetUniChar,
UErrorCode* err){
if(sourceChar>0xff){
currentOffset= source - args->source - 2;
args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(sourceChar>>8);
args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)sourceChar;
cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
cnv->toUBytes[1] = (uint8_t)sourceChar;
cnv->toULength = 2;
}
else{
currentOffset= source - args->source -1;
args->converter->invalidCharBuffer[args->converter->invalidCharLength++] =(char) sourceChar;
cnv->toUBytes[0] =(char) sourceChar;
cnv->toULength = 2;
}
if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
reason = UCNV_UNASSIGNED;
*err = U_INVALID_CHAR_FOUND;
}
else{
reason = UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
if(args->offsets){
saveOffsets=args->offsets;
args->offsets = args->offsets+(target - args->target);
}
args->target =target;
target =saveTarget;
args->source = source;
args->converter->fromCharErrorBehaviour (
args->converter->toUContext,
args,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
reason,
err);
if(args->offsets){
args->offsets = saveOffsets;
for (;saveIndex < (args->target - target);saveIndex++) {
args->offsets[saveIndex] += currentOffset;
}
}
target=args->target;
*pTarget=target;
args->source = saveSource;
args->target = saveTarget;
}
/**************************************ISO-2022-JP*************************************************/
@ -1906,13 +1721,9 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
}
else{
CALLBACK:
/* Call the callback function*/
toUnicodeCallback(args,mySourceChar,&mySource,targetUniChar,&myTarget,err);
/*args->offsets = saveOffsets;*/
if(U_FAILURE(*err))
break;
toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
break;
}
}
else{
@ -1948,11 +1759,11 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs*
saveConv->charErrorBufferLength=args->converter->charErrorBufferLength;
args->converter->charErrorBufferLength=0;
}
if(args->converter->invalidUCharLength!=0){
uprv_memcpy(saveConv->invalidUCharBuffer, args->converter->invalidUCharBuffer,
args->converter->invalidUCharLength);
saveConv->invalidUCharLength=args->converter->invalidUCharLength;
args->converter->invalidCharLength=0;
if(args->converter->toULength!=0){
uprv_memcpy(saveConv->toUBytes, args->converter->toUBytes,
args->converter->toULength);
saveConv->toULength=args->converter->toULength;
args->converter->toULength=0;
}
}
args->converter=saveConv;
@ -2265,12 +2076,9 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
*(myTarget++)=(UChar)targetUniChar;
}
else {
/* Call the callback function*/
toUnicodeCallback(args,mySourceChar,&mySource,targetUniChar,&myTarget,err);
if(U_FAILURE(*err)){
break;
}
toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
break;
}
}
else{
@ -2896,8 +2704,12 @@ DONE:
/*Customize the converter with the attributes set on the 2022 converter*/
myUConverter->fromUCharErrorBehaviour = _this->fromUCharErrorBehaviour;
myUConverter->fromUContext = _this->fromUContext;
myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour;
myUConverter->toUContext = _this->toUContext;
if(var == ISO_2022) {
myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
} else {
myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour;
myUConverter->toUContext = _this->toUContext;
}
uprv_memcpy(myUConverter->subChar,
_this->subChar,
@ -3059,11 +2871,8 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
}
else{
/* Call the callback function*/
toUnicodeCallback(args,mySourceChar,&mySource,targetUniChar,&myTarget,err);
/*args->offsets = saveOffsets;*/
if(U_FAILURE(*err))
break;
toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
break;
}
}
else{