scuffed-code/icu4c/source/common/ucnv_err.c
Helena Chapman 377889ccd9 ICU-311 Major rewrite of the callback APIs. Needs to be checked
in without more testing to make the 6/29/2000 API deadline.  Lots
of warnings still, offset handling is not implemented.  More unit tests
is required because the current implementation actually passes the tests.

X-SVN-Rev: 1661
2000-06-27 20:47:56 +00:00

431 lines
13 KiB
C

/*
*******************************************************************************
*
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* ucnv_err.c
* Implements error behaviour functions called by T_UConverter_{from,to}Unicode
*
*
* Change history:
*
* 06/29/2000 helena Major rewrite of the callback APIs.
*/
#include "ucmp8.h"
#include "ucmp16.h"
#include "unicode/ucnv_err.h"
#include "ucnv_cnv.h"
#include "cmemory.h"
#include "unicode/ucnv.h"
/* Capacity of the scratch buffers used by the ESCAPE callbacks below
 * (counted in UChars for the Unicode form, in bytes for the converted form).
 * Magic # 32 = 4 (number of chars in a value string) * 8 (max number of bytes per char for any converter)
 */
#define VALUE_STRING_LENGTH     32
/* Two prefix characters ("%U" or "%X"), up to 4 digits, and a terminating NUL */
#define CODEPOINT_STRING_LENGTH 7
#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
#define UNICODE_U_CODEPOINT     0x0055
#define UNICODE_X_CODEPOINT     0x0058
/* Maps a digit value to its character: 0..9 -> U+0030..U+0039 ('0'..'9'),
 * 10 and above -> U+0041 ('A') onwards.
 * The argument and the whole expansion are parenthesized so the macro can be
 * used with compound arguments and inside larger expressions.  The argument is
 * evaluated more than once: do not pass an expression with side effects.
 */
#define ToOffset(a) (((a) <= 9) ? (0x0030 + (a)) : (0x0030 + (a) + 7))
/*
 * Tells the callbacks in this file whether a status value means "nothing to
 * handle".
 * Returns FALSE only when err is U_INVALID_CHAR_FOUND or U_ILLEGAL_CHAR_FOUND;
 * every other value yields TRUE.
 */
UBool
CONVERSION_U_SUCCESS (UErrorCode err)
{
  const int isConversionError =
    (err == U_ILLEGAL_CHAR_FOUND) || (err == U_INVALID_CHAR_FOUND);

  return isConversionError ? FALSE : TRUE;
}
/*
 * Takes an int32_t and fills in a UChar string with that number written in
 * base "radix", left-padded with U+0030 ('0') up to "pad" digits, and
 * terminated with U+0000.
 *
 * buffer  receives the digits and the terminator; it must have room for
 *         max(pad, number of digits of i) + 1 UChars.
 * i       value to format.  It is expected to be non-negative: a negative
 *         value is not decomposed and yields a single meaningless character.
 * radix   numeric base; a radix below 2 cannot be represented and produces an
 *         empty string.
 * pad     minimum number of digits.  Values that need more digits than "pad"
 *         are written in full (the most significant digit is no longer
 *         overwritten by the terminator).
 */
static void   itou (UChar * buffer, int32_t i, int32_t radix, int32_t pad)
{
  int32_t length = 0;
  int32_t num = 0;
  int32_t digit;
  int32_t head;
  int32_t tail;
  UChar temp;

  if (radix < 2)
    {
      /* i / radix would never shrink i (or would divide by zero) */
      buffer[0] = (UChar) 0x0000;
      return;
    }

  /* Digits are produced least significant first */
  while (i >= radix)
    {
      num = i / radix;
      digit = i - num * radix;
      buffer[length++] = (UChar) (ToOffset (digit));
      i = num;
    }
  buffer[length++] = (UChar) (ToOffset (i));

  /* zero padding, still in reversed order so it ends up in front */
  while (length < pad)
    {
      buffer[length++] = (UChar) 0x0030;
    }
  buffer[length] = (UChar) 0x0000;

  /* Reverses the string, using the real length rather than "pad" */
  for (head = 0, tail = length - 1; head < tail; head++, tail--)
    {
      temp = buffer[tail];
      buffer[tail] = buffer[head];
      buffer[head] = temp;
    }
}
/*
 * "Stop" callback for conversion from Unicode.
 *
 * Deliberately does nothing: *err is not modified and nothing is written to
 * the target, so whatever error the converter reported stays set for the
 * caller (presumably this is what makes the conversion stop at the offending
 * sequence -- confirm against the converter implementations).
 *
 * All parameters are accepted only to match the callback signature.  The
 * former assignment to "reason" was removed: "reason" is passed by value, so
 * the store was never visible to the caller.
 */
void   UCNV_FROM_U_CALLBACK_STOP (
                  void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err)
{
  /* the error code is left exactly as the converter set it */
  return;
}
/*
 * "Stop" callback for conversion to Unicode.
 *
 * Deliberately does nothing: *err is not modified and nothing is written to
 * the target, so whatever error the converter reported stays set for the
 * caller (presumably this is what makes the conversion stop at the offending
 * sequence -- confirm against the converter implementations).
 *
 * All parameters are accepted only to match the callback signature.  The
 * former assignment to "reason" was removed: "reason" is passed by value, so
 * the store was never visible to the caller.
 */
void   UCNV_TO_U_CALLBACK_STOP (
                   void *context,
                   UConverterToUnicodeArgs *toUArgs,
                   const char* codePoints,
                   int32_t length,
                   UConverterCallbackReason reason,
                   UErrorCode * err)
{
  /* the error code is left exactly as the converter set it */
  return;
}
/*
 * "Skip" callback for conversion from Unicode.
 *
 * When *err is one of the conversion errors recognised by
 * CONVERSION_U_SUCCESS, it is reset to U_ZERO_ERROR; nothing is written to
 * the target.  Any other status is left unchanged.
 * The remaining parameters are not used.
 */
void   UCNV_FROM_U_CALLBACK_SKIP (
                  void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err)
{
  if (!CONVERSION_U_SUCCESS (*err))
    {
      /* clear the error without emitting anything */
      *err = U_ZERO_ERROR;
    }
}
/*
 * "Substitute" callback for conversion from Unicode.
 *
 * When *err is a conversion error (see CONVERSION_U_SUCCESS), writes the
 * converter's substitution character (converter->subChar, subCharLen bytes)
 * to the target in place of the offending input.
 *
 * - If the whole sequence fits before targetLimit, it is copied, *pTarget is
 *   advanced, and *err is reset to U_ZERO_ERROR.
 * - Otherwise as many bytes as fit are copied to the target, the remainder is
 *   appended to converter->charErrorBuffer, and *err is set to
 *   U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * When fromArgs->offsets is non-NULL, one offset entry (value 0) is written
 * per byte placed in the target and the offsets pointer is advanced.
 * context, codeUnits, length, codePoint and reason are not used.
 */
void   UCNV_FROM_U_CALLBACK_SUBSTITUTE (
                  void *context,
                  UConverterFromUnicodeArgs *fromArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err)
{
  char subBytes[5];
  int32_t subLen;
  int32_t room;
  int k;

  if (CONVERSION_U_SUCCESS (*err))
    {
      return;
    }

  /* Start from the plain substitution character */
  subLen = fromArgs->converter->subCharLen;
  uprv_memcpy (subBytes, fromArgs->converter->subChar, subLen);

  /* A modal converter a la UCNV_EBCDIC_STATEFUL must emit the substitution
   * character in the right mode, so it is bracketed with shift bytes when its
   * length does not match the current fromUnicodeStatus.
   */
  if (ucnv_getType (fromArgs->converter) == UCNV_EBCDIC_STATEFUL)
    {
      if (fromArgs->converter->fromUnicodeStatus)
        {
          if (subLen != 2)
            {
              subBytes[0] = UCNV_SI;
              subBytes[1] = fromArgs->converter->subChar[0];
              subBytes[2] = UCNV_SO;
              subLen = 3;
            }
        }
      else if (subLen != 1)
        {
          subBytes[0] = UCNV_SO;
          subBytes[1] = fromArgs->converter->subChar[0];
          subBytes[2] = fromArgs->converter->subChar[1];
          subBytes[3] = UCNV_SI;
          subLen = 4;
        }
    }

  if ((fromArgs->targetLimit - *(fromArgs->pTarget)) >= subLen)
    {
      /* Enough space in the output buffer: copy everything and move on */
      uprv_memcpy (*(fromArgs->pTarget), subBytes, subLen);
      *(fromArgs->pTarget) += subLen;
      *err = U_ZERO_ERROR;
      if (fromArgs->offsets)
        {
          for (k = 0; k < subLen; k++)
            {
              fromArgs->offsets[k] = 0;
            }
          fromArgs->offsets += subLen;
        }
      return;
    }

  /* Not enough space: fill what is left of the output buffer, then park the
   * remaining bytes in the converter's internal error buffer and bump its
   * length marker.
   */
  room = (int32_t) (fromArgs->targetLimit - *(fromArgs->pTarget));

  uprv_memcpy (*(fromArgs->pTarget), subBytes, room);
  if (fromArgs->offsets)
    {
      for (k = 0; k < room; k++)
        {
          fromArgs->offsets[k] = 0;
        }
      fromArgs->offsets += room;
    }
  uprv_memcpy (fromArgs->converter->charErrorBuffer + fromArgs->converter->charErrorBufferLength,
               subBytes + room,
               subLen - room);
  fromArgs->converter->charErrorBufferLength += subLen - room;
  *(fromArgs->pTarget) += room;
  *err = U_INDEX_OUTOFBOUNDS_ERROR;
}
/*
 * "Escape" callback for conversion from Unicode.
 *
 * Uses itou to build a Unicode escape sequence ("%U" followed by 4 hex digits)
 * for every code unit in converter->invalidUCharBuffer, then converts that
 * escape text to the target codepage with a reset bitwise copy of the
 * converter whose callback is UCNV_FROM_U_CALLBACK_STOP.  If that conversion
 * fails, falls back to UCNV_FROM_U_CALLBACK_SUBSTITUTE.
 *
 * - If the converted bytes fit before targetLimit they are copied, *pTarget
 *   is advanced and *err is reset to U_ZERO_ERROR.
 * - Otherwise as many bytes as fit are copied, the remainder is appended to
 *   converter->charErrorBuffer and *err is set to U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * When fromArgs->offsets is non-NULL, one offset entry (value 0) is written
 * per byte placed in the target and the offsets pointer is advanced.
 * Does nothing when *err is not a conversion error (see CONVERSION_U_SUCCESS).
 */
void   UCNV_FROM_U_CALLBACK_ESCAPE (
                         void *context,
                         UConverterFromUnicodeArgs *fromArgs,
                         const UChar *codeUnits,
                         int32_t length,
                         UChar32 codePoint,
                         UConverterCallbackReason reason,
                         UErrorCode * err)
{
  UChar valueString[VALUE_STRING_LENGTH];
  int32_t valueStringLength = 0;
  UChar codepoint[CODEPOINT_STRING_LENGTH];
  int32_t i = 0;
  int32_t j = 0;
  int32_t room = 0;
  int32_t myTargetLength = 0;
  /*Makes a bitwise copy of the converter passed in */
  UConverter myConverter = *(fromArgs->converter);
  char myTarget[VALUE_STRING_LENGTH];
  char *myTargetAlias = myTarget;
  const UChar *myValueSource = NULL;
  UErrorCode err2 = U_ZERO_ERROR;
  uint32_t myFromUnicodeStatus = fromArgs->converter->fromUnicodeStatus;
  UConverterFromUCallback original = NULL;
  void *originalContext = NULL;

  if (CONVERSION_U_SUCCESS (*err)) return;

  /* Work on a clean copy, but keep the mode the real converter is in */
  ucnv_reset (&myConverter);
  myConverter.fromUnicodeStatus = myFromUnicodeStatus;

  /* The previous callback and context are returned through out-parameters,
   * so addresses of real variables are passed (not the NULL values).
   */
  ucnv_setFromUCallBack (&myConverter,
                         (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP,
                         NULL,  /* To Do for HSYS: context is null? */
                         &original,
                         &originalContext,
                         &err2);
  if (U_FAILURE (err2))
    {
      *err = err2;
      return;
    }

  codepoint[0] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
  codepoint[1] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */

  /* One 6-UChar escape per invalid code unit; stop before valueString
   * would overflow.
   */
  while ((i < fromArgs->converter->invalidUCharLength)
         && (valueStringLength + (CODEPOINT_STRING_LENGTH - 1) <= VALUE_STRING_LENGTH))
    {
      itou (codepoint + 2, fromArgs->converter->invalidUCharBuffer[i++], 16, 4);
      uprv_memcpy (valueString + valueStringLength,
                   codepoint,
                   sizeof (UChar) * (CODEPOINT_STRING_LENGTH - 1));
      valueStringLength += CODEPOINT_STRING_LENGTH - 1;
    }

  myValueSource = valueString;

  /*converts the whole unicode escape sequence, not just the first escape */
  ucnv_fromUnicode (&myConverter,
                    &myTargetAlias,
                    myTarget + VALUE_STRING_LENGTH,
                    &myValueSource,
                    valueString + valueStringLength,
                    NULL,
                    TRUE,
                    &err2);

  if (U_FAILURE (err2))
    {
      /* The escape text itself could not be converted: use the subchar */
      UCNV_FROM_U_CALLBACK_SUBSTITUTE (
                                       NULL, /* TO do for HSYS: context */
                                       fromArgs,
                                       codeUnits,
                                       length,
                                       codePoint,
                                       reason,
                                       err);
      return;
    }

  myTargetLength = (int32_t) (myTargetAlias - myTarget);

  if ((fromArgs->targetLimit - *(fromArgs->pTarget)) >= myTargetLength)
    {
      /*if we have enough space on the output buffer we just copy
       * the escape sequence there and update the pointer
       */
      uprv_memcpy (*(fromArgs->pTarget), myTarget, myTargetLength);
      *(fromArgs->pTarget) += myTargetLength;
      *err = U_ZERO_ERROR;
      if (fromArgs->offsets)
        {
          for (j = 0; j < myTargetLength; j++) fromArgs->offsets[j] = 0;
          fromArgs->offsets += myTargetLength;
        }
    }
  else
    {
      /*if we don't have enough space on the output buffer
       *we copy as much as we can to it, update that pointer.
       *copy the rest in the internal buffer, and increase the
       *length marker
       */
      room = (int32_t) (fromArgs->targetLimit - *(fromArgs->pTarget));

      if (fromArgs->offsets)
        {
          for (j = 0; j < room; j++) fromArgs->offsets[j] = 0;
          fromArgs->offsets += room;
        }
      uprv_memcpy (*(fromArgs->pTarget), myTarget, room);
      uprv_memcpy (fromArgs->converter->charErrorBuffer + fromArgs->converter->charErrorBufferLength,
                   myTarget + room,
                   myTargetLength - room);
      fromArgs->converter->charErrorBufferLength += myTargetLength - room;
      *(fromArgs->pTarget) += room;
      *err = U_INDEX_OUTOFBOUNDS_ERROR;
    }

  return;
}
/*
 * "Skip" callback for conversion to Unicode.
 *
 * When *err is one of the conversion errors recognised by
 * CONVERSION_U_SUCCESS, it is reset to U_ZERO_ERROR; nothing is written to
 * the target.  Any other status is left unchanged.
 * The remaining parameters are not used.
 */
void   UCNV_TO_U_CALLBACK_SKIP (
                 void *context,
                 UConverterToUnicodeArgs *toArgs,
                 const char* codeUnits,
                 int32_t length,
                 UConverterCallbackReason reason,
                 UErrorCode * err)
{
  if (!CONVERSION_U_SUCCESS (*err))
    {
      /* clear the error without emitting anything */
      *err = U_ZERO_ERROR;
    }
}
/*
 * "Substitute" callback for conversion to Unicode.
 *
 * When *err is a conversion error (see CONVERSION_U_SUCCESS), emits U+FFFD in
 * place of the offending input.
 *
 * - If there is room for one UChar before targetLimit, it is written there,
 *   *pTarget is advanced and *err is reset to U_ZERO_ERROR.  When
 *   toArgs->offsets is non-NULL, one offset entry (value 0) is written and
 *   the offsets pointer is advanced as well, so that offsets stay in step
 *   with the target (this matches the from-Unicode callbacks in this file).
 * - Otherwise U+FFFD is appended to converter->UCharErrorBuffer and *err is
 *   set to U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * context, codeUnits, length and reason are not used.
 */
void   UCNV_TO_U_CALLBACK_SUBSTITUTE (
                 void *context,
                 UConverterToUnicodeArgs *toArgs,
                 const char* codeUnits,
                 int32_t length,
                 UConverterCallbackReason reason,
                 UErrorCode * err)
{
  if (CONVERSION_U_SUCCESS (*err)) return;

  if ((toArgs->targetLimit - *(toArgs->pTarget)) >= 1)
    {
      **(toArgs->pTarget) = 0xFFFD;
      (*(toArgs->pTarget))++;
      if (toArgs->offsets)
        {
          *(toArgs->offsets) = 0;
          toArgs->offsets++;
        }
      *err = U_ZERO_ERROR;
    }
  else
    {
      /* No room in the target: keep the character in the internal buffer */
      toArgs->converter->UCharErrorBuffer[toArgs->converter->UCharErrorBufferLength] = 0xFFFD;
      toArgs->converter->UCharErrorBufferLength++;
      *err = U_INDEX_OUTOFBOUNDS_ERROR;
    }

  return;
}
/*
 * "Escape" callback for conversion to Unicode.
 *
 * Uses itou to build an escape sequence ("%X" followed by 2 hex digits) for
 * every byte in converter->invalidCharBuffer, and uses that as the
 * substitution sequence.
 *
 * - If the whole sequence fits before targetLimit it is copied, *pTarget is
 *   advanced and *err is reset to U_ZERO_ERROR.
 * - Otherwise as many UChars as fit are copied, the remainder is appended to
 *   converter->UCharErrorBuffer (after whatever it already holds) and *err is
 *   set to U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * When toArgs->offsets is non-NULL, one offset entry (value 0) is written per
 * UChar placed in the target and the offsets pointer is advanced, matching
 * the from-Unicode callbacks in this file.
 * Does nothing when *err is not a conversion error (see CONVERSION_U_SUCCESS).
 * context, codeUnits, length and reason are not used.
 */
void   UCNV_TO_U_CALLBACK_ESCAPE (
                 void *context,
                 UConverterToUnicodeArgs *toArgs,
                 const char* codeUnits,
                 int32_t length,
                 UConverterCallbackReason reason,
                 UErrorCode * err)
{
  UChar uniValueString[VALUE_STRING_LENGTH];
  int32_t valueStringLength = 0;
  UChar codepoint[CODEPOINT_STRING_LENGTH];
  int32_t i = 0;
  int32_t room = 0;

  if (CONVERSION_U_SUCCESS (*err)) return;

  codepoint[0] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
  codepoint[1] = (UChar) UNICODE_X_CODEPOINT;             /* adding X */

  /* One 4-UChar escape per invalid byte; stop before uniValueString would
   * overflow.
   */
  while ((i < toArgs->converter->invalidCharLength)
         && (valueStringLength + 4 <= VALUE_STRING_LENGTH))
    {
      /* Go through uint8_t so that bytes >= 0x80 are not seen as negative
       * numbers where plain char is signed.
       */
      itou (codepoint + 2, (uint8_t) toArgs->converter->invalidCharBuffer[i++], 16, 2);
      uprv_memcpy (uniValueString + valueStringLength, codepoint, sizeof (UChar) * 4);
      valueStringLength += 4;
    }

  if ((toArgs->targetLimit - *(toArgs->pTarget)) >= valueStringLength)
    {
      /*if we have enough space on the output buffer we just copy
       * the escape sequence there and update the pointers
       */
      uprv_memcpy (*(toArgs->pTarget), uniValueString, (sizeof (UChar)) * (valueStringLength));
      if (toArgs->offsets)
        {
          for (i = 0; i < valueStringLength; i++) toArgs->offsets[i] = 0;
          toArgs->offsets += valueStringLength;
        }
      *(toArgs->pTarget) += valueStringLength;
      *err = U_ZERO_ERROR;
    }
  else
    {
      /*if we don't have enough space on the output buffer
       *we copy as much as we can to it, update that pointer.
       *copy the rest in the internal buffer, and increase the
       *length marker
       */
      room = (int32_t) (toArgs->targetLimit - *(toArgs->pTarget));

      uprv_memcpy (*(toArgs->pTarget), uniValueString, (sizeof (UChar)) * room);
      if (toArgs->offsets)
        {
          for (i = 0; i < room; i++) toArgs->offsets[i] = 0;
          toArgs->offsets += room;
        }
      /* Append after the current content instead of overwriting it */
      uprv_memcpy (toArgs->converter->UCharErrorBuffer + toArgs->converter->UCharErrorBufferLength,
                   uniValueString + room,
                   (sizeof (UChar)) * (valueStringLength - room));
      toArgs->converter->UCharErrorBufferLength += valueStringLength - room;
      *(toArgs->pTarget) += room;
      *err = U_INDEX_OUTOFBOUNDS_ERROR;
    }

  return;
}