b942cae860
X-SVN-Rev: 18
436 lines
12 KiB
C
436 lines
12 KiB
C
/*
 ********************************************************************************
 *                                                                              *
 * Copyright:                                                                   *
 *   (C) Copyright International Business Machines Corporation, 1998           *
 *   Licensed Material - Program-Property of IBM - All Rights Reserved.        *
 *   US Government Users Restricted Rights - Use, duplication, or disclosure   *
 *   restricted by GSA ADP Schedule Contract with IBM Corp.                    *
 *                                                                              *
 ********************************************************************************
 *
 *
 *
 *   ucnv_err.c
 *   Implements error behaviour functions called by T_UConverter_{from,to}Unicode
 *
 */
|
|
|
|
#include "uhash.h"
|
|
#include "ucmp8.h"
|
|
#include "ucmp16.h"
|
|
#include "ucnv_bld.h"
|
|
#include "ucnv_err.h"
|
|
#include "ucnv_cnv.h"
|
|
#include "cmemory.h"
|
|
#include "ucnv.h"
|
|
|
|
/* Capacity, in elements, of the scratch buffers used by the
 * "SUBSTITUTEwithValue" callbacks (the UChar escape string and, for the
 * from-Unicode direction, its converted byte form).
 * Original sizing note: 32 = 4 (number of chars in value string)
 *                          * 8 (max number of bytes per char for any converter)
 */
#define VALUE_STRING_LENGTH 32

/* Size of the work buffer holding one escape: 2 prefix units ("%U" or "%X"),
 * up to 4 hexadecimal digits, and the terminating NUL written by itou. */
#define CODEPOINT_STRING_LENGTH 7

/* Code points of the characters that introduce an escape sequence. */
#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025   /* '%' */
#define UNICODE_U_CODEPOINT 0x0055              /* 'U' */
#define UNICODE_X_CODEPOINT 0x0058              /* 'X' */
|
|
|
|
|
|
/* Maps a digit value to the code point of its upper-case hexadecimal digit:
 * 0..9 give U+0030..U+0039 ('0'..'9'); 10 and above give U+0041 ('A') onwards
 * (the extra 7 skips the characters that sit between '9' and 'A').
 * The argument and the whole expansion are parenthesized so the macro can be
 * used inside larger expressions and with compound arguments.  Note that the
 * argument is evaluated twice, so it must not have side effects. */
#define ToOffset(a) ((a) <= 9 ? (0x0030 + (a)) : (0x0030 + (a) + 7))
|
|
|
|
bool_t
|
|
CONVERSION_SUCCESS (UErrorCode err)
|
|
{
|
|
if ((err == U_INVALID_CHAR_FOUND) || (err == U_ILLEGAL_CHAR_FOUND)) return FALSE;
|
|
else return TRUE;
|
|
}
|
|
|
|
/*Takes a int32_t and fills in a UChar* string with that number "radix"-based
|
|
* and padded with "pad" zeroes
|
|
*/
|
|
static void itou (UChar * buffer, int32_t i, int32_t radix, int32_t pad)
|
|
{
|
|
int32_t length = 0;
|
|
int32_t num = 0;
|
|
int8_t digit;
|
|
int32_t j;
|
|
UChar temp;
|
|
|
|
while (i > radix)
|
|
{
|
|
num = i / radix;
|
|
digit = (int8_t) (i - num * radix);
|
|
buffer[length++] = (UChar) (ToOffset (digit));
|
|
i = num;
|
|
}
|
|
|
|
buffer[length] = (UChar) (ToOffset (i));
|
|
|
|
while (length < pad) buffer[++length] = (UChar) 0x0030; /*zero padding */
|
|
buffer[length--] = (UChar) 0x0000;
|
|
|
|
/*Reverses the string */
|
|
for (j = 0; j < (pad / 2); j++)
|
|
{
|
|
temp = buffer[length - j];
|
|
buffer[length - j] = buffer[j];
|
|
buffer[j] = temp;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
|
|
void MissingUnicodeAction_STOP (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
return;
|
|
}
|
|
|
|
|
|
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
|
|
void MissingCharAction_STOP (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
return;
|
|
}
|
|
|
|
void MissingUnicodeAction_SKIP (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
if (CONVERSION_SUCCESS (*err)) return;
|
|
*err = U_ZERO_ERROR;
|
|
}
|
|
|
|
void MissingUnicodeAction_SUBSTITUTE (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
char togo[5];
|
|
int32_t togoLen;
|
|
|
|
|
|
|
|
if (CONVERSION_SUCCESS (*err)) return;
|
|
|
|
/*In case we're dealing with a modal converter a la EBCDIC_STATEFUL,
|
|
we need to make sure that the emitting of the substitution charater in the right mode*/
|
|
icu_memcpy(togo, _this->subChar, togoLen = _this->subCharLen);
|
|
if (ucnv_getType(_this) == EBCDIC_STATEFUL)
|
|
{
|
|
if ((_this->fromUnicodeStatus)&&(togoLen != 2))
|
|
{
|
|
togo[0] = UCNV_SI;
|
|
togo[1] = _this->subChar[0];
|
|
togo[2] = UCNV_SO;
|
|
togoLen = 3;
|
|
}
|
|
else if (!(_this->fromUnicodeStatus)&&(togoLen != 1))
|
|
{
|
|
togo[0] = UCNV_SO;
|
|
togo[1] = _this->subChar[0];
|
|
togo[2] = _this->subChar[1];
|
|
togo[3] = UCNV_SI;
|
|
togoLen = 4;
|
|
}
|
|
}
|
|
|
|
/*if we have enough space on the output buffer we just copy
|
|
the subchar there and update the pointer */
|
|
if ((targetLimit - *target) >= togoLen)
|
|
{
|
|
icu_memcpy (*target, togo, togoLen);
|
|
*target += togoLen;
|
|
*err = U_ZERO_ERROR;
|
|
if (offsets)
|
|
{
|
|
int i=0;
|
|
for (i=0;i<togoLen;i++) offsets[i]=0;
|
|
offsets += togoLen;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*if we don't have enough space on the output buffer
|
|
*we copy as much as we can to it, update that pointer.
|
|
*copy the rest in the internal buffer, and increase the
|
|
*length marker
|
|
*/
|
|
icu_memcpy (*target, togo, (targetLimit - *target));
|
|
if (offsets)
|
|
{
|
|
int i=0;
|
|
for (i=0;i<(targetLimit - *target);i++) offsets[i]=0;
|
|
offsets += (targetLimit - *target);
|
|
}
|
|
icu_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
|
|
togo + (targetLimit - *target),
|
|
togoLen - (targetLimit - *target));
|
|
_this->charErrorBufferLength += togoLen - (targetLimit - *target);
|
|
*target += (targetLimit - *target);
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/*uses itou to get a unicode escape sequence of the offensive sequence,
|
|
*uses a clean copy (resetted) of the converter, to convert that unicode
|
|
*escape sequence to the target codepage (if conversion failure happens then
|
|
*we revert to substituting with subchar)
|
|
*/
|
|
void MissingUnicodeAction_SUBSTITUTEwithValue (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
|
|
UChar valueString[VALUE_STRING_LENGTH];
|
|
int32_t valueStringLength = 0;
|
|
const UChar *mySource = *source;
|
|
UChar codepoint[CODEPOINT_STRING_LENGTH];
|
|
int32_t i = 0;
|
|
/*Makes a bitwise copy of the converter passwd in */
|
|
UConverter myConverter = *_this;
|
|
char myTarget[VALUE_STRING_LENGTH];
|
|
char *myTargetAlias = myTarget;
|
|
const UChar *myValueSource = NULL;
|
|
UErrorCode err2 = U_ZERO_ERROR;
|
|
uint32_t myFromUnicodeStatus = _this->fromUnicodeStatus;
|
|
|
|
|
|
if (CONVERSION_SUCCESS (*err)) return;
|
|
|
|
ucnv_reset (&myConverter);
|
|
myConverter.fromUnicodeStatus = myFromUnicodeStatus;
|
|
|
|
ucnv_setFromUCallBack (&myConverter,
|
|
(UCNV_FromUCallBack) MissingUnicodeAction_STOP,
|
|
&err2);
|
|
if (FAILURE (err2))
|
|
{
|
|
*err = err2;
|
|
return;
|
|
}
|
|
|
|
codepoint[0] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
|
|
codepoint[1] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
|
|
|
|
while (i < _this->invalidUCharLength)
|
|
{
|
|
itou (codepoint + 2, _this->invalidUCharBuffer[i++], 16, 4);
|
|
icu_memcpy (valueString + valueStringLength, codepoint, sizeof (UChar) * 6);
|
|
valueStringLength += CODEPOINT_STRING_LENGTH - 1;
|
|
}
|
|
|
|
myValueSource = valueString;
|
|
|
|
/*converts unicode escape sequence */
|
|
ucnv_fromUnicode (&myConverter,
|
|
&myTargetAlias,
|
|
myTargetAlias + VALUE_STRING_LENGTH,
|
|
&myValueSource,
|
|
myValueSource + CODEPOINT_STRING_LENGTH - 1,
|
|
NULL,
|
|
TRUE,
|
|
&err2);
|
|
|
|
if (FAILURE (err2))
|
|
{
|
|
MissingUnicodeAction_SUBSTITUTE (_this,
|
|
target,
|
|
targetLimit,
|
|
source,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
valueStringLength = myTargetAlias - myTarget;
|
|
|
|
/*if we have enough space on the output buffer we just copy
|
|
* the subchar there and update the pointer
|
|
*/
|
|
if ((targetLimit - *target) >= valueStringLength)
|
|
{
|
|
icu_memcpy (*target, myTarget, valueStringLength);
|
|
*target += valueStringLength;
|
|
*err = U_ZERO_ERROR;
|
|
|
|
if (offsets)
|
|
{
|
|
int i=0;
|
|
for (i=0;i<valueStringLength;i++) offsets[i]=0;
|
|
offsets += valueStringLength;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*if we don't have enough space on the output buffer
|
|
*we copy as much as we can to it, update that pointer.
|
|
*copy the rest in the internal buffer, and increase the
|
|
*length marker
|
|
*/
|
|
|
|
if (offsets)
|
|
{
|
|
int i=0;
|
|
for (i=0;i<(targetLimit - *target);i++) offsets[i]=0;
|
|
offsets += (targetLimit - *target);
|
|
}
|
|
icu_memcpy (*target, myTarget, (targetLimit - *target));
|
|
icu_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
|
|
myTarget + (targetLimit - *target),
|
|
valueStringLength - (targetLimit - *target));
|
|
_this->charErrorBufferLength += valueStringLength - (targetLimit - *target);
|
|
*target += (targetLimit - *target);
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
void MissingCharAction_SKIP (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
if (CONVERSION_SUCCESS (*err)) return;
|
|
*err = U_ZERO_ERROR;
|
|
}
|
|
|
|
void MissingCharAction_SUBSTITUTE (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
|
|
if (CONVERSION_SUCCESS (*err)) return;
|
|
|
|
if ((targetLimit - *target) >= 1)
|
|
{
|
|
**target = 0xFFFD;
|
|
(*target)++;
|
|
if (offsets) *offsets = 0;
|
|
*err = U_ZERO_ERROR;
|
|
}
|
|
else
|
|
{
|
|
_this->UCharErrorBuffer[_this->UCharErrorBufferLength] = 0xFFFD;
|
|
_this->UCharErrorBufferLength++;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/*uses itou to get a unicode escape sequence of the offensive sequence,
|
|
*and uses that as the substitution sequence
|
|
*/
|
|
void MissingCharAction_SUBSTITUTEwithValue (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
UChar uniValueString[VALUE_STRING_LENGTH];
|
|
int32_t valueStringLength = 0;
|
|
const unsigned char *mySource = (const unsigned char *) *source;
|
|
UChar codepoint[CODEPOINT_STRING_LENGTH];
|
|
int32_t j = 0, i = 0;
|
|
const int32_t* offsets_end = offsets +( targetLimit - *target);
|
|
|
|
if (CONVERSION_SUCCESS (*err)) return;
|
|
|
|
codepoint[0] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
|
|
codepoint[1] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
|
|
|
|
while (i < _this->invalidCharLength)
|
|
{
|
|
itou (codepoint + 2, _this->invalidCharBuffer[i++], 16, 2);
|
|
icu_memcpy (uniValueString + valueStringLength, codepoint, sizeof (UChar) * 4);
|
|
valueStringLength += 4;
|
|
}
|
|
|
|
if ((targetLimit - *target) >= valueStringLength)
|
|
{
|
|
/*if we have enough space on the output buffer we just copy
|
|
* the subchar there and update the pointer
|
|
*/
|
|
icu_memcpy (*target, uniValueString, (sizeof (UChar)) * (valueStringLength));
|
|
if (offsets)
|
|
{
|
|
for (i = 0; i < valueStringLength; i++) offsets[i] = 0;
|
|
}
|
|
*target += valueStringLength;
|
|
|
|
*err = U_ZERO_ERROR;
|
|
}
|
|
else
|
|
{
|
|
/*if we don't have enough space on the output buffer
|
|
*we copy as much as we can to it, update that pointer.
|
|
*copy the rest in the internal buffer, and increase the
|
|
*length marker
|
|
*/
|
|
icu_memcpy (*target, uniValueString, (sizeof (UChar)) * (targetLimit - *target));
|
|
if (offsets)
|
|
{
|
|
for (i = 0; i < (targetLimit - *target); i++) offsets[i] = 0;
|
|
}
|
|
|
|
|
|
icu_memcpy (_this->UCharErrorBuffer,
|
|
uniValueString + (targetLimit - *target),
|
|
(sizeof (UChar)) * (valueStringLength - (targetLimit - *target)));
|
|
_this->UCharErrorBufferLength += valueStringLength - (targetLimit - *target);
|
|
*target += (targetLimit - *target);
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
|
|
return;
|
|
}
|