1999-08-16 21:50:52 +00:00
|
|
|
/*
|
2000-01-13 23:54:23 +00:00
|
|
|
*******************************************************************************
|
1999-08-16 21:50:52 +00:00
|
|
|
*
|
2000-01-13 23:54:23 +00:00
|
|
|
* Copyright (C) 1998-1999, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
1999-08-16 21:50:52 +00:00
|
|
|
*
|
2000-01-13 23:54:23 +00:00
|
|
|
*******************************************************************************
|
1999-08-16 21:50:52 +00:00
|
|
|
*
|
|
|
|
* ucnv_err.c
|
|
|
|
* Implements error behaviour functions called by T_UConverter_{from,to}Unicode
|
|
|
|
*
|
2000-06-27 20:47:56 +00:00
|
|
|
*
|
|
|
|
* Change history:
|
|
|
|
*
|
|
|
|
* 06/29/2000 helena Major rewrite of the callback APIs.
|
|
|
|
*/
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
#include "ucmp8.h"
|
|
|
|
#include "ucmp16.h"
|
1999-12-28 23:39:02 +00:00
|
|
|
#include "unicode/ucnv_err.h"
|
2000-07-31 23:39:28 +00:00
|
|
|
#include "unicode/ucnv_cb.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
#include "ucnv_cnv.h"
|
|
|
|
#include "cmemory.h"
|
1999-12-28 23:39:02 +00:00
|
|
|
#include "unicode/ucnv.h"
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
#define VALUE_STRING_LENGTH 32
|
|
|
|
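/* Magic # 32 = 4 (UChars in one "%XHH" byte escape) * 8 (max number of bytes per char for any converter). Note that this leaves no spare element for the terminating 0x0000 that itou() appends after the digits. */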
|
|
|
|
#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
|
|
|
|
#define UNICODE_U_CODEPOINT 0x0055
|
|
|
|
#define UNICODE_X_CODEPOINT 0x0058
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Maps a digit value to the code point of its character: 0..9 yield
 * 0x0030..0x0039 ('0'..'9'), 10 and above yield 0x0041 ('A') onwards.
 * The argument and the whole expansion are parenthesized so the macro can be
 * used inside larger expressions and with compound arguments.
 * NOTE: the argument is still evaluated more than once; do not pass
 * expressions with side effects.
 */
#define ToOffset(a) (((a) <= 9) ? (0x0030 + (a)) : (0x0030 + (a) + 7))
|
|
|
|
|
2000-05-18 22:08:39 +00:00
|
|
|
UBool
|
1999-10-18 22:48:32 +00:00
|
|
|
CONVERSION_U_SUCCESS (UErrorCode err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
1999-10-07 00:07:53 +00:00
|
|
|
if ((err == U_INVALID_CHAR_FOUND) || (err == U_ILLEGAL_CHAR_FOUND)) return FALSE;
|
1999-08-16 21:50:52 +00:00
|
|
|
else return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*Takes a int32_t and fills in a UChar* string with that number "radix"-based
|
|
|
|
* and padded with "pad" zeroes
|
|
|
|
*/
|
2000-07-13 15:17:52 +00:00
|
|
|
static void itou (UChar * buffer, uint32_t i, uint32_t radix, int32_t pad)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
int32_t length = 0;
|
|
|
|
int32_t num = 0;
|
|
|
|
int8_t digit;
|
|
|
|
int32_t j;
|
|
|
|
UChar temp;
|
|
|
|
|
2000-02-15 01:05:58 +00:00
|
|
|
while (i >= radix)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
num = i / radix;
|
|
|
|
digit = (int8_t) (i - num * radix);
|
|
|
|
buffer[length++] = (UChar) (ToOffset (digit));
|
|
|
|
i = num;
|
|
|
|
}
|
|
|
|
|
|
|
|
buffer[length] = (UChar) (ToOffset (i));
|
|
|
|
|
|
|
|
while (length < pad) buffer[++length] = (UChar) 0x0030; /*zero padding */
|
|
|
|
buffer[length--] = (UChar) 0x0000;
|
|
|
|
|
|
|
|
/*Reverses the string */
|
|
|
|
for (j = 0; j < (pad / 2); j++)
|
|
|
|
{
|
|
|
|
temp = buffer[length - j];
|
|
|
|
buffer[length - j] = buffer[j];
|
|
|
|
buffer[j] = temp;
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
|
2000-06-27 20:47:56 +00:00
|
|
|
void UCNV_FROM_U_CALLBACK_STOP (
|
|
|
|
void *context,
|
|
|
|
UConverterFromUnicodeArgs *fromUArgs,
|
|
|
|
const UChar* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UChar32 codePoint,
|
|
|
|
UConverterCallbackReason reason,
|
2000-07-26 00:20:19 +00:00
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-07-13 00:14:10 +00:00
|
|
|
/* the caller must have set the error code accordingly */
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
|
2000-06-27 20:47:56 +00:00
|
|
|
void UCNV_TO_U_CALLBACK_STOP (
|
|
|
|
void *context,
|
|
|
|
UConverterToUnicodeArgs *toUArgs,
|
|
|
|
const char* codePoints,
|
|
|
|
int32_t length,
|
|
|
|
UConverterCallbackReason reason,
|
2000-07-26 00:20:19 +00:00
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-07-13 00:14:10 +00:00
|
|
|
/* the caller must have set the error code accordingly */
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
void UCNV_FROM_U_CALLBACK_SKIP (
|
|
|
|
void *context,
|
|
|
|
UConverterFromUnicodeArgs *fromUArgs,
|
|
|
|
const UChar* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UChar32 codePoint,
|
|
|
|
UConverterCallbackReason reason,
|
2000-07-26 00:20:19 +00:00
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-07-13 00:14:10 +00:00
|
|
|
if (reason <= UCNV_IRREGULAR)
|
|
|
|
{
|
|
|
|
*err = U_ZERO_ERROR;
|
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
void UCNV_FROM_U_CALLBACK_SUBSTITUTE (
|
|
|
|
void *context,
|
|
|
|
UConverterFromUnicodeArgs *fromArgs,
|
|
|
|
const UChar* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UChar32 codePoint,
|
|
|
|
UConverterCallbackReason reason,
|
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-08-02 19:35:17 +00:00
|
|
|
if (reason > UCNV_IRREGULAR)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
*err = U_ZERO_ERROR;
|
|
|
|
|
|
|
|
ucnv_cbFromUWriteSub(fromArgs, 0, err);
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*uses itou to get a unicode escape sequence of the offensive sequence,
|
|
|
|
*uses a clean copy (resetted) of the converter, to convert that unicode
|
|
|
|
*escape sequence to the target codepage (if conversion failure happens then
|
|
|
|
*we revert to substituting with subchar)
|
|
|
|
*/
|
2000-06-27 20:47:56 +00:00
|
|
|
void UCNV_FROM_U_CALLBACK_ESCAPE (
|
|
|
|
void *context,
|
|
|
|
UConverterFromUnicodeArgs *fromArgs,
|
|
|
|
const UChar *codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UChar32 codePoint,
|
|
|
|
UConverterCallbackReason reason,
|
2000-07-26 00:20:19 +00:00
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
UChar valueString[VALUE_STRING_LENGTH];
|
|
|
|
int32_t valueStringLength = 0;
|
2000-08-02 19:35:17 +00:00
|
|
|
int32_t i = 0;
|
2000-07-31 14:38:10 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
const UChar *myValueSource = NULL;
|
1999-10-07 00:07:53 +00:00
|
|
|
UErrorCode err2 = U_ZERO_ERROR;
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterFromUCallback original = NULL;
|
2000-07-14 19:40:36 +00:00
|
|
|
void *originalContext;
|
1999-08-16 21:50:52 +00:00
|
|
|
|
2000-07-31 14:38:10 +00:00
|
|
|
UConverterFromUCallback ignoredCallback = NULL;
|
|
|
|
void *ignoredContext;
|
|
|
|
|
2000-07-13 00:14:10 +00:00
|
|
|
if (reason > UCNV_IRREGULAR)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2000-07-31 14:38:10 +00:00
|
|
|
ucnv_setFromUCallBack (fromArgs->converter,
|
|
|
|
(UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
|
2000-06-27 20:47:56 +00:00
|
|
|
NULL, /* To Do for HSYS: context is null? */
|
2000-07-05 20:56:42 +00:00
|
|
|
&original,
|
2000-07-14 19:40:36 +00:00
|
|
|
&originalContext,
|
2000-07-26 00:20:19 +00:00
|
|
|
&err2);
|
1999-10-18 22:48:32 +00:00
|
|
|
if (U_FAILURE (err2))
|
2000-08-02 19:35:17 +00:00
|
|
|
{
|
|
|
|
*err = err2;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2000-07-13 00:14:10 +00:00
|
|
|
/*
|
|
|
|
* ### TODO:
|
|
|
|
* This should actually really work with the codePoint, not with the codeUnits;
|
|
|
|
* how do we represent a code point > 0xffff? It should be one single escape, not
|
|
|
|
* two for a surrogate pair!
|
|
|
|
*/
|
|
|
|
while (i < length)
|
2000-08-02 19:35:17 +00:00
|
|
|
{
|
|
|
|
valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
|
|
|
|
valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
|
|
|
|
itou (valueString + valueStringLength, codeUnits[i++], 16, 4);
|
|
|
|
valueStringLength += 4;
|
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
|
|
|
|
myValueSource = valueString;
|
|
|
|
|
2000-07-31 14:38:10 +00:00
|
|
|
/* reset the error */
|
|
|
|
*err = U_ZERO_ERROR;
|
2000-07-26 00:20:19 +00:00
|
|
|
|
2000-08-02 19:35:17 +00:00
|
|
|
ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
|
2000-07-31 14:38:10 +00:00
|
|
|
|
|
|
|
ucnv_setFromUCallBack (fromArgs->converter,
|
|
|
|
original,
|
|
|
|
originalContext,
|
|
|
|
&ignoredCallback,
|
|
|
|
&ignoredContext,
|
|
|
|
&err2);
|
1999-10-18 22:48:32 +00:00
|
|
|
if (U_FAILURE (err2))
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-07-31 14:38:10 +00:00
|
|
|
*err = err2;
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
void UCNV_TO_U_CALLBACK_SKIP (
|
|
|
|
void *context,
|
|
|
|
UConverterToUnicodeArgs *toArgs,
|
|
|
|
const char* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UConverterCallbackReason reason,
|
2000-07-26 00:20:19 +00:00
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-07-26 00:20:19 +00:00
|
|
|
if (reason <= UCNV_IRREGULAR)
|
|
|
|
{
|
|
|
|
*err = U_ZERO_ERROR;
|
|
|
|
}
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
void UCNV_TO_U_CALLBACK_SUBSTITUTE (
|
|
|
|
void *context,
|
|
|
|
UConverterToUnicodeArgs *toArgs,
|
|
|
|
const char* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UConverterCallbackReason reason,
|
2000-07-26 00:20:19 +00:00
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-07-26 00:20:19 +00:00
|
|
|
if (reason > UCNV_IRREGULAR)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
2000-07-31 14:38:10 +00:00
|
|
|
|
|
|
|
*err = U_ZERO_ERROR;
|
|
|
|
ucnv_cbToUWriteSub(toArgs,0,err);
|
2000-07-26 00:20:19 +00:00
|
|
|
|
|
|
|
return;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*uses itou to get a unicode escape sequence of the offensive sequence,
|
|
|
|
*and uses that as the substitution sequence
|
|
|
|
*/
|
2000-06-27 20:47:56 +00:00
|
|
|
void UCNV_TO_U_CALLBACK_ESCAPE (
|
|
|
|
void *context,
|
|
|
|
UConverterToUnicodeArgs *toArgs,
|
|
|
|
const char* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UConverterCallbackReason reason,
|
2000-07-26 00:20:19 +00:00
|
|
|
UErrorCode * err)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
|
|
|
UChar uniValueString[VALUE_STRING_LENGTH];
|
|
|
|
int32_t valueStringLength = 0;
|
2000-04-13 23:00:43 +00:00
|
|
|
int32_t i = 0;
|
2000-08-02 19:35:17 +00:00
|
|
|
|
2000-07-13 00:14:10 +00:00
|
|
|
if (reason > UCNV_IRREGULAR)
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ### TODO:
|
|
|
|
* This should use the new ucnv_cbWrite...() functions instead of doing
|
|
|
|
* "tricks" as before we had a good callback API!
|
|
|
|
* (Actually, this function is not all that bad.)
|
|
|
|
*/
|
|
|
|
|
|
|
|
while (i < length)
|
1999-08-16 21:50:52 +00:00
|
|
|
{
|
2000-07-13 00:14:10 +00:00
|
|
|
uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
|
|
|
|
uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
|
2000-07-13 15:17:52 +00:00
|
|
|
itou (uniValueString + valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
|
2000-07-13 00:14:10 +00:00
|
|
|
valueStringLength += 2;
|
1999-08-16 21:50:52 +00:00
|
|
|
}
|
2000-07-26 00:20:19 +00:00
|
|
|
|
2000-08-02 19:35:17 +00:00
|
|
|
|
|
|
|
/* reset the error */
|
|
|
|
*err = U_ZERO_ERROR;
|
|
|
|
|
|
|
|
ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
|
2000-07-26 00:20:19 +00:00
|
|
|
|
1999-08-16 21:50:52 +00:00
|
|
|
return;
|
|
|
|
}
|