/*
**********************************************************************
*   Copyright (C) 1999-2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
 *
 *
 *   ucnv_err.h:
 */
|
|
|
|
/**
 * \file
 * \brief C UConverter predefined error callbacks
 *
 *  <h2> Error Behaviour Functions </h2>
 *  Defines some error behaviour functions called by ucnv_{from,to}Unicode.
 *  These are provided as part of ICU and many are stable, but they
 *  can also be considered only as an example of what can be done with
 *  callbacks.  You may of course write your own.
 *
 * These functions, although public, should NEVER be called directly. They should be used as parameters to
 * the ucnv_setFromUCallBack and ucnv_setToUCallBack functions, to
 * set the behaviour of a converter
 * when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
 *
 * usage example:  'STOP' doesn't need any context, but newContext
 *    could be set to something other than 'NULL' if needed.
 *
 * \code
 *  UErrorCode err = U_ZERO_ERROR;
 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
 *  const void *newContext = NULL;
 *  const void *oldContext;
 *  UConverterFromUCallback oldAction;
 *
 *
 *  if (U_SUCCESS(err))
 *  {
 *      ucnv_setFromUCallBack(myConverter,
 *                            UCNV_FROM_U_CALLBACK_STOP,
 *                            newContext,
 *                            &oldAction,
 *                            &oldContext,
 *                            &err);
 *  }
 * \endcode
 *
 * The code above tells "myConverter" to stop when it encounters an ILLEGAL/TRUNCATED/INVALID sequence when it is used to
 * convert from Unicode -> Codepage.
 * The behavior from Codepage to Unicode is not changed.
 */
|
|
|
|
|
2000-12-13 19:41:47 +00:00
|
|
|
/* This file isn't designed to be included all by itself. */
|
2000-06-29 00:14:01 +00:00
|
|
|
#ifndef UCNV_H
|
|
|
|
# include "unicode/ucnv.h"
|
|
|
|
/* and the rest of this file will be ignored. */
|
|
|
|
#endif
|
1999-12-28 23:39:02 +00:00
|
|
|
|
|
|
|
#ifndef UCNV_ERR_H
|
|
|
|
#define UCNV_ERR_H
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
2001-10-08 23:26:58 +00:00
|
|
|
|
2001-02-16 20:12:50 +00:00
|
|
|
/**
 * FROM_U, TO_U context options for the substitute and skip callbacks.
 *
 * Pass one of these strings as the callback context:
 *  - UCNV_SUB_STOP_ON_ILLEGAL  (for the *_CALLBACK_SUBSTITUTE functions)
 *  - UCNV_SKIP_STOP_ON_ILLEGAL (for the *_CALLBACK_SKIP functions)
 * With this option the callback STOPS at the ILLEGAL_SEQUENCE, returning the
 * error code back to the caller immediately, instead of substituting or
 * skipping it.  A NULL context selects the plain substitute/skip behaviour.
 *
 * Both options currently expand to the same string, "i".
 * @draft ICU 1.8
 */
#define UCNV_SUB_STOP_ON_ILLEGAL "i"
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
|
|
|
|
|
|
|
|
/**
 * FROM_U_CALLBACK_ESCAPE context options.
 *
 * Pass one of these as the callback context of UCNV_FROM_U_CALLBACK_ESCAPE
 * to select the escape notation that replaces the offending sequence:
 *  - UCNV_ESCAPE_ICU     (NULL): ICU notation, percent sign + U + hex digits
 *  - UCNV_ESCAPE_JAVA    ("J"):  Java notation, backslash + u + hex digits
 *  - UCNV_ESCAPE_C       ("C"):  C notation, backslash + u/U + hex digits
 *  - UCNV_ESCAPE_XML_DEC ("D"):  XML decimal numeric character reference
 *  - UCNV_ESCAPE_XML_HEX ("X"):  XML hexadecimal numeric character reference
 * @draft ICU 1.8
 */
#define UCNV_ESCAPE_ICU      NULL
#define UCNV_ESCAPE_JAVA     "J"
#define UCNV_ESCAPE_C        "C"
#define UCNV_ESCAPE_XML_DEC  "D"
#define UCNV_ESCAPE_XML_HEX  "X"
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
/**
 * The process condition code to be used with the callbacks.
 * It is passed to every callback as its "reason" argument.
 * @stable
 */
typedef enum {
    UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
                               The error code U_INVALID_CHAR_FOUND will be set. */
    UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example,
                               \x81\x2E is illegal in SJIS because \x2E
                               is not a valid trail byte for the \x81
                               lead byte.
                               Also, starting with Unicode 3.0.1, non-shortest byte sequences
                               in UTF-8 (like \xC1\xA1 instead of \x61 for U+0061)
                               are also illegal, not just irregular.
                               The error code U_ILLEGAL_CHAR_FOUND will be set. */
    UCNV_IRREGULAR = 2,   /**< The codepoint is not a regular sequence in
                               the encoding. For example, \xED\xA0\x80..\xED\xBF\xBF
                               are irregular UTF-8 byte sequences for single surrogate
                               code points.
                               The error code U_INVALID_CHAR_FOUND will be set. */
    UCNV_RESET = 3,       /**< The callback is called with this reason when a
                               'reset' has occurred. Callback should reset all
                               state. */
    UCNV_CLOSE = 4        /**< Called when the converter is closed. The
                               callback should release any allocated memory. */
} UConverterCallbackReason;
|
|
|
|
|
1999-12-28 23:39:02 +00:00
|
|
|
|
2000-03-22 18:31:40 +00:00
|
|
|
/**
|
2000-06-27 20:47:56 +00:00
|
|
|
* The structure for the fromUnicode callback function parameter.
|
2001-11-13 22:47:47 +00:00
|
|
|
* @stable
|
2000-06-27 20:47:56 +00:00
|
|
|
*/
|
|
|
|
typedef struct {
|
2001-03-21 20:44:20 +00:00
|
|
|
uint16_t size;
|
|
|
|
UBool flush;
|
|
|
|
UConverter *converter;
|
|
|
|
const UChar *source;
|
|
|
|
const UChar *sourceLimit;
|
|
|
|
char *target;
|
|
|
|
const char *targetLimit;
|
|
|
|
int32_t *offsets; /* *offset = blah ; offset++; */
|
2000-06-27 20:47:56 +00:00
|
|
|
} UConverterFromUnicodeArgs;
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The structure for the toUnicode callback function parameter.
|
2001-11-13 22:47:47 +00:00
|
|
|
* @stable
|
2000-03-22 18:31:40 +00:00
|
|
|
*/
|
2000-06-27 20:47:56 +00:00
|
|
|
typedef struct {
|
2001-03-21 20:44:20 +00:00
|
|
|
uint16_t size;
|
|
|
|
UBool flush;
|
|
|
|
UConverter *converter;
|
|
|
|
const char *source;
|
|
|
|
const char *sourceLimit;
|
|
|
|
UChar *target;
|
|
|
|
const UChar *targetLimit;
|
|
|
|
int32_t *offsets;
|
2000-06-27 20:47:56 +00:00
|
|
|
} UConverterToUnicodeArgs;
|
1999-12-28 23:39:02 +00:00
|
|
|
|
|
|
|
|
2000-03-22 18:31:40 +00:00
|
|
|
/**
|
2000-12-13 19:41:47 +00:00
|
|
|
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
|
|
|
* This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
|
|
|
|
* returning the error code back to the caller immediately.
|
2000-03-22 18:31:40 +00:00
|
|
|
* @stable
|
|
|
|
*/
|
2000-06-27 20:47:56 +00:00
|
|
|
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
|
2001-06-26 22:51:14 +00:00
|
|
|
const void *context,
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterFromUnicodeArgs *fromUArgs,
|
|
|
|
const UChar* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UChar32 codePoint,
|
|
|
|
UConverterCallbackReason reason,
|
2001-03-21 20:44:20 +00:00
|
|
|
UErrorCode * err);
|
1999-12-28 23:39:02 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
/**
|
2000-12-13 19:41:47 +00:00
|
|
|
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
|
|
|
* This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
|
|
|
|
* returning the error code back to the caller immediately.
|
2001-02-16 20:12:50 +00:00
|
|
|
*
|
2000-06-27 20:47:56 +00:00
|
|
|
* @stable
|
|
|
|
*/
|
|
|
|
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
|
2001-06-26 22:51:14 +00:00
|
|
|
const void *context,
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterToUnicodeArgs *fromUArgs,
|
|
|
|
const char* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UConverterCallbackReason reason,
|
2001-03-21 20:44:20 +00:00
|
|
|
UErrorCode * err);
|
1999-12-28 23:39:02 +00:00
|
|
|
|
2000-03-22 18:31:40 +00:00
|
|
|
/**
|
2000-12-13 19:41:47 +00:00
|
|
|
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
2001-02-16 20:12:50 +00:00
|
|
|
* This From Unicode callback skips any ILLEGAL_SEQUENCE, or
|
|
|
|
* skips only UNASSINGED_SEQUENCE depending on the context parameter
|
2000-12-13 19:41:47 +00:00
|
|
|
* simply ignoring those characters.
|
2001-02-16 20:12:50 +00:00
|
|
|
* @param context: the function currently recognizes the callback options:
|
|
|
|
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
|
|
|
* returning the error code back to the caller immediately.
|
|
|
|
* NULL: Skips any ILLEGAL_SEQUENCE
|
|
|
|
*
|
2000-03-22 18:31:40 +00:00
|
|
|
* @stable
|
|
|
|
*/
|
2000-06-27 20:47:56 +00:00
|
|
|
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
|
2001-06-26 22:51:14 +00:00
|
|
|
const void *context,
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterFromUnicodeArgs *fromUArgs,
|
|
|
|
const UChar* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UChar32 codePoint,
|
|
|
|
UConverterCallbackReason reason,
|
2001-03-21 20:44:20 +00:00
|
|
|
UErrorCode * err);
|
1999-12-28 23:39:02 +00:00
|
|
|
|
2000-03-22 18:31:40 +00:00
|
|
|
/**
|
2000-12-13 19:41:47 +00:00
|
|
|
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
2001-02-16 20:12:50 +00:00
|
|
|
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
|
|
|
|
* UNASSIGNED_SEQUENCE depending on context parameter, with the
|
2000-12-13 19:41:47 +00:00
|
|
|
* current substitution string for the converter. This is the default
|
|
|
|
* callback.
|
2001-02-16 20:12:50 +00:00
|
|
|
* @param context: the function currently recognizes the callback options:
|
|
|
|
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
|
|
|
* returning the error code back to the caller immediately.
|
|
|
|
* NULL: Substitutes any ILLEGAL_SEQUENCE
|
2000-12-13 19:41:47 +00:00
|
|
|
* @see ucnv_setSubstChars
|
2000-03-22 18:31:40 +00:00
|
|
|
* @stable
|
1999-12-28 23:39:02 +00:00
|
|
|
*/
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
|
2001-06-26 22:51:14 +00:00
|
|
|
const void *context,
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterFromUnicodeArgs *fromUArgs,
|
|
|
|
const UChar* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UChar32 codePoint,
|
|
|
|
UConverterCallbackReason reason,
|
2001-03-21 20:44:20 +00:00
|
|
|
UErrorCode * err);
|
1999-12-28 23:39:02 +00:00
|
|
|
|
2000-03-22 18:31:40 +00:00
|
|
|
/**
|
2000-12-13 19:41:47 +00:00
|
|
|
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
|
|
|
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
|
|
|
|
* hexadecimal representation of the illegal codepoints
|
2001-02-16 20:12:50 +00:00
|
|
|
|
|
|
|
* @param context: the function currently recognizes the callback options:
|
|
|
|
*
|
|
|
|
* UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal
|
|
|
|
* representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE").
|
|
|
|
* In the Event the converter doesn't support the characters {u,%}[A-F][0-9],
|
|
|
|
* it will substitute the illegal sequence with the substitution characters.
|
|
|
|
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
|
|
|
* %UD84D%UDC56
|
|
|
|
* UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal
|
|
|
|
* representation in the format \uXXXX, e.g. "\uFFFE\u00AC\uC8FE").
|
|
|
|
* In the Event the converter doesn't support the characters {u,\}[A-F][0-9],
|
|
|
|
* it will substitute the illegal sequence with the substitution characters.
|
|
|
|
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
|
|
|
* \uD84D\uDC56
|
|
|
|
* UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal
|
|
|
|
* representation in the format \uXXXX, e.g. "\uFFFE\u00AC\uC8FE").
|
|
|
|
* In the Event the converter doesn't support the characters {u,U,\}[A-F][0-9],
|
|
|
|
* it will substitute the illegal sequence with the substitution characters.
|
|
|
|
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
|
|
|
* \U00023456
|
|
|
|
* UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal
|
|
|
|
* representation in the format &#DDDDDDDD, e.g. "¬죾").
|
|
|
|
* In the Event the converter doesn't support the characters {&,#}[0-9],
|
|
|
|
* it will substitute the illegal sequence with the substitution characters.
|
|
|
|
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
|
|
|
* 𣑖 and Zero padding is ignored.
|
|
|
|
* UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal
|
|
|
|
* representation in the format &#xXXXX, e.g. "¬죾").
|
|
|
|
* In the Event the converter doesn't support the characters {&,#,x}[0-9],
|
|
|
|
* it will substitute the illegal sequence with the substitution characters.
|
|
|
|
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
|
|
|
|
* 𣑖
|
2000-03-22 18:31:40 +00:00
|
|
|
* @stable
|
1999-12-28 23:39:02 +00:00
|
|
|
*/
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
|
2001-06-26 22:51:14 +00:00
|
|
|
const void *context,
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterFromUnicodeArgs *fromUArgs,
|
|
|
|
const UChar* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UChar32 codePoint,
|
|
|
|
UConverterCallbackReason reason,
|
2001-03-21 20:44:20 +00:00
|
|
|
UErrorCode * err);
|
1999-12-28 23:39:02 +00:00
|
|
|
|
|
|
|
|
2000-03-22 18:31:40 +00:00
|
|
|
/**
|
2000-12-13 19:41:47 +00:00
|
|
|
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
2001-02-16 20:12:50 +00:00
|
|
|
* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
|
|
|
|
* skips only UNASSINGED_SEQUENCE depending on the context parameter
|
2000-12-13 19:41:47 +00:00
|
|
|
* simply ignoring those characters.
|
2001-02-16 20:12:50 +00:00
|
|
|
* @param context: the function currently recognizes the callback options:
|
|
|
|
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
|
|
|
* returning the error code back to the caller immediately.
|
|
|
|
* NULL: Skips any ILLEGAL_SEQUENCE
|
|
|
|
*
|
2000-03-22 18:31:40 +00:00
|
|
|
* @stable
|
|
|
|
*/
|
2000-06-27 20:47:56 +00:00
|
|
|
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
|
2001-06-26 22:51:14 +00:00
|
|
|
const void *context,
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterToUnicodeArgs *fromUArgs,
|
|
|
|
const char* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UConverterCallbackReason reason,
|
2001-03-21 20:44:20 +00:00
|
|
|
UErrorCode * err);
|
1999-12-28 23:39:02 +00:00
|
|
|
|
2000-03-22 18:31:40 +00:00
|
|
|
/**
|
2000-12-13 19:41:47 +00:00
|
|
|
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
2001-02-16 20:12:50 +00:00
|
|
|
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or
|
|
|
|
* UNASSIGNED_SEQUENCE depending on context parameter, with the
|
2000-12-13 19:41:47 +00:00
|
|
|
* Unicode substitution character, U+FFFD.
|
2001-02-16 20:12:50 +00:00
|
|
|
* @param context: the function currently recognizes the callback options:
|
|
|
|
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
|
|
|
|
* returning the error code back to the caller immediately.
|
|
|
|
* NULL: Substitutes any ILLEGAL_SEQUENCE
|
2000-03-22 18:31:40 +00:00
|
|
|
* @stable
|
1999-12-28 23:39:02 +00:00
|
|
|
*/
|
2000-06-27 20:47:56 +00:00
|
|
|
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
|
2001-06-26 22:51:14 +00:00
|
|
|
const void *context,
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterToUnicodeArgs *fromUArgs,
|
|
|
|
const char* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UConverterCallbackReason reason,
|
2001-03-21 20:44:20 +00:00
|
|
|
UErrorCode * err);
|
1999-12-28 23:39:02 +00:00
|
|
|
|
2000-03-22 18:31:40 +00:00
|
|
|
/**
|
2000-12-13 19:41:47 +00:00
|
|
|
* DO NOT CALL THIS FUNCTION DIRECTLY!
|
|
|
|
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
|
|
|
|
* hexadecimal representation of the illegal bytes
|
|
|
|
* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
|
2000-03-22 18:31:40 +00:00
|
|
|
* @stable
|
1999-12-28 23:39:02 +00:00
|
|
|
*/
|
|
|
|
|
2000-06-27 20:47:56 +00:00
|
|
|
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
|
2001-06-26 22:51:14 +00:00
|
|
|
const void *context,
|
2000-06-27 20:47:56 +00:00
|
|
|
UConverterToUnicodeArgs *fromUArgs,
|
|
|
|
const char* codeUnits,
|
|
|
|
int32_t length,
|
|
|
|
UConverterCallbackReason reason,
|
2001-03-21 20:44:20 +00:00
|
|
|
UErrorCode * err);
|
1999-12-28 23:39:02 +00:00
|
|
|
|
2000-12-13 19:41:47 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*UCNV_ERR_H*/
|