1bf5a90665
X-SVN-Rev: 145
3439 lines
91 KiB
C
3439 lines
91 KiB
C
/*
|
|
********************************************************************************
|
|
* *
|
|
* COPYRIGHT: *
|
|
* (C) Copyright International Business Machines Corporation, 1998 *
|
|
* Licensed Material - Program-Property of IBM - All Rights Reserved. *
|
|
* US Government Users Restricted Rights - Use, duplication, or disclosure *
|
|
* restricted by GSA ADP Schedule Contract with IBM Corp. *
|
|
* *
|
|
********************************************************************************
|
|
*
|
|
*
|
|
* uconv_cnv.c:
|
|
* Implements all the low level conversion functions
|
|
* T_UnicodeConverter_{to,from}Unicode_$ConversionType
|
|
*
|
|
*/
|
|
|
|
#include "utypes.h"
|
|
#include "uhash.h"
|
|
#include "ucmp16.h"
|
|
#include "ucmp8.h"
|
|
#include "ucnv_bld.h"
|
|
#include "ucnv_err.h"
|
|
#include "ucnv_cnv.h"
|
|
#include "ucnv.h"
|
|
#include "cmemory.h"
|
|
|
|
#ifdef Debug
|
|
#include <stdio.h>
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
void flushInternalUnicodeBuffer (UConverter * _this,
|
|
UChar * myTarget,
|
|
int32_t * myTargetIndex,
|
|
int32_t targetLength,
|
|
int32_t** offsets,
|
|
UErrorCode * err);
|
|
|
|
void flushInternalCharBuffer (UConverter * _this,
|
|
char *myTarget,
|
|
int32_t * myTargetIndex,
|
|
int32_t targetLength,
|
|
int32_t** offsets,
|
|
UErrorCode * err);
|
|
|
|
/* Runs the converter's from-Unicode error callback at the current position.
 *
 * Expands to an if/else statement and MUST be used directly inside a loop:
 * when the installed callback is UCNV_FROM_U_CALLBACK_STOP the expansion
 * executes `break`. Otherwise the callback is handed the addresses of two
 * scratch cursors (target + targetIndex, source + sourceIndex) which it may
 * advance; afterwards myTargetIndex and mySourceIndex are recomputed from
 * those cursors so the caller resumes where the callback left off.
 * myTargetIndex and mySourceIndex must therefore be modifiable lvalues. */
#define FromU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if ((_this)->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break; \
else \
  { \
    /* scratch cursors the callback is allowed to move */ \
    char *cbTargetPos_ = (myTarget) + (myTargetIndex); \
    const UChar *cbSourcePos_ = (mySource) + (mySourceIndex); \
    (_this)->fromUCharErrorBehaviour ((_this), \
                                      (char **) &cbTargetPos_, \
                                      (targetLimit), \
                                      (const UChar **) &cbSourcePos_, \
                                      (sourceLimit), \
                                      (offsets), \
                                      (flush), \
                                      (err)); \
    /* translate the moved cursors back into indexes */ \
    (mySourceIndex) = (cbSourcePos_ - (mySource)); \
    (myTargetIndex) = (char *) cbTargetPos_ - (char *) (myTarget); \
  }
|
|
|
|
/* Runs the converter's to-Unicode error callback at the current position.
 *
 * Expands to an if/else statement and MUST be used directly inside a loop:
 * when the installed callback is UCNV_TO_U_CALLBACK_STOP the expansion
 * executes `break`. Otherwise the callback is handed the addresses of two
 * scratch cursors (target + targetIndex, source + sourceIndex) which it may
 * advance; afterwards mySourceIndex (in bytes) and myTargetIndex (in UChars)
 * are recomputed from those cursors. Both index arguments must be
 * modifiable lvalues. */
#define ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if ((_this)->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
else \
  { \
    /* scratch cursors the callback is allowed to move */ \
    const char *cbSourcePos_ = (mySource) + (mySourceIndex); \
    UChar *cbTargetPos_ = (myTarget) + (myTargetIndex); \
    (_this)->fromCharErrorBehaviour ((_this), \
                                     &cbTargetPos_, \
                                     (targetLimit), \
                                     (const char **) &cbSourcePos_, \
                                     (sourceLimit), \
                                     (offsets), \
                                     (flush), \
                                     (err)); \
    /* translate the moved cursors back into indexes */ \
    (mySourceIndex) = ((char *) cbSourcePos_ - (char *) (mySource)); \
    (myTargetIndex) = (cbTargetPos_ - (myTarget)); \
  }
|
|
|
|
/* Offset-tracking variant of the from-Unicode error-callback invocation.
 *
 * Expands to an if/else statement and MUST be used directly inside a loop:
 * when the installed callback is UCNV_FROM_U_CALLBACK_STOP the expansion
 * executes `break`. Otherwise the callback is called with
 * offsets + myTargetIndex as its offsets argument, the indexes are
 * recomputed from the cursors the callback moved, and every offsets[] slot
 * between the old and the new target index is incremented by
 * `currentOffset`.
 *
 * `currentOffset` is NOT a macro parameter: a variable of that name must be
 * in scope at the expansion site. */
#define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if ((_this)->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break; \
else \
  { \
    /* first offsets[] slot the callback may have filled */ \
    int32_t cbSlot_ = (myTargetIndex); \
    /* scratch cursors the callback is allowed to move */ \
    char *cbTargetPos_ = (myTarget) + (myTargetIndex); \
    const UChar *cbSourcePos_ = (mySource) + (mySourceIndex); \
    (_this)->fromUCharErrorBehaviour ((_this), \
                                      (char **) &cbTargetPos_, \
                                      (targetLimit), \
                                      (const UChar **) &cbSourcePos_, \
                                      (sourceLimit), \
                                      (offsets) + (myTargetIndex), \
                                      (flush), \
                                      (err)); \
    /* translate the moved cursors back into indexes */ \
    (mySourceIndex) = cbSourcePos_ - (mySource); \
    (myTargetIndex) = (char *) cbTargetPos_ - (char *) (myTarget); \
    /* rebase the slots written during the callback */ \
    while (cbSlot_ < (myTargetIndex)) \
      { \
        (offsets)[cbSlot_] += currentOffset; \
        cbSlot_++; \
      } \
  }
|
|
|
|
|
|
|
|
/* Offset-tracking variant of the to-Unicode error-callback invocation.
 *
 * Expands to an if/else statement and MUST be used directly inside a loop:
 * when the installed callback is UCNV_TO_U_CALLBACK_STOP the expansion
 * executes `break`. Otherwise the callback is called with
 * offsets + myTargetIndex as its offsets argument, the indexes are
 * recomputed from the cursors the callback moved, and every offsets[] slot
 * between the old and the new target index is incremented by
 * `currentOffset`.
 *
 * `currentOffset` is NOT a macro parameter: a variable of that name must be
 * in scope at the expansion site. */
#define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if ((_this)->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
else \
  { \
    /* first offsets[] slot the callback may have filled */ \
    int32_t cbSlot_ = (myTargetIndex); \
    /* scratch cursors the callback is allowed to move */ \
    const char *cbSourcePos_ = (mySource) + (mySourceIndex); \
    UChar *cbTargetPos_ = (myTarget) + (myTargetIndex); \
    (_this)->fromCharErrorBehaviour ((_this), \
                                     &cbTargetPos_, \
                                     (targetLimit), \
                                     (const char **) &cbSourcePos_, \
                                     (sourceLimit), \
                                     (offsets) + (myTargetIndex), \
                                     (flush), \
                                     (err)); \
    /* translate the moved cursors back into indexes */ \
    (mySourceIndex) = (char *) cbSourcePos_ - (char *) (mySource); \
    (myTargetIndex) = ((UChar *) cbTargetPos_ - (UChar *) (myTarget)); \
    /* rebase the slots written during the callback */ \
    while (cbSlot_ < (myTargetIndex)) \
      { \
        (offsets)[cbSlot_] += currentOffset; \
        cbSlot_++; \
      } \
  }
|
|
|
|
|
|
|
|
/* UTF-8 Conversion DATA
 * For more information see the Unicode Standard 2.0, Transformation Formats, Appendix A-9.
 */
|
|
/* Code point limits. */
const uint32_t kReplacementCharacter = 0x0000FFFD;
const uint32_t kMaximumUCS2          = 0x0000FFFF;
const uint32_t kMaximumUTF16         = 0x0010FFFF;
const uint32_t kMaximumUCS4          = 0x7FFFFFFF;

/* Surrogate arithmetic: shift/base/mask for splitting and joining halves. */
const int8_t   halfShift = 10;
const uint32_t halfBase  = 0x00010000;
const uint32_t halfMask  = 0x000003FF;

/* Surrogate ranges (inclusive). */
const uint32_t kSurrogateHighStart = 0xD800;
const uint32_t kSurrogateHighEnd   = 0xDBFF;
const uint32_t kSurrogateLowStart  = 0xDC00;
const uint32_t kSurrogateLowEnd    = 0xDFFF;

/* NOTE(review): presumably indexed by UTF-8 sequence length (1..6, slot 0
 * unused) and subtracted from the accumulated byte values -- confirm
 * against the UTF-8 decoder that uses it. */
const uint32_t offsetsFromUTF8[7] = {
  0,
  (uint32_t) 0x00000000,
  (uint32_t) 0x00003080,
  (uint32_t) 0x000E2080,
  (uint32_t) 0x03C82080,
  (uint32_t) 0xFA082080,
  (uint32_t) 0x82082080
};
|
|
|
|
/* The ESC control byte that introduces an ISO 2022 escape sequence. */
#define ESC_2022 0x1B

/* Classification of a (partial) ISO 2022 escape sequence. */
typedef enum
{
  /* does not correspond to a valid ISO 2022 escape sequence */
  INVALID_2022 = -1,
  /* valid so far, but more bytes are needed */
  VALID_NON_TERMINAL_2022 = 0,
  /* a complete, valid ISO 2022 escape sequence */
  VALID_TERMINAL_2022 = 1,
  /* matches one escape sequence already, but further bytes could turn it
   * into a different, longer one */
  VALID_MAYBE_TERMINAL_2022 = 2
} UCNV_TableStates_2022;
|
|
|
|
/* The three arrays below (this one plus the key and result/value tables)
 * together describe a state transition table.
 *
 * normalize_esq_chars_2022 maps a byte value to a small non-zero code for
 * the bytes that have an entry and to 0 for every other byte. Rows are 16
 * entries wide, so the row label is the high nibble of the byte. Entries
 * 0x60..0xFF are all zero and are left to implicit zero-initialisation. */
int8_t normalize_esq_chars_2022[256] = {
  /* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,
  /* 0x20 */  0,  0,  0,  0,  4,  7,  0,  0,  2,  0,  0,  0,  0,  3,  0,  6,
  /* 0x30 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  /* 0x40 */  5,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,  0,  0,
  /* 0x50 */  0,  0, 21,  0,  0,  0,  0,  0,  0,  0, 22,  0,  0,  0,  0,  0
};
|
|
/* Number of entries in each of the escape-sequence state tables. */
#define MAX_STATES_2022 54

/* Keys of the escape-sequence state table, in ascending order.
 * Entry i of this table corresponds to entry i of
 * escSeqStateTable_Result_2022 and escSeqStateTable_Value_2022. */
int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
  /*  0 */ 1,        34,       36,       39,       1093,
  /*  5 */ 1096,     1097,     1098,     1099,     1100,
  /* 10 */ 1101,     1102,     1103,     1104,     1105,
  /* 15 */ 1106,     1109,     1154,     1157,     1160,
  /* 20 */ 1161,     1254,     1257,     35105,    36933,
  /* 25 */ 36936,    36937,    36938,    36939,    36940,
  /* 30 */ 36942,    36943,    36944,    36945,    36946,
  /* 35 */ 36947,    36948,    40133,    40136,    40138,
  /* 40 */ 40139,    40140,    40141,    1123363,  35947624,
  /* 45 */ 35947625, 35947626, 35947627, 35947629, 35947630,
  /* 50 */ 35947631, 35947635, 35947636, 35947638
};

/* Converter name associated with each key above; NULL where the key has no
 * converter of its own.
 * Entry 25 used to read "bm-367" (missing leading 'i'); it is "ibm-367",
 * matching entry 19 and the ibm-955 / ibm-367 / ibm-952 run at 18..20. */
const char* escSeqStateTable_Result_2022[MAX_STATES_2022] = {
  /*  0 */ NULL,      NULL,      NULL,      NULL,      "latin1",
  /*  5 */ "latin1",  "latin1",  "ibm-865", "ibm-865", "ibm-865",
  /* 10 */ "ibm-865", "ibm-865", "ibm-865", "ibm-895", "ibm-943",
  /* 15 */ "latin1",  "latin1",  NULL,      "ibm-955", "ibm-367",
  /* 20 */ "ibm-952", NULL,      "UTF8",    NULL,      "ibm-955",
  /* 25 */ "ibm-367", "ibm-952", "ibm-949", "ibm-953", "ibm-1383",
  /* 30 */ "ibm-952", "ibm-964", "ibm-964", "ibm-964", "ibm-964",
  /* 35 */ "ibm-964", "ibm-964", "UTF16_PlatformEndian", "UTF16_PlatformEndian", "UTF16_PlatformEndian",
  /* 40 */ "UTF16_PlatformEndian", "UTF16_PlatformEndian", "UTF16_PlatformEndian", NULL, "latin1",
  /* 45 */ "ibm-912", "ibm-913", "ibm-914", "ibm-813", "ibm-1089",
  /* 50 */ "ibm-920", "ibm-915", "ibm-915", "latin1"
};
|
|
|
|
UCNV_TableStates_2022 escSeqStateTable_Value_2022[MAX_STATES_2022] = {
|
|
VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
|
|
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
|
|
,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
|
|
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
|
|
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
|
|
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022};
|
|
|
|
/*for 2022 looks ahead in the stream
|
|
*to determine the longest possible convertible
|
|
*data stream*/
|
|
static const char* getEndOfBuffer_2022(const char* source,
|
|
const char* sourceLimit,
|
|
bool_t flush);
|
|
/*runs through a state machine to determine the escape sequence - codepage correspondance
|
|
*changes the pointer pointed to be _this->extraInfo*/
|
|
static void changeState_2022(UConverter* _this,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
bool_t flush,
|
|
UErrorCode* err);
|
|
|
|
/* Prototype only; the definition is not in this part of the file.
 * NOTE(review): presumably folds `source` into *key and looks the result
 * up in the escape-sequence state tables -- confirm against the
 * definition. */
UCNV_TableStates_2022 getKey_2022 (char source,
                                   int32_t *key,
                                   int32_t *offset);
|
|
|
|
/* END OF UTF-8 Conversion DATA */
|
|
|
|
/* Per-byte table indexed by a byte value 0x00..0xFF. Rows are 16 entries
 * wide, so the row label is the high nibble of the byte:
 *   0x00-0x7F -> 1,  0x80-0xBF -> 0,  0xC0-0xDF -> 2,
 *   0xE0-0xEF -> 3,  0xF0-0xF7 -> 4,  0xF8-0xFF -> 0.
 * NOTE(review): presumably the UTF-8 sequence length announced by a lead
 * byte, with 0 for bytes that cannot start a sequence -- confirm against
 * the UTF-8 decoder that uses it. */
const int8_t bytesFromUTF8[256] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xF0 */ 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
};
|
|
|
|
/* NOTE(review): presumably the marker bits OR-ed into the first byte of a
 * UTF-8 sequence, indexed by sequence length -- confirm against the UTF-8
 * encoder that uses it. */
const unsigned char firstByteMark[7] = {
  0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
};

/* Table value meaning "no mapping" when converting from Unicode
 * (compared against the result of the fromUnicode lookup). */
#define missingCharMarker 0xFFFF

/* Table value meaning "no mapping" when converting to Unicode
 * (compared against the result of the toUnicode lookup). */
#define missingUCharMarker 0xFFFD
|
|
|
|
|
|
|
|
void T_UConverter_toUnicode_SBCS (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
char *mySource = (char *) *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - (char *) mySource;
|
|
UChar *myToUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
|
|
myToUnicode = _this->sharedData->table->sbcs.toUnicode;
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
|
|
/*writing the UniChar to the output stream */
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];
|
|
|
|
if (targetUniChar != missingUCharMarker)
|
|
{
|
|
/* writes the UniChar to the output stream */
|
|
myTarget[myTargetIndex++] = targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
|
|
_this->invalidCharLength = 1;
|
|
|
|
ToU_CALLBACK_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidCharLength = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
void T_UConverter_toUnicode_DBCS (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const char *mySource = ( char *) *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - (char *) mySource;
|
|
CompactShortArray *myToUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
|
|
myToUnicode = _this->sharedData->table->dbcs.toUnicode;
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
mySourceChar = (unsigned char) mySource[mySourceIndex++];
|
|
|
|
/*We have no internal state, we should */
|
|
if (_this->toUnicodeStatus == 0x00)
|
|
{
|
|
_this->toUnicodeStatus = (unsigned char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
if (_this->toUnicodeStatus != 0x00)
|
|
{
|
|
mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | (mySourceChar & 0x00FF));
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
|
|
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
|
|
|
/*writing the UniChar to the output stream */
|
|
if (targetUniChar != missingUCharMarker)
|
|
{
|
|
/*writes the UniChar to the output stream */
|
|
myTarget[myTargetIndex++] = targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
|
_this->invalidCharBuffer[1] = (char) mySourceChar;
|
|
_this->invalidCharLength = 2;
|
|
|
|
ToU_CALLBACK_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*If at the end of conversion we are still carrying state information
|
|
*flush is TRUE, we can deduce that the input stream is truncated
|
|
*/
|
|
if ((flush == TRUE)
|
|
&& (mySourceIndex == sourceLength)
|
|
&& (_this->toUnicodeStatus != 0x00))
|
|
{
|
|
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
void T_UConverter_toUnicode_LATIN_1 (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
unsigned char *mySource = (unsigned char *) *source;
|
|
UChar *myTarget = *target;
|
|
int32_t sourceLength = sourceLimit - (char *) mySource;
|
|
int32_t readLen = 0;
|
|
int32_t i = 0;
|
|
|
|
/*Since there is no risk of encountering illegal Chars
|
|
*we need to pad our latin1 chars to create Unicode codepoints
|
|
*we need to go as far a min(targetLen, sourceLen)
|
|
*in case we don't have enough buffer space
|
|
*we set the error flag accordingly
|
|
*/
|
|
if ((targetLimit - *target) < sourceLength)
|
|
{
|
|
readLen = targetLimit - *target;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
else
|
|
{
|
|
readLen = sourceLimit - (char *) mySource;
|
|
}
|
|
|
|
for (i = 0; i < readLen; i++) myTarget[i] = (UChar) mySource[i];
|
|
|
|
*target += i;
|
|
*source += i;
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_fromUnicode_LATIN_1 (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = *source;
|
|
unsigned char *myTarget = (unsigned char *) *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
if (mySource[mySourceIndex] < 0x0100)
|
|
{
|
|
/*writes the char to the output stream */
|
|
myTarget[myTargetIndex++] = (char) mySource[mySourceIndex++];
|
|
}
|
|
else
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidUCharBuffer[0] = (UChar) mySource[mySourceIndex++];
|
|
_this->invalidUCharLength = 1;
|
|
|
|
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
|
FromU_CALLBACK_MACRO(_this,
|
|
(char *)myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
void T_UConverter_fromUnicode_SBCS (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = *source;
|
|
unsigned char *myTarget = (unsigned char *) *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactByteArray *myFromUnicode;
|
|
unsigned char targetChar = 0x00;
|
|
|
|
myFromUnicode = _this->sharedData->table->sbcs.fromUnicode;
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]);
|
|
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceIndex++;
|
|
if (targetChar != 0 || !mySource[mySourceIndex - 1])
|
|
{
|
|
/*writes the char to the output stream */
|
|
myTarget[myTargetIndex++] = targetChar;
|
|
}
|
|
else
|
|
{
|
|
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
|
|
_this->invalidUCharLength = 1;
|
|
|
|
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
|
FromU_CALLBACK_MACRO(_this,
|
|
(char *)myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
if (U_FAILURE (*err))
|
|
{
|
|
break;
|
|
}
|
|
_this->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const char *mySource = *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myToUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
int32_t myMode = _this->mode;
|
|
|
|
|
|
myToUnicode = _this->sharedData->table->dbcs.toUnicode;
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
mySourceChar = (unsigned char) (mySource[mySourceIndex++]);
|
|
if (mySourceChar == UCNV_SI) myMode = UCNV_SI;
|
|
else if (mySourceChar == UCNV_SO) myMode = UCNV_SO;
|
|
else if ((myMode == UCNV_SO) &&
|
|
(_this->toUnicodeStatus == 0x00))
|
|
{
|
|
_this->toUnicodeStatus = (unsigned char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
/*In case there is a state, we update the source char
|
|
*by concatenating the previous char with the current
|
|
*one
|
|
*/
|
|
if (_this->toUnicodeStatus != 0x00)
|
|
{
|
|
mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
else mySourceChar &= 0x00FF;
|
|
|
|
/*gets the corresponding Unicode codepoint */
|
|
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
|
|
|
/*writing the UniChar to the output stream */
|
|
if (targetUniChar != missingUCharMarker)
|
|
{
|
|
/*writes the UniChar to the output stream */
|
|
myTarget[myTargetIndex++] = targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
if (mySourceChar > 0xff)
|
|
{
|
|
_this->invalidCharLength = 2;
|
|
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
|
_this->invalidCharBuffer[1] = (char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
_this->invalidCharLength = 1;
|
|
_this->invalidCharBuffer[0] = (char) mySourceChar;
|
|
}
|
|
_this->mode = myMode;
|
|
ToU_CALLBACK_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*If at the end of conversion we are still carrying state information
|
|
*flush is TRUE, we can deduce that the input stream is truncated
|
|
*/
|
|
if (_this->toUnicodeStatus
|
|
&& (mySourceIndex == sourceLength)
|
|
&& (flush == TRUE))
|
|
{
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
_this->mode = myMode;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const char *mySource = *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myToUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
int32_t myMode = _this->mode;
|
|
int32_t* originalOffsets = offsets;
|
|
|
|
|
|
myToUnicode = _this->sharedData->table->dbcs.toUnicode;
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
mySourceChar = (unsigned char) (mySource[mySourceIndex++]);
|
|
if (mySourceChar == UCNV_SI) myMode = UCNV_SI;
|
|
else if (mySourceChar == UCNV_SO) myMode = UCNV_SO;
|
|
else if ((myMode == UCNV_SO) &&
|
|
(_this->toUnicodeStatus == 0x00))
|
|
{
|
|
_this->toUnicodeStatus = (unsigned char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
/*In case there is a state, we update the source char
|
|
*by concatenating the previous char with the current
|
|
*one
|
|
*/
|
|
if (_this->toUnicodeStatus != 0x00)
|
|
{
|
|
mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
else mySourceChar &= 0x00FF;
|
|
|
|
/*gets the corresponding Unicode codepoint */
|
|
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
|
|
|
/*writing the UniChar to the output stream */
|
|
if (targetUniChar != missingUCharMarker)
|
|
{
|
|
/*writes the UniChar to the output stream */
|
|
{
|
|
if(myMode == UCNV_SO)
|
|
offsets[myTargetIndex] = mySourceIndex-2; /* double byte */
|
|
else
|
|
offsets[myTargetIndex] = mySourceIndex-1; /* single byte */
|
|
}
|
|
myTarget[myTargetIndex++] = targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
int32_t currentOffset = offsets[myTargetIndex-1] + 2;/* Because mySourceIndex was already incremented */
|
|
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
if (mySourceChar > 0xFF)
|
|
{
|
|
_this->invalidCharLength = 2;
|
|
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
|
_this->invalidCharBuffer[1] = (char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
_this->invalidCharLength = 1;
|
|
_this->invalidCharBuffer[0] = (char) mySourceChar;
|
|
}
|
|
_this->mode = myMode;
|
|
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*If at the end of conversion we are still carrying state information
|
|
*flush is TRUE, we can deduce that the input stream is truncated
|
|
*/
|
|
if (_this->toUnicodeStatus
|
|
&& (mySourceIndex == sourceLength)
|
|
&& (flush == TRUE))
|
|
{
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
_this->mode = myMode;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
void T_UConverter_toUnicode_MBCS (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const char *mySource = *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myToUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
bool_t *myStarters = NULL;
|
|
|
|
|
|
|
|
|
|
myToUnicode = _this->sharedData->table->mbcs.toUnicode;
|
|
myStarters = _this->sharedData->table->mbcs.starters;
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
mySourceChar = (unsigned char) (mySource[mySourceIndex++]);
|
|
|
|
|
|
if (myStarters[(uint8_t) mySourceChar] &&
|
|
(_this->toUnicodeStatus == 0x00))
|
|
{
|
|
_this->toUnicodeStatus = (unsigned char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
/*In case there is a state, we update the source char
|
|
*by concatenating the previous char with the current
|
|
*one
|
|
*/
|
|
|
|
if (_this->toUnicodeStatus != 0x00)
|
|
{
|
|
mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
|
|
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
|
|
/*gets the corresponding Unicode codepoint */
|
|
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
|
|
|
/*writing the UniChar to the output stream */
|
|
if (targetUniChar != missingUCharMarker)
|
|
{
|
|
myTarget[myTargetIndex++] = targetUniChar;
|
|
|
|
}
|
|
else
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
if (mySourceChar > 0xff)
|
|
{
|
|
_this->invalidCharLength = 2;
|
|
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
|
_this->invalidCharBuffer[1] = (char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
_this->invalidCharLength = 1;
|
|
_this->invalidCharBuffer[0] = (char) mySourceChar;
|
|
}
|
|
|
|
ToU_CALLBACK_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*If at the end of conversion we are still carrying state information
|
|
*flush is TRUE, we can deduce that the input stream is truncated
|
|
*/
|
|
if (_this->toUnicodeStatus
|
|
&& (mySourceIndex == sourceLength)
|
|
&& (flush == TRUE))
|
|
{
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const char *mySource = *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myToUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
UChar oldMySourceChar;
|
|
bool_t *myStarters = NULL;
|
|
int32_t* originalOffsets = offsets;
|
|
|
|
|
|
|
|
myToUnicode = _this->sharedData->table->mbcs.toUnicode;
|
|
myStarters = _this->sharedData->table->mbcs.starters;
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
mySourceChar = (unsigned char) (mySource[mySourceIndex++]);
|
|
|
|
|
|
if (myStarters[(uint8_t) mySourceChar] &&
|
|
(_this->toUnicodeStatus == 0x00))
|
|
{
|
|
_this->toUnicodeStatus = (unsigned char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
/*In case there is a state, we update the source char
|
|
*by concatenating the previous char with the current
|
|
*one
|
|
*/
|
|
|
|
if (_this->toUnicodeStatus != 0x00)
|
|
{
|
|
mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
|
|
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
|
|
/*gets the corresponding Unicode codepoint */
|
|
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
|
|
|
|
|
/*writing the UniChar to the output stream */
|
|
if (targetUniChar != missingUCharMarker)
|
|
{
|
|
/*writes the UniChar to the output stream */
|
|
{
|
|
|
|
|
|
if (targetUniChar > 0x00FF)
|
|
offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */
|
|
else
|
|
offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */
|
|
|
|
|
|
}
|
|
myTarget[myTargetIndex++] = targetUniChar;
|
|
oldMySourceChar = mySourceChar;
|
|
|
|
}
|
|
else
|
|
{
|
|
int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1);
|
|
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
if (mySourceChar > 0xff)
|
|
{
|
|
_this->invalidCharLength = 2;
|
|
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
|
_this->invalidCharBuffer[1] = (char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
_this->invalidCharLength = 1;
|
|
_this->invalidCharBuffer[0] = (char) mySourceChar;
|
|
}
|
|
|
|
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*If at the end of conversion we are still carrying state information
|
|
*flush is TRUE, we can deduce that the input stream is truncated
|
|
*/
|
|
if (_this->toUnicodeStatus
|
|
&& (mySourceIndex == sourceLength)
|
|
&& (flush == TRUE))
|
|
{
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
void T_UConverter_fromUnicode_EBCDIC_STATEFUL (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
|
|
{
|
|
const UChar *mySource = *source;
|
|
char *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myFromUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
int8_t targetUniCharByteNum = 0;
|
|
UChar mySourceChar = 0x0000;
|
|
bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus;
|
|
bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
|
myFromUnicode = _this->sharedData->table->dbcs.fromUnicode;
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceChar = (UChar) mySource[mySourceIndex++];
|
|
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
|
oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
|
isTargetUCharDBCS = (targetUniChar>0x00FF);
|
|
|
|
if (targetUniChar != missingCharMarker)
|
|
{
|
|
if (oldIsTargetUCharDBCS != isTargetUCharDBCS)
|
|
{
|
|
if (isTargetUCharDBCS) myTarget[myTargetIndex++] = UCNV_SO;
|
|
else myTarget[myTargetIndex++] = UCNV_SI;
|
|
|
|
|
|
if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength))
|
|
{
|
|
_this->charErrorBuffer[0] = (char) targetUniChar;
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
else if (myTargetIndex+1 >= targetLength)
|
|
{
|
|
_this->charErrorBuffer[0] = (char) (targetUniChar >> 8);
|
|
_this->charErrorBuffer[1] = (char) targetUniChar & 0x00FF;
|
|
_this->charErrorBufferLength = 2;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
if (!isTargetUCharDBCS)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) targetUniChar;
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
isTargetUCharDBCS = oldIsTargetUCharDBCS;
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
|
_this->invalidUCharLength = 1;
|
|
|
|
_this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
|
|
FromU_CALLBACK_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
_this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
|
|
{
|
|
const UChar *mySource = *source;
|
|
char *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myFromUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
int8_t targetUniCharByteNum = 0;
|
|
UChar mySourceChar = 0x0000;
|
|
bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus;
|
|
bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
|
int32_t* originalOffsets = offsets;
|
|
|
|
myFromUnicode = _this->sharedData->table->dbcs.fromUnicode;
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceChar = (UChar) mySource[mySourceIndex++];
|
|
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
|
oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
|
isTargetUCharDBCS = (targetUniChar>0x00FF);
|
|
|
|
if (targetUniChar != missingCharMarker)
|
|
{
|
|
if (oldIsTargetUCharDBCS != isTargetUCharDBCS)
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
if (isTargetUCharDBCS) myTarget[myTargetIndex++] = UCNV_SO;
|
|
else myTarget[myTargetIndex++] = UCNV_SI;
|
|
|
|
|
|
if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength))
|
|
{
|
|
_this->charErrorBuffer[0] = (char) targetUniChar;
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
else if (myTargetIndex+1 >= targetLength)
|
|
{
|
|
_this->charErrorBuffer[0] = (char) (targetUniChar >> 8);
|
|
_this->charErrorBuffer[1] = (char) targetUniChar & 0x00FF;
|
|
_this->charErrorBufferLength = 2;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!isTargetUCharDBCS)
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) targetUniChar;
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int32_t currentOffset = offsets[myTargetIndex-1]+1;
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
|
_this->invalidUCharLength = 1;
|
|
|
|
/* Breaks out of the loop since behaviour was set to stop */
|
|
_this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
|
|
FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;;
|
|
|
|
_this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_fromUnicode_MBCS (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
|
|
{
|
|
const UChar *mySource = *source;
|
|
char *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myFromUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
int8_t targetUniCharByteNum = 0;
|
|
UChar mySourceChar = 0x0000;
|
|
|
|
myFromUnicode = _this->sharedData->table->mbcs.fromUnicode;
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceChar = (UChar) mySource[mySourceIndex++];
|
|
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
|
|
|
|
|
if (targetUniChar != missingCharMarker)
|
|
{
|
|
if (targetUniChar <= 0x00FF)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) targetUniChar;
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
|
_this->invalidUCharLength = 1;
|
|
|
|
FromU_CALLBACK_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;;
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
|
|
{
|
|
const UChar *mySource = *source;
|
|
char *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myFromUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
int8_t targetUniCharByteNum = 0;
|
|
UChar mySourceChar = 0x0000;
|
|
int32_t* originalOffsets = offsets;
|
|
|
|
myFromUnicode = _this->sharedData->table->mbcs.fromUnicode;
|
|
|
|
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceChar = (UChar) mySource[mySourceIndex++];
|
|
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
|
|
|
if (targetUniChar != missingCharMarker)
|
|
{
|
|
if (targetUniChar <= 0x00FF)
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
|
|
}
|
|
else
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) targetUniChar;
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int32_t currentOffset = mySourceIndex -1;
|
|
int32_t* offsetsAnchor = offsets;
|
|
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
|
_this->invalidUCharLength = 1;
|
|
|
|
FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;;
|
|
|
|
|
|
return;
|
|
}
|
|
void T_UConverter_fromUnicode_ISO_2022(UConverter* _this,
|
|
char** target,
|
|
const char* targetLimit,
|
|
const UChar** source,
|
|
const UChar* sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode* err)
|
|
{
|
|
char const* targetStart = *target;
|
|
T_UConverter_fromUnicode_UTF8(_this,
|
|
target,
|
|
targetLimit,
|
|
source,
|
|
sourceLimit,
|
|
NULL,
|
|
flush,
|
|
err);
|
|
}
|
|
|
|
|
|
void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this,
|
|
char** target,
|
|
const char* targetLimit,
|
|
const UChar** source,
|
|
const UChar* sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode* err)
|
|
{
|
|
|
|
char const* targetStart = *target;
|
|
T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC(_this,
|
|
target,
|
|
targetLimit,
|
|
source,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
{
|
|
int32_t len = *target - targetStart;
|
|
int32_t i;
|
|
/* icu_memmove(offsets+3, offsets, len); MEMMOVE SEEMS BROKEN --srl */
|
|
|
|
for(i=len-1;i>=0;i--) offsets[i] = offsets[i];
|
|
|
|
}
|
|
}
|
|
|
|
UCNV_TableStates_2022 getKey_2022(char c,
|
|
int32_t* key,
|
|
int32_t* offset)
|
|
{
|
|
int32_t togo = *key;
|
|
int32_t low = 0;
|
|
int32_t hi = MAX_STATES_2022;
|
|
int32_t oldmid;
|
|
|
|
if (*key == 0) togo = normalize_esq_chars_2022[c];
|
|
else
|
|
{
|
|
togo <<= 5;
|
|
togo += normalize_esq_chars_2022[c];
|
|
}
|
|
|
|
while (hi != low) /*binary search*/
|
|
{
|
|
register int32_t mid = (hi+low) >> 1; /*Finds median*/
|
|
|
|
if (mid == oldmid) break;
|
|
if (escSeqStateTable_Key_2022[mid] > togo) hi = mid;
|
|
else if (escSeqStateTable_Key_2022[mid] < togo) low = mid;
|
|
else /*we found it*/
|
|
{
|
|
*key = togo;
|
|
*offset = mid;
|
|
#ifdef Debug
|
|
printf("found at @ %d\n", mid);
|
|
#endif /*Debug*/
|
|
return escSeqStateTable_Value_2022[mid];
|
|
}
|
|
oldmid = mid;
|
|
|
|
}
|
|
|
|
#ifdef Debug
|
|
printf("Could not find \"%d\" for %X\n", togo, c);
|
|
#endif /*Debug*/
|
|
*key = 0;
|
|
*offset = 0;
|
|
|
|
|
|
return INVALID_2022;
|
|
}
|
|
|
|
void changeState_2022(UConverter* _this,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
bool_t flush,
|
|
UErrorCode* err)
|
|
{
|
|
UConverter* myUConverter;
|
|
uint32_t key = _this->toUnicodeStatus;
|
|
UCNV_TableStates_2022 value;
|
|
UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
|
|
const char* chosenConverterName = NULL;
|
|
int32_t offset;
|
|
|
|
/*Close the old Converter*/
|
|
if (_this->mode == UCNV_SO) ucnv_close(myData2022->currentConverter);
|
|
myData2022->currentConverter = NULL;
|
|
_this->mode = UCNV_SI;
|
|
|
|
/*In case we were in the process of consuming an escape sequence
|
|
we need to reprocess it */
|
|
|
|
do
|
|
{
|
|
#ifdef Debug
|
|
printf("Pre Stage: char = %x, key = %d, value =%d\n", **source, key, value);
|
|
fflush(stdout);
|
|
#endif /*Debug*/
|
|
/* Needed explicit cast for key on MVS to make compiler happy - JJD */
|
|
value = getKey_2022(**source,(int32_t *) &key, &offset);
|
|
#ifdef Debug
|
|
printf("Post Stage: char = %x, key = %d, value =%d\n", **source, key, value);
|
|
fflush(stdout);
|
|
#endif /*Debug*/
|
|
switch (value)
|
|
{
|
|
case VALID_NON_TERMINAL_2022 :
|
|
{
|
|
#ifdef Debug
|
|
puts("VALID_NON_TERMINAL_2022");
|
|
#endif /*Debug*/
|
|
};break;
|
|
|
|
case VALID_TERMINAL_2022:
|
|
{
|
|
#ifdef Debug
|
|
puts("VALID_TERMINAL_2022");
|
|
#endif /*Debug*/
|
|
chosenConverterName = escSeqStateTable_Result_2022[offset];
|
|
key = 0;
|
|
goto DONE;
|
|
};break;
|
|
|
|
case INVALID_2022:
|
|
{
|
|
#ifdef Debug
|
|
puts("INVALID_2022");
|
|
#endif /*Debug*/
|
|
_this->toUnicodeStatus = 0;
|
|
*err = U_ILLEGAL_CHAR_FOUND;
|
|
return;
|
|
}
|
|
|
|
case VALID_MAYBE_TERMINAL_2022:
|
|
{
|
|
const char* mySource = (*source + 1);
|
|
int32_t myKey = key;
|
|
UCNV_TableStates_2022 myValue = value;
|
|
int32_t myOffset;
|
|
#ifdef Debug
|
|
puts("VALID_MAYBE_TERMINAL_2022");
|
|
#endif /*Debug*/
|
|
|
|
while ((mySource < sourceLimit) &&
|
|
((myValue == VALID_MAYBE_TERMINAL_2022)||(myValue == VALID_NON_TERMINAL_2022)))
|
|
{
|
|
#ifdef Debug
|
|
printf("MAYBE value = %d myKey = %d %X\n", myValue, myKey, *mySource);
|
|
#endif /*Debug*/
|
|
myValue = getKey_2022(*(mySource++), &myKey, &myOffset);
|
|
}
|
|
#ifdef Debug
|
|
printf("myValue = %d\n", myValue);
|
|
#endif /*Debug*/
|
|
switch (myValue)
|
|
{
|
|
case INVALID_2022:
|
|
{
|
|
/*Backs off*/
|
|
#ifdef Debug
|
|
puts("VALID_MAYBE_TERMINAL INVALID");
|
|
printf("offset = %d\n", offset);
|
|
#endif /*Debug*/
|
|
chosenConverterName = escSeqStateTable_Result_2022[offset];
|
|
value = VALID_TERMINAL_2022;
|
|
#ifdef Debug
|
|
printf("%d\n", offset);
|
|
fflush(stdout);
|
|
#endif /*Debug*/
|
|
goto DONE;
|
|
};break;
|
|
|
|
case VALID_TERMINAL_2022:
|
|
{
|
|
/*uses longer escape sequence*/
|
|
#ifdef Debug
|
|
puts("VALID_MAYBE_TERMINAL TERMINAL");
|
|
#endif /*Debug*/
|
|
*source = mySource-1; /*deals with the overshot in the while above*/
|
|
chosenConverterName = escSeqStateTable_Result_2022[myOffset];
|
|
key = 0;
|
|
value = VALID_TERMINAL_2022;
|
|
goto DONE;
|
|
};break;
|
|
|
|
case VALID_NON_TERMINAL_2022:
|
|
#ifdef Debug
|
|
puts("VALID_MAYBE_TERMINAL NON_TERMINAL");
|
|
#endif /*Debug*/
|
|
case VALID_MAYBE_TERMINAL_2022:
|
|
{
|
|
#ifdef Debug
|
|
puts("VALID_MAYBE_TERMINAL MAYBE_TERMINAL");
|
|
#endif /*Debug*/
|
|
if (flush)
|
|
{
|
|
/*Backs off*/
|
|
chosenConverterName = escSeqStateTable_Result_2022[offset];
|
|
value = VALID_TERMINAL_2022;
|
|
key = 0;
|
|
goto DONE;
|
|
}
|
|
else
|
|
{
|
|
key = myKey;
|
|
value = VALID_NON_TERMINAL_2022;
|
|
}
|
|
};break;
|
|
};break;
|
|
};break;
|
|
}
|
|
} while ((*source)++ <= sourceLimit);
|
|
|
|
DONE:
|
|
_this->toUnicodeStatus = key;
|
|
|
|
if ((value == VALID_NON_TERMINAL_2022) || (value == VALID_MAYBE_TERMINAL_2022))
|
|
{
|
|
#ifdef Debug
|
|
printf("Out: current **source = %X", **source);
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
if (value > 0) myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
|
|
{
|
|
#ifdef Debug
|
|
printf("Error = %d open \"%s\"\n", *err, chosenConverterName);
|
|
#endif /*Debug*/
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
/*Customize the converter with the attributes set on the 2022 converter*/
|
|
myUConverter->fromUCharErrorBehaviour = _this->fromUCharErrorBehaviour;
|
|
myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour;
|
|
icu_memcpy(myUConverter->subChar,
|
|
_this->subChar,
|
|
myUConverter->subCharLen = _this->subCharLen);
|
|
|
|
_this->mode = UCNV_SO;
|
|
}
|
|
}
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
/*Checks the first 3 characters of the buffer against valid 2022 escape sequences
|
|
*if the match we return a pointer to the initial start of the sequence otherwise
|
|
*we return sourceLimit
|
|
*/
|
|
const char* getEndOfBuffer_2022(const char* source,
|
|
const char* sourceLimit,
|
|
bool_t flush)
|
|
{
|
|
const char* mySource = source;
|
|
|
|
if (source >= sourceLimit) return sourceLimit;
|
|
|
|
do
|
|
{
|
|
if (*mySource == ESC_2022)
|
|
{
|
|
int8_t i;
|
|
int32_t key = 0;
|
|
int32_t offset;
|
|
UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
|
|
|
|
for (i=0;
|
|
(mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
|
|
i++)
|
|
{
|
|
value = getKey_2022(*(mySource+i), &key, &offset);
|
|
#ifdef Debug
|
|
printf("Look ahead value = %d\n", value);
|
|
#endif /*Debug*/
|
|
}
|
|
if (value > 0) return mySource;
|
|
if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) return sourceLimit;
|
|
}
|
|
}
|
|
while (mySource++ < sourceLimit);
|
|
|
|
return sourceLimit;
|
|
}
|
|
|
|
|
|
|
|
void T_UConverter_toUnicode_ISO_2022(UConverter* _this,
|
|
UChar** target,
|
|
const UChar* targetLimit,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode* err)
|
|
{
|
|
int32_t base = 0;
|
|
const char* mySourceLimit;
|
|
char const* sourceStart;
|
|
|
|
/*Arguments Check*/
|
|
if (U_FAILURE(*err)) return;
|
|
if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source))
|
|
{
|
|
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
|
|
mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, flush);
|
|
|
|
|
|
/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
|
|
if (_this->mode == UCNV_SO) /*Already doing some conversion*/
|
|
{
|
|
const UChar* myTargetStart = *target;
|
|
#ifdef Debug
|
|
printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit);
|
|
#endif /*Debug*/
|
|
|
|
ucnv_toUnicode(((UConverterDataISO2022*)(_this->extraInfo))->currentConverter,
|
|
target,
|
|
targetLimit,
|
|
source,
|
|
mySourceLimit,
|
|
NULL,
|
|
flush,
|
|
err);
|
|
|
|
|
|
#ifdef Debug
|
|
puts("---------------------------> CONVERTED");
|
|
printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit);
|
|
printf("err =%d", *err);
|
|
#endif /*Debug*/
|
|
}
|
|
/*-Done with buffer with entire buffer
|
|
-Error while converting
|
|
*/
|
|
|
|
if (U_FAILURE(*err) || (*source == sourceLimit)) return;
|
|
#ifdef Debug
|
|
puts("Got Here!");
|
|
fflush(stdout);
|
|
#endif /*Debug*/
|
|
sourceStart = *source;
|
|
changeState_2022(_this,
|
|
source,
|
|
sourceLimit,
|
|
flush,
|
|
err);
|
|
(*source)++;
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this,
|
|
UChar** target,
|
|
const UChar* targetLimit,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode* err)
|
|
{
|
|
int32_t myOffset=0;
|
|
int32_t base = 0;
|
|
const char* mySourceLimit;
|
|
char const* sourceStart;
|
|
|
|
/*Arguments Check*/
|
|
if (U_FAILURE(*err)) return;
|
|
if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source))
|
|
{
|
|
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
|
|
mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, flush);
|
|
/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
|
|
|
|
if (_this->mode == UCNV_SO) /*Already doing some conversion*/
|
|
{
|
|
const UChar* myTargetStart = *target;
|
|
#ifdef Debug
|
|
printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit);
|
|
#endif /*Debug*/
|
|
|
|
ucnv_toUnicode(((UConverterDataISO2022*)(_this->extraInfo))->currentConverter,
|
|
target,
|
|
targetLimit,
|
|
source,
|
|
mySourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
{
|
|
int32_t lim = *target - myTargetStart;
|
|
int32_t i = 0;
|
|
for (i=base; i < lim;i++) offsets[i] += myOffset;
|
|
base += lim;
|
|
}
|
|
|
|
#ifdef Debug
|
|
puts("---------------------------> CONVERTED");
|
|
printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit);
|
|
printf("err =%d", *err);
|
|
#endif /*Debug*/
|
|
}
|
|
|
|
/*-Done with buffer with entire buffer
|
|
-Error while converting
|
|
*/
|
|
|
|
if (U_FAILURE(*err) || (*source == sourceLimit)) return;
|
|
#ifdef Debug
|
|
puts("Got Here!");
|
|
fflush(stdout);
|
|
#endif /*Debug*/
|
|
sourceStart = *source;
|
|
changeState_2022(_this,
|
|
source,
|
|
sourceLimit,
|
|
flush,
|
|
err);
|
|
(*source)++;
|
|
myOffset += *source - sourceStart;
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void T_UConverter_fromUnicode_DBCS (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = *source;
|
|
unsigned char *myTarget = (unsigned char *) *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
CompactShortArray *myFromUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
|
|
myFromUnicode = _this->sharedData->table->dbcs.fromUnicode;
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceChar = (UChar) mySource[mySourceIndex++];
|
|
|
|
/*Gets the corresponding codepoint */
|
|
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
|
if (targetUniChar != missingCharMarker)
|
|
{
|
|
/*writes the char to the output stream */
|
|
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) targetUniChar;
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) targetUniChar;
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
|
_this->invalidUCharLength = 1;
|
|
|
|
|
|
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
|
FromU_CALLBACK_MACRO(_this,
|
|
(char *)myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidUCharLength = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;;
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_fromUnicode_UTF8 (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = *source;
|
|
unsigned char *myTarget = (unsigned char *) *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
int8_t targetCharByteNum = 0;
|
|
UChar mySourceChar = 0x0000;
|
|
uint32_t ch;
|
|
int16_t i, bytesToWrite = 0;
|
|
uint32_t ch2;
|
|
char temp[4];
|
|
|
|
if (_this->fromUnicodeStatus)
|
|
{
|
|
ch = _this->fromUnicodeStatus;
|
|
_this->fromUnicodeStatus = 0;
|
|
goto lowsurogate;
|
|
}
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
bytesToWrite = 0;
|
|
ch = mySource[mySourceIndex++];
|
|
|
|
if (ch < 0x80) /* Single byte */
|
|
{
|
|
myTarget[myTargetIndex++] = (char) ch;
|
|
}
|
|
else if (ch < 0x800) /* Double byte */
|
|
{
|
|
myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80);
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80);
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
else
|
|
/* Check for surogates */
|
|
{
|
|
if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd))
|
|
{
|
|
lowsurogate:
|
|
if (mySourceIndex < sourceLength && !flush)
|
|
{
|
|
ch2 = mySource[mySourceIndex];
|
|
if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd))
|
|
{
|
|
ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase;
|
|
++mySourceIndex;
|
|
}
|
|
}
|
|
}
|
|
if (ch < 0x10000)
|
|
{
|
|
bytesToWrite = 3;
|
|
temp[0] = (char) ((ch >> 12) | 0xe0);
|
|
temp[1] = (char) ((ch >> 6) & 0x3f | 0x80);
|
|
temp[2] = (char) (ch & 0x3f | 0x80);
|
|
}
|
|
else
|
|
{
|
|
bytesToWrite = 4;
|
|
temp[0] = (char) ((ch >> 18) | 0xf0);
|
|
temp[1] = (char) ((ch >> 12) & 0x3f | 0xe0);
|
|
temp[2] = (char) ((ch >> 6) & 0x3f | 0x80);
|
|
temp[3] = (char) (ch & 0x3f | 0x80);
|
|
}
|
|
for (i = 0; i < bytesToWrite; i++)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = temp[i];
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i];
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = *source;
|
|
unsigned char *myTarget = (unsigned char *) *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
int8_t targetCharByteNum = 0;
|
|
UChar mySourceChar = 0x0000;
|
|
uint32_t ch;
|
|
int16_t i, bytesToWrite = 0;
|
|
uint32_t ch2;
|
|
char temp[4];
|
|
|
|
if (_this->fromUnicodeStatus)
|
|
{
|
|
ch = _this->fromUnicodeStatus;
|
|
_this->fromUnicodeStatus = 0;
|
|
goto lowsurogate;
|
|
}
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
bytesToWrite = 0;
|
|
ch = mySource[mySourceIndex++];
|
|
|
|
if (ch < 0x80) /* Single byte */
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) ch;
|
|
}
|
|
else if (ch < 0x800) /* Double byte */
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80);
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80);
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
else
|
|
/* Check for surogates */
|
|
{
|
|
if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd))
|
|
{
|
|
lowsurogate:
|
|
if (mySourceIndex < sourceLength && !flush)
|
|
{
|
|
ch2 = mySource[mySourceIndex];
|
|
if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd))
|
|
{
|
|
ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase;
|
|
++mySourceIndex;
|
|
}
|
|
}
|
|
}
|
|
if (ch < 0x10000)
|
|
{
|
|
bytesToWrite = 3;
|
|
temp[0] = (char) ((ch >> 12) | 0xe0);
|
|
temp[1] = (char) ((ch >> 6) & 0x3f | 0x80);
|
|
temp[2] = (char) (ch & 0x3f | 0x80);
|
|
}
|
|
else
|
|
{
|
|
bytesToWrite = 4;
|
|
temp[0] = (char) ((ch >> 18) | 0xf0);
|
|
temp[1] = (char) ((ch >> 12) & 0x3f | 0xe0);
|
|
temp[2] = (char) ((ch >> 6) & 0x3f | 0x80);
|
|
temp[3] = (char) (ch & 0x3f | 0x80);
|
|
}
|
|
for (i = 0; i < bytesToWrite; i++)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = temp[i];
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i];
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
void T_UConverter_fromUnicode_UTF16_BE (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = *source;
|
|
unsigned char *myTarget = (unsigned char *) *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
UChar mySourceChar;
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceChar = (UChar) mySource[mySourceIndex++];
|
|
myTarget[myTargetIndex++] = (char) (mySourceChar >> 8);
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) mySourceChar;
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;;
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_fromUnicode_UTF16_LE (UConverter * _this,
|
|
char **target,
|
|
const char *targetLimit,
|
|
const UChar ** source,
|
|
const UChar * sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const UChar *mySource = *source;
|
|
unsigned char *myTarget = (unsigned char *) *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - (char *) myTarget;
|
|
int32_t sourceLength = sourceLimit - mySource;
|
|
UChar mySourceChar;
|
|
|
|
|
|
/*writing the char to the output stream */
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
mySourceChar = (UChar) mySource[mySourceIndex++];
|
|
myTarget[myTargetIndex++] = (char) mySourceChar;
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char) (mySourceChar >> 8);
|
|
}
|
|
else
|
|
{
|
|
_this->charErrorBuffer[0] = (char) (mySourceChar >> 8);
|
|
_this->charErrorBufferLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;;
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_toUnicode_UTF16_BE (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const unsigned char *mySource = (unsigned char *) *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - (char *) mySource;
|
|
UChar mySourceChar = 0x0000;
|
|
UChar oldmySourceChar = 0x0000;
|
|
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UChar */
|
|
mySourceChar = (unsigned char) mySource[mySourceIndex++];
|
|
oldmySourceChar = mySourceChar;
|
|
if (_this->toUnicodeStatus == 0)
|
|
{
|
|
_this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
if (_this->toUnicodeStatus != 0xFFFF)
|
|
mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | mySourceChar);
|
|
_this->toUnicodeStatus = 0;
|
|
|
|
|
|
|
|
myTarget[myTargetIndex++] = mySourceChar;
|
|
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (U_SUCCESS(*err) && flush
|
|
&& (mySourceIndex == sourceLength)
|
|
&& (_this->toUnicodeStatus != 0x00))
|
|
{
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_toUnicode_UTF16_LE (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const unsigned char *mySource = (unsigned char *) *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - (char *) mySource;
|
|
CompactShortArray *myToUnicode = NULL;
|
|
UChar targetUniChar = 0x0000;
|
|
UChar mySourceChar = 0x0000;
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
/*gets the corresponding UniChar */
|
|
mySourceChar = (unsigned char) mySource[mySourceIndex++];
|
|
|
|
if (_this->toUnicodeStatus == 0x00)
|
|
{
|
|
_this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar;
|
|
}
|
|
else
|
|
{
|
|
if (_this->toUnicodeStatus == 0xFFFF)
|
|
mySourceChar = (UChar) (mySourceChar << 8);
|
|
else
|
|
{
|
|
mySourceChar <<= 8;
|
|
mySourceChar |= (UChar) (_this->toUnicodeStatus);
|
|
}
|
|
_this->toUnicodeStatus = 0x00;
|
|
myTarget[myTargetIndex++] = mySourceChar;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
if (U_SUCCESS(*err) && flush
|
|
&& (mySourceIndex == sourceLength)
|
|
&& (_this->toUnicodeStatus != 0x00))
|
|
{
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
void T_UConverter_toUnicode_UTF8 (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const unsigned char *mySource = (unsigned char *) *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - (char *) mySource;
|
|
uint32_t ch = 0 ,
|
|
ch2 =0 ,
|
|
i =0; /* Index into the current # of bytes consumed in the current sequence */
|
|
uint32_t inBytes = 0; /* Total number of bytes in the current UTF8 sequence */
|
|
|
|
if (_this->toUnicodeStatus)
|
|
{
|
|
i = _this->invalidCharLength; /* restore # of bytes consumed */
|
|
inBytes = _this->toUnicodeStatus; /* Restore size of current sequence */
|
|
|
|
ch = _this->mode; /*Stores the previously calculated ch from a previous call*/
|
|
_this->toUnicodeStatus = 0;
|
|
_this->invalidCharLength = 0;
|
|
goto morebytes;
|
|
}
|
|
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
ch = 0;
|
|
ch = ((uint32_t)mySource[mySourceIndex++]) & 0x000000FF;
|
|
if (ch < 0x80) /* Simple case */
|
|
{
|
|
myTarget[myTargetIndex++] = (UChar) ch;
|
|
}
|
|
else
|
|
{
|
|
/* store the first char */
|
|
|
|
inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */
|
|
_this->invalidCharBuffer[0] = (char)ch;
|
|
i = 1;
|
|
|
|
morebytes:
|
|
for (; i < inBytes; i++)
|
|
{
|
|
{
|
|
if (mySourceIndex >= sourceLength)
|
|
{
|
|
if (flush)
|
|
{
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_this->toUnicodeStatus = inBytes;
|
|
_this->invalidCharLength = (int8_t)i;
|
|
}
|
|
goto donefornow;
|
|
}
|
|
_this->invalidCharBuffer[i] = (char) (ch2 = (((uint32_t)mySource[mySourceIndex++]) & 0x000000FF));
|
|
if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */
|
|
break;
|
|
}
|
|
ch <<= 6;
|
|
ch += ch2;
|
|
}
|
|
|
|
|
|
ch -= offsetsFromUTF8[inBytes];
|
|
|
|
if (i == inBytes && ch <= kMaximumUTF16)
|
|
{
|
|
if (ch <= kMaximumUCS2)
|
|
{
|
|
myTarget[myTargetIndex++] = (UChar) ch;
|
|
}
|
|
else
|
|
{
|
|
ch -= halfBase;
|
|
myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart);
|
|
ch = (ch & halfMask) + kSurrogateLowStart;
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
myTarget[myTargetIndex++] = (char)ch;
|
|
}
|
|
else
|
|
{
|
|
_this->invalidUCharBuffer[0] = (UChar) ch;
|
|
_this->invalidUCharLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*err = U_ILLEGAL_CHAR_FOUND;
|
|
_this->invalidCharLength = (int8_t)i;
|
|
|
|
#ifdef Debug
|
|
printf("inbytes %d\n, _this->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n", inBytes, _this->invalidCharLength, mySource[mySourceIndex]);
|
|
#endif
|
|
/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */
|
|
ToU_CALLBACK_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
(const char *)mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
/* End of target buffer */
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
donefornow:
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
_this->mode = ch; /*stores a partially calculated target*/
|
|
}
|
|
|
|
void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
|
|
UChar ** target,
|
|
const UChar * targetLimit,
|
|
const char **source,
|
|
const char *sourceLimit,
|
|
int32_t *offsets,
|
|
bool_t flush,
|
|
UErrorCode * err)
|
|
{
|
|
const unsigned char *mySource = (unsigned char *) *source;
|
|
UChar *myTarget = *target;
|
|
int32_t mySourceIndex = 0;
|
|
int32_t myTargetIndex = 0;
|
|
int32_t targetLength = targetLimit - myTarget;
|
|
int32_t sourceLength = sourceLimit - (char *) mySource;
|
|
uint32_t ch = 0, ch2 = 0, i = 0;
|
|
uint32_t inBytes = 0;
|
|
int32_t* originalOffsets = offsets;
|
|
|
|
|
|
|
|
if (_this->toUnicodeStatus)
|
|
{
|
|
i = _this->invalidCharLength;
|
|
inBytes = _this->toUnicodeStatus;
|
|
_this->toUnicodeStatus = 0;
|
|
ch = _this->mode;
|
|
goto morebytes;
|
|
}
|
|
|
|
while (mySourceIndex < sourceLength)
|
|
{
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
ch = mySource[mySourceIndex++];
|
|
if (ch < 0x80) /* Simple case */
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-1;
|
|
myTarget[myTargetIndex++] = (UChar) ch;
|
|
}
|
|
else
|
|
{
|
|
inBytes = bytesFromUTF8[ch];
|
|
_this->invalidCharBuffer[0] = (char)ch;
|
|
i = 1;
|
|
|
|
morebytes:
|
|
for (; i < inBytes; i++)
|
|
{
|
|
{
|
|
if (mySourceIndex >= sourceLength)
|
|
{
|
|
if (flush)
|
|
{
|
|
if (U_SUCCESS(*err))
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
_this->toUnicodeStatus = 0x00;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
_this->toUnicodeStatus = inBytes;
|
|
_this->invalidCharLength = (int8_t)i;
|
|
}
|
|
goto donefornow;
|
|
}
|
|
_this->invalidCharBuffer[i] = (char) (ch2 = mySource[mySourceIndex++]);
|
|
if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */
|
|
break;
|
|
}
|
|
ch <<= 6;
|
|
ch += ch2;
|
|
}
|
|
|
|
ch -= offsetsFromUTF8[inBytes];
|
|
if (i == inBytes && ch <= kMaximumUTF16)
|
|
{
|
|
if (ch <= kMaximumUCS2) {
|
|
|
|
offsets[myTargetIndex] = mySourceIndex-3;
|
|
myTarget[myTargetIndex++] = (UChar) ch;
|
|
|
|
}
|
|
else
|
|
{
|
|
ch -= halfBase;
|
|
offsets[myTargetIndex] = mySourceIndex-4;
|
|
myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart);
|
|
ch = (ch & halfMask) + kSurrogateLowStart;
|
|
if (myTargetIndex < targetLength)
|
|
{
|
|
offsets[myTargetIndex] = mySourceIndex-4;
|
|
myTarget[myTargetIndex++] = (char)ch;
|
|
}
|
|
else
|
|
{
|
|
_this->invalidUCharBuffer[0] = (UChar) ch;
|
|
_this->invalidUCharLength = 1;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int32_t currentOffset = offsets[myTargetIndex-1];
|
|
|
|
*err = U_ILLEGAL_CHAR_FOUND;
|
|
_this->invalidCharLength = (int8_t)i;
|
|
|
|
/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */
|
|
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
|
|
myTarget,
|
|
myTargetIndex,
|
|
targetLimit,
|
|
(const char *)mySource,
|
|
mySourceIndex,
|
|
sourceLimit,
|
|
offsets,
|
|
flush,
|
|
err);
|
|
|
|
|
|
if (U_FAILURE (*err)) break;
|
|
_this->invalidCharLength = 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
/* End of target buffer */
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
|
|
donefornow:
|
|
*target += myTargetIndex;
|
|
*source += mySourceIndex;
|
|
_this->mode = ch;
|
|
|
|
}
|
|
|
|
/*Empties the internal unicode output buffer */
|
|
void flushInternalUnicodeBuffer (UConverter * _this,
|
|
UChar * myTarget,
|
|
int32_t * myTargetIndex,
|
|
int32_t targetLength,
|
|
int32_t** offsets,
|
|
UErrorCode * err)
|
|
{
|
|
int32_t myUCharErrorBufferLength = _this->UCharErrorBufferLength;
|
|
|
|
if (myUCharErrorBufferLength <= targetLength)
|
|
{
|
|
/*we have enough space
|
|
*So we just copy the whole Error Buffer in to the output stream*/
|
|
icu_memcpy (myTarget,
|
|
_this->UCharErrorBuffer,
|
|
sizeof (UChar) * myUCharErrorBufferLength);
|
|
if (offsets)
|
|
{
|
|
int32_t i=0;
|
|
for (i=0; i<myUCharErrorBufferLength;i++) (*offsets)[i] = -1;
|
|
*offsets += myUCharErrorBufferLength;
|
|
}
|
|
*myTargetIndex += myUCharErrorBufferLength;
|
|
_this->UCharErrorBufferLength = 0;
|
|
}
|
|
else
|
|
{
|
|
/* We don't have enough space so we copy as much as we can
|
|
* on the output stream and update the object
|
|
* by updating the internal buffer*/
|
|
icu_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength);
|
|
if (offsets)
|
|
{
|
|
int32_t i=0;
|
|
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
|
|
*offsets += targetLength;
|
|
}
|
|
icu_memmove (_this->UCharErrorBuffer,
|
|
_this->UCharErrorBuffer + targetLength,
|
|
sizeof (UChar) * (myUCharErrorBufferLength - targetLength));
|
|
_this->UCharErrorBufferLength -= (int8_t) targetLength;
|
|
*myTargetIndex = targetLength;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*Empties the internal codepage output buffer */
|
|
void flushInternalCharBuffer (UConverter * _this,
|
|
char *myTarget,
|
|
int32_t * myTargetIndex,
|
|
int32_t targetLength,
|
|
int32_t** offsets,
|
|
UErrorCode * err)
|
|
{
|
|
int32_t myCharErrorBufferLength = _this->charErrorBufferLength;
|
|
|
|
/*we have enough space */
|
|
if (myCharErrorBufferLength <= targetLength)
|
|
{
|
|
icu_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength);
|
|
if (offsets)
|
|
{
|
|
int32_t i=0;
|
|
for (i=0; i<myCharErrorBufferLength;i++) (*offsets)[i] = -1;
|
|
*offsets += myCharErrorBufferLength;
|
|
}
|
|
|
|
*myTargetIndex += myCharErrorBufferLength;
|
|
_this->charErrorBufferLength = 0;
|
|
}
|
|
else
|
|
/* We don't have enough space so we copy as much as we can
|
|
* on the output stream and update the object*/
|
|
{
|
|
icu_memcpy (myTarget, _this->charErrorBuffer, targetLength);
|
|
if (offsets)
|
|
{
|
|
int32_t i=0;
|
|
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
|
|
*offsets += targetLength;
|
|
}
|
|
icu_memmove (_this->charErrorBuffer,
|
|
_this->charErrorBuffer + targetLength,
|
|
(myCharErrorBufferLength - targetLength));
|
|
_this->charErrorBufferLength -= (int8_t) targetLength;
|
|
*myTargetIndex = targetLength;
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
UChar T_UConverter_getNextUChar_SBCS(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
UChar myUChar;
|
|
|
|
|
|
if ((*source)+1 > sourceLimit)
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return 0xFFFD;
|
|
}
|
|
|
|
|
|
/*Gets the corresponding codepoint*/
|
|
myUChar = converter->sharedData->table->sbcs.toUnicode[(unsigned char)*((*source)++)];
|
|
|
|
if (myUChar != 0xFFFD) return myUChar;
|
|
else
|
|
{
|
|
UChar* myUCharPtr = &myUChar;
|
|
const char* sourceFinal = *source;
|
|
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
|
|
/*Calls the ErrorFunctor after rewinding the input buffer*/
|
|
(*source)--;
|
|
/*It's is very likely that the ErrorFunctor will write to the
|
|
*internal buffers */
|
|
converter->fromCharErrorBehaviour(converter,
|
|
&myUCharPtr,
|
|
myUCharPtr + 1,
|
|
&sourceFinal,
|
|
sourceLimit,
|
|
NULL,
|
|
TRUE,
|
|
err);
|
|
|
|
/*makes the internal caching transparent to the user*/
|
|
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
|
|
|
return myUChar;
|
|
}
|
|
}
|
|
|
|
UChar T_UConverter_getNextUChar_LATIN_1(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
|
|
/* Empties the internal buffers if need be
|
|
* In this case since ErrorFunctors are never called
|
|
* (LATIN_1 is a subset of Unicode)
|
|
*/
|
|
|
|
if ((*source)+1 > sourceLimit)
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return 0xFFFD;
|
|
}
|
|
|
|
return (UChar)*((*source)++);
|
|
}
|
|
|
|
UChar T_UConverter_getNextUChar_ISO_2022(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
const char* mySourceLimit;
|
|
/*Arguments Check*/
|
|
if (sourceLimit < *source)
|
|
{
|
|
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0xFFFD;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, TRUE);
|
|
/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
|
|
if (converter->mode == UCNV_SO) /*Already doing some conversion*/
|
|
{
|
|
|
|
return ucnv_getNextUChar(((UConverterDataISO2022*)(converter->extraInfo))->currentConverter,
|
|
source,
|
|
mySourceLimit,
|
|
err);
|
|
|
|
|
|
}
|
|
/*-Done with buffer with entire buffer
|
|
-Error while converting
|
|
*/
|
|
|
|
|
|
changeState_2022(converter,
|
|
source,
|
|
sourceLimit,
|
|
TRUE,
|
|
err);
|
|
(*source)++;
|
|
}
|
|
|
|
return 0xFFFD;
|
|
}
|
|
|
|
UChar T_UConverter_getNextUChar_DBCS(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
UChar myUChar;
|
|
|
|
/*Checks boundaries and set appropriate error codes*/
|
|
if ((*source)+2 > sourceLimit)
|
|
{
|
|
if ((*source) >= sourceLimit)
|
|
{
|
|
/*Either caller has reached the end of the byte stream*/
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
else if (((*source)+1) == sourceLimit)
|
|
{
|
|
/* a character was cut in half*/
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
}
|
|
|
|
return 0xFFFD;
|
|
}
|
|
|
|
/*Gets the corresponding codepoint*/
|
|
myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode,
|
|
((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)));
|
|
|
|
/*update the input pointer*/
|
|
*source += 2;
|
|
if (myUChar != 0xFFFD) return myUChar;
|
|
else
|
|
{
|
|
UChar* myUCharPtr = &myUChar;
|
|
const char* sourceFinal = *source;
|
|
|
|
/*Calls the ErrorFunctor after rewinding the input buffer*/
|
|
(*source) -= 2;
|
|
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
|
|
/*It's is very likely that the ErrorFunctor will write to the
|
|
*internal buffers */
|
|
converter->fromCharErrorBehaviour(converter,
|
|
&myUCharPtr,
|
|
myUCharPtr + 1,
|
|
&sourceFinal,
|
|
sourceLimit,
|
|
NULL,
|
|
TRUE,
|
|
err);
|
|
/*makes the internal caching transparent to the user*/
|
|
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
|
|
|
return myUChar;
|
|
}
|
|
}
|
|
|
|
UChar T_UConverter_getNextUChar_MBCS(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
UChar myUChar;
|
|
char const *sourceInitial = *source;
|
|
/*safe keeps a ptr to the beginning in case we need to step back*/
|
|
|
|
/*Input boundary check*/
|
|
if ((*source)+1 > sourceLimit)
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return 0xFFFD;
|
|
}
|
|
|
|
/*Checks to see if the byte is a lead*/
|
|
if (converter->sharedData->table->mbcs.starters[(uint8_t)**source] == FALSE)
|
|
{
|
|
/*Not lead byte: we update the source ptr and get the codepoint*/
|
|
myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode,
|
|
(UChar)(**source));
|
|
(*source)++;
|
|
}
|
|
else
|
|
{
|
|
/*Lead byte: we Build the codepoint and get the corresponding character
|
|
* and update the source ptr*/
|
|
if ((*source + 2) > sourceLimit)
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
return 0xFFFD;
|
|
}
|
|
|
|
myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode,
|
|
((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)));
|
|
|
|
(*source) += 2;
|
|
}
|
|
|
|
if (myUChar != 0xFFFD) return myUChar;
|
|
else
|
|
{
|
|
/*rewinds source*/
|
|
const char* sourceFinal = *source;
|
|
UChar* myUCharPtr = &myUChar;
|
|
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
*source = sourceInitial;
|
|
|
|
/*It's is very likely that the ErrorFunctor will write to the
|
|
*internal buffers */
|
|
converter->fromCharErrorBehaviour(converter,
|
|
&myUCharPtr,
|
|
myUCharPtr + 1,
|
|
&sourceFinal,
|
|
sourceLimit,
|
|
NULL,
|
|
TRUE,
|
|
err);
|
|
|
|
/*makes the internal caching transparent to the user*/
|
|
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
|
|
|
return myUChar;
|
|
}
|
|
}
|
|
|
|
UChar T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
UChar myUChar;
|
|
char const *sourceInitial = *source;
|
|
/*safe keeps a ptr to the beginning in case we need to step back*/
|
|
|
|
/*Input boundary check*/
|
|
if ((*source)+1 > sourceLimit)
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return 0xFFFD;
|
|
}
|
|
|
|
/*Checks to see if with have SI/SO shifters
|
|
if we do we change the mode appropriately and we consume the byte*/
|
|
if ((**source == UCNV_SI) || (**source == UCNV_SO))
|
|
{
|
|
converter->mode = **source;
|
|
(*source)++;
|
|
|
|
/*Rechecks boundary after consuming the shift sequence*/
|
|
if ((*source)+1 > sourceLimit)
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return 0xFFFD;
|
|
}
|
|
}
|
|
|
|
if (converter->mode == UCNV_SI)
|
|
{
|
|
/*Not lead byte: we update the source ptr and get the codepoint*/
|
|
myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode,
|
|
(UChar)(**source));
|
|
(*source)++;
|
|
}
|
|
else
|
|
{
|
|
/*Lead byte: we Build the codepoint and get the corresponding character
|
|
* and update the source ptr*/
|
|
if ((*source + 2) > sourceLimit)
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
return 0xFFFD;
|
|
}
|
|
|
|
myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode,
|
|
((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)));
|
|
|
|
(*source) += 2;
|
|
}
|
|
|
|
if (myUChar != 0xFFFD) return myUChar;
|
|
else
|
|
{
|
|
/*rewinds source*/
|
|
const char* sourceFinal = *source;
|
|
UChar* myUCharPtr = &myUChar;
|
|
|
|
*err = U_INVALID_CHAR_FOUND;
|
|
*source = sourceInitial;
|
|
|
|
/*It's is very likely that the ErrorFunctor will write to the
|
|
*internal buffers */
|
|
converter->fromCharErrorBehaviour(converter,
|
|
&myUCharPtr,
|
|
myUCharPtr + 1,
|
|
&sourceFinal,
|
|
sourceLimit,
|
|
NULL,
|
|
TRUE,
|
|
err);
|
|
|
|
/*makes the internal caching transparent to the user*/
|
|
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
|
|
|
return myUChar;
|
|
}
|
|
}
|
|
|
|
UChar T_UConverter_getNextUChar_UTF16_BE(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
UChar myUChar;
|
|
/*Checks boundaries and set appropriate error codes*/
|
|
if ((*source)+2 > sourceLimit)
|
|
{
|
|
if ((*source) >= sourceLimit)
|
|
{
|
|
/*Either caller has reached the end of the byte stream*/
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
else if (((*source)+1) == sourceLimit)
|
|
{
|
|
/* a character was cut in half*/
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
}
|
|
|
|
return 0xFFFD;
|
|
}
|
|
|
|
|
|
/*Gets the corresponding codepoint*/
|
|
|
|
myUChar = ((uint16_t)((**source)) << 8) |((uint8_t)*((*source)+1));
|
|
*source += 2;
|
|
return myUChar;
|
|
}
|
|
|
|
|
|
UChar T_UConverter_getNextUChar_UTF16_LE(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
UChar myUChar;
|
|
/*Checks boundaries and set appropriate error codes*/
|
|
if ((*source)+2 > sourceLimit)
|
|
{
|
|
if ((*source) >= sourceLimit)
|
|
{
|
|
/*Either caller has reached the end of the byte stream*/
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
}
|
|
else if (((*source)+1) == sourceLimit)
|
|
{
|
|
/* a character was cut in half*/
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
}
|
|
|
|
return 0xFFFD;
|
|
}
|
|
|
|
|
|
/*Gets the corresponding codepoint*/
|
|
myUChar = ((uint16_t)*((*source)+1) << 8) |((uint8_t)((**source)));
|
|
/*updates the source*/
|
|
*source += 2;
|
|
return myUChar;
|
|
}
|
|
|
|
UChar T_UConverter_getNextUChar_UTF8(UConverter* converter,
|
|
const char** source,
|
|
const char* sourceLimit,
|
|
UErrorCode* err)
|
|
{
|
|
UChar myUChar;
|
|
/*safe keeps a ptr to the beginning in case we need to step back*/
|
|
char const *sourceInitial = *source;
|
|
uint16_t extraBytesToWrite = 1;
|
|
uint8_t myByte;
|
|
uint32_t ch = 0x00000000;
|
|
int8_t isLegalSequence = 1;
|
|
|
|
/*Input boundary check*/
|
|
if ((*source)+1 > sourceLimit)
|
|
{
|
|
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return 0xFFFD;
|
|
}
|
|
|
|
|
|
extraBytesToWrite = (uint16_t)bytesFromUTF8[(uint8_t)**source];
|
|
|
|
if (extraBytesToWrite > 4) goto CALL_ERROR_FUNCTION;
|
|
|
|
|
|
/*The byte sequence is longer than the buffer area passed*/
|
|
|
|
if ((*source + extraBytesToWrite) > sourceLimit)
|
|
{
|
|
*err = U_TRUNCATED_CHAR_FOUND;
|
|
return 0xFFFD;
|
|
}
|
|
else
|
|
{
|
|
switch(extraBytesToWrite)
|
|
{
|
|
/* note: code falls through cases! (sic)*/
|
|
case 5: ch += *((*source)++); ch <<= 6;
|
|
case 4: ch += (myByte = (uint8_t)*((*source)++)); ch <<= 6;
|
|
if ((myByte & 0xC0) == 0)
|
|
{
|
|
isLegalSequence = 0;
|
|
break;
|
|
}
|
|
case 3: ch += (myByte = *((*source)++)); ch <<= 6;
|
|
if ((myByte & 0xC0) == 0)
|
|
{
|
|
isLegalSequence = 0;
|
|
break;
|
|
}
|
|
case 2: ch += (myByte = *((*source)++)); ch <<= 6;
|
|
if ((myByte & 0xC0) == 0)
|
|
{
|
|
isLegalSequence = 0;
|
|
break;
|
|
}
|
|
case 1: ch += (myByte = *((*source)++)); ch <<= 6;
|
|
if ((myByte & 0xC0) == 0)
|
|
{
|
|
isLegalSequence = 0;
|
|
break;
|
|
}
|
|
case 0: ch += (myByte = *((*source)++));
|
|
if ((myByte & 0xC0) == 0)
|
|
{
|
|
isLegalSequence = 0;
|
|
}
|
|
};
|
|
}
|
|
ch -= offsetsFromUTF8[extraBytesToWrite];
|
|
|
|
|
|
if (isLegalSequence == 0) goto CALL_ERROR_FUNCTION;
|
|
|
|
/*we got a UCS-2 Character*/
|
|
if (ch <= kMaximumUCS2) return (UChar)ch;
|
|
/*character out of bounds*/
|
|
else if (ch >= kMaximumUTF16) goto CALL_ERROR_FUNCTION;
|
|
/*Surrogates found*/
|
|
else
|
|
{
|
|
ch -= halfBase;
|
|
/*stores the 2nd surrogate inside the converter for the next call*/
|
|
converter->UCharErrorBuffer[0] = (UChar)((ch >> halfShift) + kSurrogateHighStart);
|
|
converter->UCharErrorBufferLength = 1;
|
|
|
|
/*returns the 1st surrogate*/
|
|
return (UChar)((ch & halfMask) + kSurrogateLowStart);
|
|
}
|
|
|
|
|
|
CALL_ERROR_FUNCTION:
|
|
{
|
|
/*rewinds source*/
|
|
const char* sourceFinal = *source;
|
|
UChar* myUCharPtr = &myUChar;
|
|
|
|
*err = U_ILLEGAL_CHAR_FOUND;
|
|
*source = sourceInitial;
|
|
|
|
/*It's is very likely that the ErrorFunctor will write to the
|
|
*internal buffers */
|
|
converter->fromCharErrorBehaviour(converter,
|
|
&myUCharPtr,
|
|
myUCharPtr + 1,
|
|
&sourceFinal,
|
|
sourceLimit,
|
|
NULL,
|
|
TRUE,
|
|
err);
|
|
|
|
/*makes the internal caching transparent to the user*/
|
|
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
|
|
|
return myUChar;
|
|
}
|
|
}
|