/*
 ********************************************************************************
 *
 *   COPYRIGHT:
 *   (C) Copyright International Business Machines Corporation, 1998
 *   Licensed Material - Program-Property of IBM - All Rights Reserved.
 *   US Government Users Restricted Rights - Use, duplication, or disclosure
 *   restricted by GSA ADP Schedule Contract with IBM Corp.
 *
 ********************************************************************************
 *
 *   uconv_cnv.c:
 *   Implements all the low level conversion functions
 *   T_UnicodeConverter_{to,from}Unicode_$ConversionType
 *
 */

#include "utypes.h"
#include "uhash.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "ucnv_bld.h"
#include "ucnv_err.h"
#include "ucnv_cnv.h"
#include "ucnv.h"
#include "cmemory.h"

/* NOTE(review): the header name after this #include was missing in this copy
 * of the file (a bare #include does not compile when Debug is defined).
 * <stdio.h> is assumed - confirm against the upstream source. */
#ifdef Debug
#include <stdio.h>
#endif

void flushInternalUnicodeBuffer (UConverter * _this,
                                 UChar * myTarget,
                                 int32_t * myTargetIndex,
                                 int32_t targetLength,
                                 int32_t** offsets,
                                 UErrorCode * err);

void flushInternalCharBuffer (UConverter * _this,
                              char *myTarget,
                              int32_t * myTargetIndex,
                              int32_t targetLength,
                              int32_t** offsets,
                              UErrorCode * err);

/*
 * Error-callback glue for the from-Unicode conversion loops.
 *
 * Has to be expanded directly inside the conversion loop: when the
 * converter's callback is UCNV_FROM_U_CALLBACK_STOP the macro executes
 * `break`.  Otherwise the callback is invoked with pointer copies positioned
 * at myTarget + myTargetIndex and mySource + mySourceIndex, and both indexes
 * are then recomputed from wherever the callback left those pointers.
 */
#define FromU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
else \
{ \
  char *myTargetCopy = myTarget + myTargetIndex; \
  const UChar *mySourceCopy = mySource + mySourceIndex; \
  /*copies current values for the ErrorFunctor to update */ \
  /*Calls the ErrorFunctor */ \
  _this->fromUCharErrorBehaviour (_this, \
                                  (char **) &myTargetCopy, \
                                  targetLimit, \
                                  (const UChar **) &mySourceCopy, \
                                  sourceLimit, \
                                  offsets, \
                                  flush, \
                                  err); \
  /*Update the local Indexes so that the conversion can restart at the right points */ \
  mySourceIndex = (mySourceCopy - mySource) ; \
  myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \
}

/*
 * To-Unicode counterpart of FromU_CALLBACK_MACRO: `break`s when the callback
 * is UCNV_TO_U_CALLBACK_STOP, otherwise calls fromCharErrorBehaviour and
 * resynchronises the two indexes from the updated pointer copies.
 */
#define ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
else \
{ \
  UChar *myTargetCopy = myTarget + myTargetIndex; \
  const char *mySourceCopy = mySource + mySourceIndex; \
  /*Calls the ErrorFunctor */ \
  _this->fromCharErrorBehaviour (_this, \
                                 &myTargetCopy, \
                                 targetLimit, \
                                 (const char **) &mySourceCopy, \
                                 sourceLimit, \
                                 offsets, \
                                 flush, \
                                 err); \
  /*Update the local Indexes so that the conversion can restart at the right points */ \
  mySourceIndex = ((char*)mySourceCopy - (char*)mySource); \
  myTargetIndex = (myTargetCopy - myTarget); \
}

/*
 * Offsets-aware variant of FromU_CALLBACK_MACRO.
 *
 * The callback receives offsets + myTargetIndex.  After it returns, the
 * value of `currentOffset` - a variable that must be in scope where the
 * macro is expanded - is added to every offsets[] entry between the old and
 * the new target index.
 */
#define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
else \
{ \
  char *myTargetCopy = myTarget + myTargetIndex; \
  const UChar *mySourceCopy = mySource + mySourceIndex; \
  int32_t My_i = myTargetIndex; \
  /*copies current values for the ErrorFunctor to update */ \
  /*Calls the ErrorFunctor */ \
  _this->fromUCharErrorBehaviour (_this, \
                                  (char **) &myTargetCopy, \
                                  targetLimit, \
                                  (const UChar **) &mySourceCopy, \
                                  sourceLimit, \
                                  offsets + myTargetIndex, \
                                  flush, \
                                  err); \
  /*Update the local Indexes so that the conversion can restart at the right points */ \
  mySourceIndex = mySourceCopy - mySource ; \
  myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \
  for (;My_i < myTargetIndex;My_i++) offsets[My_i] += currentOffset ; \
}

/*
 * Offsets-aware variant of ToU_CALLBACK_MACRO; same `currentOffset`
 * requirement as FromU_CALLBACK_OFFSETS_LOGIC_MACRO.
 */
#define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
else \
{ \
  UChar *myTargetCopy = myTarget + myTargetIndex; \
  const char *mySourceCopy = mySource + mySourceIndex; \
  int32_t My_i = myTargetIndex; \
  _this->fromCharErrorBehaviour (_this, \
                                 &myTargetCopy, \
                                 targetLimit, \
                                 (const char **) &mySourceCopy, \
                                 sourceLimit, \
                                 offsets + myTargetIndex, \
                                 flush, \
                                 err); \
  /*Update the local Indexes so that the conversion can restart at the right points */ \
  mySourceIndex = (char *)mySourceCopy - (char*)mySource; \
  myTargetIndex = ((UChar*)myTargetCopy - (UChar*)myTarget); \
  for (;My_i < myTargetIndex;My_i++) {offsets[My_i] += currentOffset ; } \
}

/* UTF-8 Conversion DATA
 *   for more information see Unicode Standard 2.0 , Transformation Formats Appendix A-9
 */
const uint32_t kReplacementCharacter = 0x0000FFFD;
const uint32_t kMaximumUCS2 = 0x0000FFFF;
const uint32_t kMaximumUTF16 = 0x0010FFFF;
const uint32_t kMaximumUCS4 = 0x7FFFFFFF;
const int8_t halfShift = 10;
const uint32_t halfBase = 0x0010000;
const uint32_t halfMask = 0x3FF;
const uint32_t kSurrogateHighStart = 0xD800;
const uint32_t kSurrogateHighEnd = 0xDBFF;
const uint32_t kSurrogateLowStart = 0xDC00;
const uint32_t kSurrogateLowEnd = 0xDFFF;

const uint32_t offsetsFromUTF8[7] = {0,
  (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
  (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080
};

#define ESC_2022 0x1B /*ESC*/

typedef enum
{
  INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
  VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
  VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
  VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
} UCNV_TableStates_2022;

/*Below are the 3 arrays depicting a state transition table*/

/* Per-byte code table.  The only non-zero entries are ESC (0x1B) and the
 * ASCII characters '$' '%' '(' '-' '/' '@' 'A'..'M' 'R' 'Z'.
 * NOTE(review): presumably consumed by getKey_2022 (not visible here) to
 * build the keys stored in escSeqStateTable_Key_2022 - confirm. */
int8_t normalize_esq_chars_2022[256] = {
/*        0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
/* 0x */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 1x */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,
/* 2x */  0,  0,  0,  0,  4,  7,  0,  0,  2,  0,  0,  0,  0,  3,  0,  6,
/* 3x */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 4x */  5,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,  0,  0,
/* 5x */  0,  0, 21,  0,  0,  0,  0,  0,  0,  0, 22,  0,  0,  0,  0,  0,
/* 6x */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 7x */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 8x */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 9x */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* Ax */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* Bx */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* Cx */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* Dx */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* Ex */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* Fx */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};

#define MAX_STATES_2022 54

/* Keys of the state table, in ascending order.  Entry i here belongs with
 * entry i of the Result and Value tables below. */
int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
  /*  0 */ 1, 34, 36, 39,
  /*  4 */ 1093, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1109,
  /* 17 */ 1154, 1157, 1160, 1161,
  /* 21 */ 1254, 1257,
  /* 23 */ 35105,
  /* 24 */ 36933, 36936, 36937, 36938, 36939, 36940,
  /* 30 */ 36942, 36943, 36944, 36945, 36946, 36947, 36948,
  /* 37 */ 40133, 40136, 40138, 40139, 40140, 40141,
  /* 43 */ 1123363,
  /* 44 */ 35947624, 35947625, 35947626, 35947627, 35947629, 35947630, 35947631,
  /* 51 */ 35947635, 35947636, 35947638
};

/* Converter name for each key; NULL where the matching Value entry is
 * VALID_NON_TERMINAL_2022.  Entry 25 used to read "bm-367"; it now matches
 * entry 19 ("ibm-367"), whose neighbours carry the same names as the
 * neighbours of entry 25. */
const char* escSeqStateTable_Result_2022[MAX_STATES_2022] = {
  /*  0 */ NULL, NULL, NULL, NULL,
  /*  4 */ "latin1", "latin1", "latin1",
  /*  7 */ "ibm-865", "ibm-865", "ibm-865", "ibm-865", "ibm-865", "ibm-865",
  /* 13 */ "ibm-895", "ibm-943", "latin1", "latin1",
  /* 17 */ NULL, "ibm-955", "ibm-367", "ibm-952",
  /* 21 */ NULL, "UTF8",
  /* 23 */ NULL,
  /* 24 */ "ibm-955", "ibm-367", "ibm-952", "ibm-949", "ibm-953", "ibm-1383",
  /* 30 */ "ibm-952",
  /* 31 */ "ibm-964", "ibm-964", "ibm-964", "ibm-964", "ibm-964", "ibm-964",
  /* 37 */ "UTF16_PlatformEndian", "UTF16_PlatformEndian", "UTF16_PlatformEndian",
  /* 40 */ "UTF16_PlatformEndian", "UTF16_PlatformEndian", "UTF16_PlatformEndian",
  /* 43 */ NULL,
  /* 44 */ "latin1", "ibm-912", "ibm-913", "ibm-914", "ibm-813", "ibm-1089", "ibm-920",
  /* 51 */ "ibm-915", "ibm-915", "latin1"
};

/* State classification for each key. */
UCNV_TableStates_2022 escSeqStateTable_Value_2022[MAX_STATES_2022] = {
  /*  0 */ VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022,
  /*  4 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_MAYBE_TERMINAL_2022,
  /*  7 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 12 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 17 */ VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 21 */ VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 23 */ VALID_NON_TERMINAL_2022,
  /* 24 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 29 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 34 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 39 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 43 */ VALID_NON_TERMINAL_2022,
  /* 44 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
  /* 49 */ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022
};

/*for 2022 looks ahead in the stream
 *to determine the longest possible convertible
 *data stream*/
static const char* getEndOfBuffer_2022(const char* source,
                                       const char* sourceLimit,
                                       bool_t flush);

/*runs through a state machine to determine the escape sequence - codepage correspondence
 *changes the pointer pointed to be _this->extraInfo*/
static void changeState_2022(UConverter* _this,
                             const char** source,
                             const char* sourceLimit,
                             bool_t flush,
                             UErrorCode* err);

UCNV_TableStates_2022 getKey_2022(char source,
                                  int32_t* key,
                                  int32_t* offset);

/* END OF UTF-8 Conversion DATA */

const int8_t bytesFromUTF8[256] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 }; const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; #define missingCharMarker 0xFFFF #define missingUCharMarker 0xFFFD void T_UConverter_toUnicode_SBCS (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { char *mySource = (char *) *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - (char *) mySource; UChar *myToUnicode = NULL; UChar targetUniChar = 0x0000; myToUnicode = _this->sharedData->table->sbcs.toUnicode; while (mySourceIndex < sourceLength) { /*writing the UniChar to the output stream */ if (myTargetIndex < targetLength) { /*gets the corresponding UniChar */ targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]]; if (targetUniChar != missingUCharMarker) { /* writes the UniChar to the output stream */ myTarget[myTargetIndex++] = targetUniChar; } else { *err = U_INVALID_CHAR_FOUND; _this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1]; _this->invalidCharLength = 1; ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidCharLength = 0; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex; return; } void T_UConverter_toUnicode_DBCS (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const char *mySource = ( char *) 
*source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - (char *) mySource; CompactShortArray *myToUnicode = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; myToUnicode = _this->sharedData->table->dbcs.toUnicode; while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { /*gets the corresponding UniChar */ mySourceChar = (unsigned char) mySource[mySourceIndex++]; /*We have no internal state, we should */ if (_this->toUnicodeStatus == 0x00) { _this->toUnicodeStatus = (unsigned char) mySourceChar; } else { if (_this->toUnicodeStatus != 0x00) { mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | (mySourceChar & 0x00FF)); _this->toUnicodeStatus = 0x00; } targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); /*writing the UniChar to the output stream */ if (targetUniChar != missingUCharMarker) { /*writes the UniChar to the output stream */ myTarget[myTargetIndex++] = targetUniChar; } else { *err = U_INVALID_CHAR_FOUND; _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); _this->invalidCharBuffer[1] = (char) mySourceChar; _this->invalidCharLength = 2; ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidCharLength = 0; } } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } /*If at the end of conversion we are still carrying state information *flush is TRUE, we can deduce that the input stream is truncated */ if ((flush == TRUE) && (mySourceIndex == sourceLength) && (_this->toUnicodeStatus != 0x00)) { if (U_SUCCESS(*err)) { *err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } *target += myTargetIndex; *source += mySourceIndex; return; } void T_UConverter_toUnicode_LATIN_1 (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char 
*sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { unsigned char *mySource = (unsigned char *) *source; UChar *myTarget = *target; int32_t sourceLength = sourceLimit - (char *) mySource; int32_t readLen = 0; int32_t i = 0; /*Since there is no risk of encountering illegal Chars *we need to pad our latin1 chars to create Unicode codepoints *we need to go as far a min(targetLen, sourceLen) *in case we don't have enough buffer space *we set the error flag accordingly */ if ((targetLimit - *target) < sourceLength) { readLen = targetLimit - *target; *err = U_INDEX_OUTOFBOUNDS_ERROR; } else { readLen = sourceLimit - (char *) mySource; } for (i = 0; i < readLen; i++) myTarget[i] = (UChar) mySource[i]; *target += i; *source += i; return; } void T_UConverter_fromUnicode_LATIN_1 (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; unsigned char *myTarget = (unsigned char *) *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { if (mySource[mySourceIndex] < 0x0100) { /*writes the char to the output stream */ myTarget[myTargetIndex++] = (char) mySource[mySourceIndex++]; } else { *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar) mySource[mySourceIndex++]; _this->invalidUCharLength = 1; /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ FromU_CALLBACK_MACRO(_this, (char *)myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidUCharLength = 0; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex;; return; } void 
T_UConverter_fromUnicode_SBCS (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; unsigned char *myTarget = (unsigned char *) *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; CompactByteArray *myFromUnicode; unsigned char targetChar = 0x00; myFromUnicode = _this->sharedData->table->sbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]); if (myTargetIndex < targetLength) { mySourceIndex++; if (targetChar != 0 || !mySource[mySourceIndex - 1]) { /*writes the char to the output stream */ myTarget[myTargetIndex++] = targetChar; } else { *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1]; _this->invalidUCharLength = 1; /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ FromU_CALLBACK_MACRO(_this, (char *)myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) { break; } _this->invalidUCharLength = 0; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex; return; } void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const char *mySource = *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; CompactShortArray *myToUnicode = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; int32_t myMode = _this->mode; myToUnicode = 
_this->sharedData->table->dbcs.toUnicode; while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { /*gets the corresponding UniChar */ mySourceChar = (unsigned char) (mySource[mySourceIndex++]); if (mySourceChar == UCNV_SI) myMode = UCNV_SI; else if (mySourceChar == UCNV_SO) myMode = UCNV_SO; else if ((myMode == UCNV_SO) && (_this->toUnicodeStatus == 0x00)) { _this->toUnicodeStatus = (unsigned char) mySourceChar; } else { /*In case there is a state, we update the source char *by concatenating the previous char with the current *one */ if (_this->toUnicodeStatus != 0x00) { mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); _this->toUnicodeStatus = 0x00; } else mySourceChar &= 0x00FF; /*gets the corresponding Unicode codepoint */ targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); /*writing the UniChar to the output stream */ if (targetUniChar != missingUCharMarker) { /*writes the UniChar to the output stream */ myTarget[myTargetIndex++] = targetUniChar; } else { *err = U_INVALID_CHAR_FOUND; if (mySourceChar > 0xff) { _this->invalidCharLength = 2; _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); _this->invalidCharBuffer[1] = (char) mySourceChar; } else { _this->invalidCharLength = 1; _this->invalidCharBuffer[0] = (char) mySourceChar; } _this->mode = myMode; ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidCharLength = 0; } } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } /*If at the end of conversion we are still carrying state information *flush is TRUE, we can deduce that the input stream is truncated */ if (_this->toUnicodeStatus && (mySourceIndex == sourceLength) && (flush == TRUE)) { if (U_SUCCESS(*err)) { *err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } *target += myTargetIndex; *source += mySourceIndex; _this->mode = myMode; return; } void 
T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const char *mySource = *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; CompactShortArray *myToUnicode = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; int32_t myMode = _this->mode; int32_t* originalOffsets = offsets; myToUnicode = _this->sharedData->table->dbcs.toUnicode; while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { /*gets the corresponding UniChar */ mySourceChar = (unsigned char) (mySource[mySourceIndex++]); if (mySourceChar == UCNV_SI) myMode = UCNV_SI; else if (mySourceChar == UCNV_SO) myMode = UCNV_SO; else if ((myMode == UCNV_SO) && (_this->toUnicodeStatus == 0x00)) { _this->toUnicodeStatus = (unsigned char) mySourceChar; } else { /*In case there is a state, we update the source char *by concatenating the previous char with the current *one */ if (_this->toUnicodeStatus != 0x00) { mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); _this->toUnicodeStatus = 0x00; } else mySourceChar &= 0x00FF; /*gets the corresponding Unicode codepoint */ targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); /*writing the UniChar to the output stream */ if (targetUniChar != missingUCharMarker) { /*writes the UniChar to the output stream */ { if(myMode == UCNV_SO) offsets[myTargetIndex] = mySourceIndex-2; /* double byte */ else offsets[myTargetIndex] = mySourceIndex-1; /* single byte */ } myTarget[myTargetIndex++] = targetUniChar; } else { int32_t currentOffset = offsets[myTargetIndex-1] + 2;/* Because mySourceIndex was already incremented */ *err = U_INVALID_CHAR_FOUND; if (mySourceChar > 0xFF) { _this->invalidCharLength = 2; _this->invalidCharBuffer[0] = (char) 
(mySourceChar >> 8); _this->invalidCharBuffer[1] = (char) mySourceChar; } else { _this->invalidCharLength = 1; _this->invalidCharBuffer[0] = (char) mySourceChar; } _this->mode = myMode; ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidCharLength = 0; } } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } /*If at the end of conversion we are still carrying state information *flush is TRUE, we can deduce that the input stream is truncated */ if (_this->toUnicodeStatus && (mySourceIndex == sourceLength) && (flush == TRUE)) { if (U_SUCCESS(*err)) { *err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } *target += myTargetIndex; *source += mySourceIndex; _this->mode = myMode; return; } void T_UConverter_toUnicode_MBCS (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const char *mySource = *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; CompactShortArray *myToUnicode = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; bool_t *myStarters = NULL; myToUnicode = _this->sharedData->table->mbcs.toUnicode; myStarters = _this->sharedData->table->mbcs.starters; while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { /*gets the corresponding UniChar */ mySourceChar = (unsigned char) (mySource[mySourceIndex++]); if (myStarters[(uint8_t) mySourceChar] && (_this->toUnicodeStatus == 0x00)) { _this->toUnicodeStatus = (unsigned char) mySourceChar; } else { /*In case there is a state, we update the source char *by concatenating the previous char with the current *one */ if (_this->toUnicodeStatus != 0x00) { mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); 
_this->toUnicodeStatus = 0x00; } /*gets the corresponding Unicode codepoint */ targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); /*writing the UniChar to the output stream */ if (targetUniChar != missingUCharMarker) { myTarget[myTargetIndex++] = targetUniChar; } else { *err = U_INVALID_CHAR_FOUND; if (mySourceChar > 0xff) { _this->invalidCharLength = 2; _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); _this->invalidCharBuffer[1] = (char) mySourceChar; } else { _this->invalidCharLength = 1; _this->invalidCharBuffer[0] = (char) mySourceChar; } ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidCharLength = 0; } } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } /*If at the end of conversion we are still carrying state information *flush is TRUE, we can deduce that the input stream is truncated */ if (_this->toUnicodeStatus && (mySourceIndex == sourceLength) && (flush == TRUE)) { if (U_SUCCESS(*err)) { *err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } *target += myTargetIndex; *source += mySourceIndex; return; } void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const char *mySource = *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; CompactShortArray *myToUnicode = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; UChar oldMySourceChar; bool_t *myStarters = NULL; int32_t* originalOffsets = offsets; myToUnicode = _this->sharedData->table->mbcs.toUnicode; myStarters = _this->sharedData->table->mbcs.starters; while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { /*gets the corresponding 
UniChar */ mySourceChar = (unsigned char) (mySource[mySourceIndex++]); if (myStarters[(uint8_t) mySourceChar] && (_this->toUnicodeStatus == 0x00)) { _this->toUnicodeStatus = (unsigned char) mySourceChar; } else { /*In case there is a state, we update the source char *by concatenating the previous char with the current *one */ if (_this->toUnicodeStatus != 0x00) { mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); _this->toUnicodeStatus = 0x00; } /*gets the corresponding Unicode codepoint */ targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); /*writing the UniChar to the output stream */ if (targetUniChar != missingUCharMarker) { /*writes the UniChar to the output stream */ { if (targetUniChar > 0x00FF) offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */ else offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */ } myTarget[myTargetIndex++] = targetUniChar; oldMySourceChar = mySourceChar; } else { int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1); *err = U_INVALID_CHAR_FOUND; if (mySourceChar > 0xff) { _this->invalidCharLength = 2; _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); _this->invalidCharBuffer[1] = (char) mySourceChar; } else { _this->invalidCharLength = 1; _this->invalidCharBuffer[0] = (char) mySourceChar; } ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidCharLength = 0; } } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } /*If at the end of conversion we are still carrying state information *flush is TRUE, we can deduce that the input stream is truncated */ if (_this->toUnicodeStatus && (mySourceIndex == sourceLength) && (flush == TRUE)) { if (U_SUCCESS(*err)) { *err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } *target += myTargetIndex; *source += 
mySourceIndex; return; } void T_UConverter_fromUnicode_EBCDIC_STATEFUL (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; char *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; CompactShortArray *myFromUnicode = NULL; UChar targetUniChar = 0x0000; int8_t targetUniCharByteNum = 0; UChar mySourceChar = 0x0000; bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus; bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS; myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { mySourceChar = (UChar) mySource[mySourceIndex++]; targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); oldIsTargetUCharDBCS = isTargetUCharDBCS; isTargetUCharDBCS = (targetUniChar>0x00FF); if (targetUniChar != missingCharMarker) { if (oldIsTargetUCharDBCS != isTargetUCharDBCS) { if (isTargetUCharDBCS) myTarget[myTargetIndex++] = UCNV_SO; else myTarget[myTargetIndex++] = UCNV_SI; if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength)) { _this->charErrorBuffer[0] = (char) targetUniChar; _this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } else if (myTargetIndex+1 >= targetLength) { _this->charErrorBuffer[0] = (char) (targetUniChar >> 8); _this->charErrorBuffer[1] = (char) targetUniChar & 0x00FF; _this->charErrorBufferLength = 2; *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } if (!isTargetUCharDBCS) { myTarget[myTargetIndex++] = (char) targetUniChar; } else { myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); if (myTargetIndex < targetLength) { myTarget[myTargetIndex++] = (char) targetUniChar; } else { _this->charErrorBuffer[0] = (char) targetUniChar; 
_this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } } else { isTargetUCharDBCS = oldIsTargetUCharDBCS; *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar) mySourceChar; _this->invalidUCharLength = 1; _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; FromU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidUCharLength = 0; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex; _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; return; } void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; char *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; CompactShortArray *myFromUnicode = NULL; UChar targetUniChar = 0x0000; int8_t targetUniCharByteNum = 0; UChar mySourceChar = 0x0000; bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus; bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS; int32_t* originalOffsets = offsets; myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { mySourceChar = (UChar) mySource[mySourceIndex++]; targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); oldIsTargetUCharDBCS = isTargetUCharDBCS; isTargetUCharDBCS = (targetUniChar>0x00FF); if (targetUniChar != missingCharMarker) { if (oldIsTargetUCharDBCS != isTargetUCharDBCS) { offsets[myTargetIndex] = mySourceIndex-1; if (isTargetUCharDBCS) myTarget[myTargetIndex++] = UCNV_SO; else myTarget[myTargetIndex++] = UCNV_SI; if 
((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength)) { _this->charErrorBuffer[0] = (char) targetUniChar; _this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } else if (myTargetIndex+1 >= targetLength) { _this->charErrorBuffer[0] = (char) (targetUniChar >> 8); _this->charErrorBuffer[1] = (char) targetUniChar & 0x00FF; _this->charErrorBufferLength = 2; *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } if (!isTargetUCharDBCS) { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) targetUniChar; } else { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); if (myTargetIndex < targetLength) { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) targetUniChar; } else { _this->charErrorBuffer[0] = (char) targetUniChar; _this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } } else { int32_t currentOffset = offsets[myTargetIndex-1]+1; *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar) mySourceChar; _this->invalidUCharLength = 1; /* Breaks out of the loop since behaviour was set to stop */ _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidUCharLength = 0; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex;; _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; return; } void T_UConverter_fromUnicode_MBCS (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; char *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; 
CompactShortArray *myFromUnicode = NULL; UChar targetUniChar = 0x0000; int8_t targetUniCharByteNum = 0; UChar mySourceChar = 0x0000; myFromUnicode = _this->sharedData->table->mbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { mySourceChar = (UChar) mySource[mySourceIndex++]; targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); if (targetUniChar != missingCharMarker) { if (targetUniChar <= 0x00FF) { myTarget[myTargetIndex++] = (char) targetUniChar; } else { myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); if (myTargetIndex < targetLength) { myTarget[myTargetIndex++] = (char) targetUniChar; } else { _this->charErrorBuffer[0] = (char) targetUniChar; _this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } } else { *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar) mySourceChar; _this->invalidUCharLength = 1; FromU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidUCharLength = 0; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex;; return; } void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; char *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; CompactShortArray *myFromUnicode = NULL; UChar targetUniChar = 0x0000; int8_t targetUniCharByteNum = 0; UChar mySourceChar = 0x0000; int32_t* originalOffsets = offsets; myFromUnicode = _this->sharedData->table->mbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { if 
(myTargetIndex < targetLength) { mySourceChar = (UChar) mySource[mySourceIndex++]; targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); if (targetUniChar != missingCharMarker) { if (targetUniChar <= 0x00FF) { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) targetUniChar; } else { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); if (myTargetIndex < targetLength) { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) targetUniChar; } else { _this->charErrorBuffer[0] = (char) targetUniChar; _this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } } else { int32_t currentOffset = mySourceIndex -1; int32_t* offsetsAnchor = offsets; *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar) mySourceChar; _this->invalidUCharLength = 1; FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidUCharLength = 0; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex;; return; } void T_UConverter_fromUnicode_ISO_2022(UConverter* _this, char** target, const char* targetLimit, const UChar** source, const UChar* sourceLimit, int32_t *offsets, bool_t flush, UErrorCode* err) { char const* targetStart = *target; T_UConverter_fromUnicode_UTF8(_this, target, targetLimit, source, sourceLimit, NULL, flush, err); } void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this, char** target, const char* targetLimit, const UChar** source, const UChar* sourceLimit, int32_t *offsets, bool_t flush, UErrorCode* err) { char const* targetStart = *target; T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC(_this, target, targetLimit, source, sourceLimit, offsets, flush, err); { int32_t len = *target - targetStart; int32_t i; /* icu_memmove(offsets+3, offsets, len); MEMMOVE SEEMS 
BROKEN --srl */ for(i=len-1;i>=0;i--) offsets[i] = offsets[i]; } } UCNV_TableStates_2022 getKey_2022(char c, int32_t* key, int32_t* offset) { int32_t togo = *key; int32_t low = 0; int32_t hi = MAX_STATES_2022; int32_t oldmid; if (*key == 0) togo = normalize_esq_chars_2022[c]; else { togo <<= 5; togo += normalize_esq_chars_2022[c]; } while (hi != low) /*binary search*/ { register int32_t mid = (hi+low) >> 1; /*Finds median*/ if (mid == oldmid) break; if (escSeqStateTable_Key_2022[mid] > togo) hi = mid; else if (escSeqStateTable_Key_2022[mid] < togo) low = mid; else /*we found it*/ { *key = togo; *offset = mid; #ifdef Debug printf("found at @ %d\n", mid); #endif /*Debug*/ return escSeqStateTable_Value_2022[mid]; } oldmid = mid; } #ifdef Debug printf("Could not find \"%d\" for %X\n", togo, c); #endif /*Debug*/ *key = 0; *offset = 0; return INVALID_2022; } void changeState_2022(UConverter* _this, const char** source, const char* sourceLimit, bool_t flush, UErrorCode* err) { UConverter* myUConverter; uint32_t key = _this->toUnicodeStatus; UCNV_TableStates_2022 value; UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo); const char* chosenConverterName = NULL; int32_t offset; /*Close the old Converter*/ if (_this->mode == UCNV_SO) ucnv_close(myData2022->currentConverter); myData2022->currentConverter = NULL; _this->mode = UCNV_SI; /*In case we were in the process of consuming an escape sequence we need to reprocess it */ do { #ifdef Debug printf("Pre Stage: char = %x, key = %d, value =%d\n", **source, key, value); fflush(stdout); #endif /*Debug*/ /* Needed explicit cast for key on MVS to make compiler happy - JJD */ value = getKey_2022(**source,(int32_t *) &key, &offset); #ifdef Debug printf("Post Stage: char = %x, key = %d, value =%d\n", **source, key, value); fflush(stdout); #endif /*Debug*/ switch (value) { case VALID_NON_TERMINAL_2022 : { #ifdef Debug puts("VALID_NON_TERMINAL_2022"); #endif /*Debug*/ };break; case VALID_TERMINAL_2022: { 
#ifdef Debug puts("VALID_TERMINAL_2022"); #endif /*Debug*/ chosenConverterName = escSeqStateTable_Result_2022[offset]; key = 0; goto DONE; };break; case INVALID_2022: { #ifdef Debug puts("INVALID_2022"); #endif /*Debug*/ _this->toUnicodeStatus = 0; *err = U_ILLEGAL_CHAR_FOUND; return; } case VALID_MAYBE_TERMINAL_2022: { const char* mySource = (*source + 1); int32_t myKey = key; UCNV_TableStates_2022 myValue = value; int32_t myOffset; #ifdef Debug puts("VALID_MAYBE_TERMINAL_2022"); #endif /*Debug*/ while ((mySource < sourceLimit) && ((myValue == VALID_MAYBE_TERMINAL_2022)||(myValue == VALID_NON_TERMINAL_2022))) { #ifdef Debug printf("MAYBE value = %d myKey = %d %X\n", myValue, myKey, *mySource); #endif /*Debug*/ myValue = getKey_2022(*(mySource++), &myKey, &myOffset); } #ifdef Debug printf("myValue = %d\n", myValue); #endif /*Debug*/ switch (myValue) { case INVALID_2022: { /*Backs off*/ #ifdef Debug puts("VALID_MAYBE_TERMINAL INVALID"); printf("offset = %d\n", offset); #endif /*Debug*/ chosenConverterName = escSeqStateTable_Result_2022[offset]; value = VALID_TERMINAL_2022; #ifdef Debug printf("%d\n", offset); fflush(stdout); #endif /*Debug*/ goto DONE; };break; case VALID_TERMINAL_2022: { /*uses longer escape sequence*/ #ifdef Debug puts("VALID_MAYBE_TERMINAL TERMINAL"); #endif /*Debug*/ *source = mySource-1; /*deals with the overshot in the while above*/ chosenConverterName = escSeqStateTable_Result_2022[myOffset]; key = 0; value = VALID_TERMINAL_2022; goto DONE; };break; case VALID_NON_TERMINAL_2022: #ifdef Debug puts("VALID_MAYBE_TERMINAL NON_TERMINAL"); #endif /*Debug*/ case VALID_MAYBE_TERMINAL_2022: { #ifdef Debug puts("VALID_MAYBE_TERMINAL MAYBE_TERMINAL"); #endif /*Debug*/ if (flush) { /*Backs off*/ chosenConverterName = escSeqStateTable_Result_2022[offset]; value = VALID_TERMINAL_2022; key = 0; goto DONE; } else { key = myKey; value = VALID_NON_TERMINAL_2022; } };break; };break; };break; } } while ((*source)++ <= sourceLimit); DONE: _this->toUnicodeStatus = 
key; if ((value == VALID_NON_TERMINAL_2022) || (value == VALID_MAYBE_TERMINAL_2022)) { #ifdef Debug printf("Out: current **source = %X", **source); #endif return; } if (value > 0) myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err); { #ifdef Debug printf("Error = %d open \"%s\"\n", *err, chosenConverterName); #endif /*Debug*/ if (U_SUCCESS(*err)) { /*Customize the converter with the attributes set on the 2022 converter*/ myUConverter->fromUCharErrorBehaviour = _this->fromUCharErrorBehaviour; myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour; icu_memcpy(myUConverter->subChar, _this->subChar, myUConverter->subCharLen = _this->subCharLen); _this->mode = UCNV_SO; } } return; } /*Checks the first 3 characters of the buffer against valid 2022 escape sequences *if the match we return a pointer to the initial start of the sequence otherwise *we return sourceLimit */ const char* getEndOfBuffer_2022(const char* source, const char* sourceLimit, bool_t flush) { const char* mySource = source; if (source >= sourceLimit) return sourceLimit; do { if (*mySource == ESC_2022) { int8_t i; int32_t key = 0; int32_t offset; UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022; for (i=0; (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022); i++) { value = getKey_2022(*(mySource+i), &key, &offset); #ifdef Debug printf("Look ahead value = %d\n", value); #endif /*Debug*/ } if (value > 0) return mySource; if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) return sourceLimit; } } while (mySource++ < sourceLimit); return sourceLimit; } void T_UConverter_toUnicode_ISO_2022(UConverter* _this, UChar** target, const UChar* targetLimit, const char** source, const char* sourceLimit, int32_t *offsets, bool_t flush, UErrorCode* err) { int32_t base = 0; const char* mySourceLimit; char const* sourceStart; /*Arguments Check*/ if (U_FAILURE(*err)) return; if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source)) { *err = 
U_ILLEGAL_ARGUMENT_ERROR; return; } for (;;) { mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, flush); /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ if (_this->mode == UCNV_SO) /*Already doing some conversion*/ { const UChar* myTargetStart = *target; #ifdef Debug printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); #endif /*Debug*/ ucnv_toUnicode(((UConverterDataISO2022*)(_this->extraInfo))->currentConverter, target, targetLimit, source, mySourceLimit, NULL, flush, err); #ifdef Debug puts("---------------------------> CONVERTED"); printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); printf("err =%d", *err); #endif /*Debug*/ } /*-Done with buffer with entire buffer -Error while converting */ if (U_FAILURE(*err) || (*source == sourceLimit)) return; #ifdef Debug puts("Got Here!"); fflush(stdout); #endif /*Debug*/ sourceStart = *source; changeState_2022(_this, source, sourceLimit, flush, err); (*source)++; } return; } void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this, UChar** target, const UChar* targetLimit, const char** source, const char* sourceLimit, int32_t *offsets, bool_t flush, UErrorCode* err) { int32_t myOffset=0; int32_t base = 0; const char* mySourceLimit; char const* sourceStart; /*Arguments Check*/ if (U_FAILURE(*err)) return; if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source)) { *err = U_ILLEGAL_ARGUMENT_ERROR; return; } for (;;) { mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, flush); /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ if (_this->mode == UCNV_SO) /*Already doing some conversion*/ { const UChar* myTargetStart = *target; #ifdef Debug printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); #endif /*Debug*/ ucnv_toUnicode(((UConverterDataISO2022*)(_this->extraInfo))->currentConverter, target, targetLimit, source, 
mySourceLimit, offsets, flush, err); { int32_t lim = *target - myTargetStart; int32_t i = 0; for (i=base; i < lim;i++) offsets[i] += myOffset; base += lim; } #ifdef Debug puts("---------------------------> CONVERTED"); printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); printf("err =%d", *err); #endif /*Debug*/ } /*-Done with buffer with entire buffer -Error while converting */ if (U_FAILURE(*err) || (*source == sourceLimit)) return; #ifdef Debug puts("Got Here!"); fflush(stdout); #endif /*Debug*/ sourceStart = *source; changeState_2022(_this, source, sourceLimit, flush, err); (*source)++; myOffset += *source - sourceStart; } return; } void T_UConverter_fromUnicode_DBCS (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; unsigned char *myTarget = (unsigned char *) *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; CompactShortArray *myFromUnicode = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { mySourceChar = (UChar) mySource[mySourceIndex++]; /*Gets the corresponding codepoint */ targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); if (targetUniChar != missingCharMarker) { /*writes the char to the output stream */ myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); if (myTargetIndex < targetLength) { myTarget[myTargetIndex++] = (char) targetUniChar; } else { _this->charErrorBuffer[0] = (char) targetUniChar; _this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } else { *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar) 
mySourceChar; _this->invalidUCharLength = 1; /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ FromU_CALLBACK_MACRO(_this, (char *)myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidUCharLength = 0; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex;; return; } void T_UConverter_fromUnicode_UTF8 (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; unsigned char *myTarget = (unsigned char *) *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; int8_t targetCharByteNum = 0; UChar mySourceChar = 0x0000; uint32_t ch; int16_t i, bytesToWrite = 0; uint32_t ch2; char temp[4]; if (_this->fromUnicodeStatus) { ch = _this->fromUnicodeStatus; _this->fromUnicodeStatus = 0; goto lowsurogate; } while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { bytesToWrite = 0; ch = mySource[mySourceIndex++]; if (ch < 0x80) /* Single byte */ { myTarget[myTargetIndex++] = (char) ch; } else if (ch < 0x800) /* Double byte */ { myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0); if (myTargetIndex < targetLength) { myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80); } else { _this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80); _this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } else /* Check for surogates */ { if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd)) { lowsurogate: if (mySourceIndex < sourceLength && !flush) { ch2 = mySource[mySourceIndex]; if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd)) { ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase; 
++mySourceIndex; } } } if (ch < 0x10000) { bytesToWrite = 3; temp[0] = (char) ((ch >> 12) | 0xe0); temp[1] = (char) ((ch >> 6) & 0x3f | 0x80); temp[2] = (char) (ch & 0x3f | 0x80); } else { bytesToWrite = 4; temp[0] = (char) ((ch >> 18) | 0xf0); temp[1] = (char) ((ch >> 12) & 0x3f | 0xe0); temp[2] = (char) ((ch >> 6) & 0x3f | 0x80); temp[3] = (char) (ch & 0x3f | 0x80); } for (i = 0; i < bytesToWrite; i++) { if (myTargetIndex < targetLength) { myTarget[myTargetIndex++] = temp[i]; } else { _this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i]; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex; return; } void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; unsigned char *myTarget = (unsigned char *) *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; int8_t targetCharByteNum = 0; UChar mySourceChar = 0x0000; uint32_t ch; int16_t i, bytesToWrite = 0; uint32_t ch2; char temp[4]; if (_this->fromUnicodeStatus) { ch = _this->fromUnicodeStatus; _this->fromUnicodeStatus = 0; goto lowsurogate; } while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { bytesToWrite = 0; ch = mySource[mySourceIndex++]; if (ch < 0x80) /* Single byte */ { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) ch; } else if (ch < 0x800) /* Double byte */ { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0); if (myTargetIndex < targetLength) { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80); } else { _this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80); 
_this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } else /* Check for surogates */ { if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd)) { lowsurogate: if (mySourceIndex < sourceLength && !flush) { ch2 = mySource[mySourceIndex]; if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd)) { ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase; ++mySourceIndex; } } } if (ch < 0x10000) { bytesToWrite = 3; temp[0] = (char) ((ch >> 12) | 0xe0); temp[1] = (char) ((ch >> 6) & 0x3f | 0x80); temp[2] = (char) (ch & 0x3f | 0x80); } else { bytesToWrite = 4; temp[0] = (char) ((ch >> 18) | 0xf0); temp[1] = (char) ((ch >> 12) & 0x3f | 0xe0); temp[2] = (char) ((ch >> 6) & 0x3f | 0x80); temp[3] = (char) (ch & 0x3f | 0x80); } for (i = 0; i < bytesToWrite; i++) { if (myTargetIndex < targetLength) { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = temp[i]; } else { _this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i]; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex; return; } void T_UConverter_fromUnicode_UTF16_BE (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; unsigned char *myTarget = (unsigned char *) *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; UChar mySourceChar; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { mySourceChar = (UChar) mySource[mySourceIndex++]; myTarget[myTargetIndex++] = (char) (mySourceChar >> 8); if (myTargetIndex < targetLength) { myTarget[myTargetIndex++] = (char) mySourceChar; } else { _this->charErrorBuffer[0] = (char) mySourceChar; 
_this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex;; return; } void T_UConverter_fromUnicode_UTF16_LE (UConverter * _this, char **target, const char *targetLimit, const UChar ** source, const UChar * sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const UChar *mySource = *source; unsigned char *myTarget = (unsigned char *) *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; UChar mySourceChar; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { mySourceChar = (UChar) mySource[mySourceIndex++]; myTarget[myTargetIndex++] = (char) mySourceChar; if (myTargetIndex < targetLength) { myTarget[myTargetIndex++] = (char) (mySourceChar >> 8); } else { _this->charErrorBuffer[0] = (char) (mySourceChar >> 8); _this->charErrorBufferLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } *target += myTargetIndex; *source += mySourceIndex;; return; } void T_UConverter_toUnicode_UTF16_BE (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const unsigned char *mySource = (unsigned char *) *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - (char *) mySource; UChar mySourceChar = 0x0000; UChar oldmySourceChar = 0x0000; while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { /*gets the corresponding UChar */ mySourceChar = (unsigned char) mySource[mySourceIndex++]; oldmySourceChar = mySourceChar; if (_this->toUnicodeStatus == 0) { _this->toUnicodeStatus = (unsigned char) 
mySourceChar == 0x00 ? 0xFFFF : mySourceChar; } else { if (_this->toUnicodeStatus != 0xFFFF) mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | mySourceChar); _this->toUnicodeStatus = 0; myTarget[myTargetIndex++] = mySourceChar; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } if (U_SUCCESS(*err) && flush && (mySourceIndex == sourceLength) && (_this->toUnicodeStatus != 0x00)) { if (U_SUCCESS(*err)) { *err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } *target += myTargetIndex; *source += mySourceIndex; return; } void T_UConverter_toUnicode_UTF16_LE (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const unsigned char *mySource = (unsigned char *) *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - (char *) mySource; CompactShortArray *myToUnicode = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { /*gets the corresponding UniChar */ mySourceChar = (unsigned char) mySource[mySourceIndex++]; if (_this->toUnicodeStatus == 0x00) { _this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 
0xFFFF : mySourceChar; } else { if (_this->toUnicodeStatus == 0xFFFF) mySourceChar = (UChar) (mySourceChar << 8); else { mySourceChar <<= 8; mySourceChar |= (UChar) (_this->toUnicodeStatus); } _this->toUnicodeStatus = 0x00; myTarget[myTargetIndex++] = mySourceChar; } } else { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } if (U_SUCCESS(*err) && flush && (mySourceIndex == sourceLength) && (_this->toUnicodeStatus != 0x00)) { if (U_SUCCESS(*err)) { *err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } *target += myTargetIndex; *source += mySourceIndex; return; } void T_UConverter_toUnicode_UTF8 (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const unsigned char *mySource = (unsigned char *) *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - (char *) mySource; uint32_t ch = 0 , ch2 =0 , i =0; /* Index into the current # of bytes consumed in the current sequence */ uint32_t inBytes = 0; /* Total number of bytes in the current UTF8 sequence */ if (_this->toUnicodeStatus) { i = _this->invalidCharLength; /* restore # of bytes consumed */ inBytes = _this->toUnicodeStatus; /* Restore size of current sequence */ ch = _this->mode; /*Stores the previously calculated ch from a previous call*/ _this->toUnicodeStatus = 0; _this->invalidCharLength = 0; goto morebytes; } while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { ch = 0; ch = ((uint32_t)mySource[mySourceIndex++]) & 0x000000FF; if (ch < 0x80) /* Simple case */ { myTarget[myTargetIndex++] = (UChar) ch; } else { /* store the first char */ inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */ _this->invalidCharBuffer[0] = (char)ch; i = 1; morebytes: for (; i < inBytes; i++) { { if (mySourceIndex >= sourceLength) { if (flush) { if (U_SUCCESS(*err)) { 
*err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } else { _this->toUnicodeStatus = inBytes; _this->invalidCharLength = (int8_t)i; } goto donefornow; } _this->invalidCharBuffer[i] = (char) (ch2 = (((uint32_t)mySource[mySourceIndex++]) & 0x000000FF)); if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */ break; } ch <<= 6; ch += ch2; } ch -= offsetsFromUTF8[inBytes]; if (i == inBytes && ch <= kMaximumUTF16) { if (ch <= kMaximumUCS2) { myTarget[myTargetIndex++] = (UChar) ch; } else { ch -= halfBase; myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart); ch = (ch & halfMask) + kSurrogateLowStart; if (myTargetIndex < targetLength) { myTarget[myTargetIndex++] = (char)ch; } else { _this->invalidUCharBuffer[0] = (UChar) ch; _this->invalidUCharLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } } else { *err = U_ILLEGAL_CHAR_FOUND; _this->invalidCharLength = (int8_t)i; #ifdef Debug printf("inbytes %d\n, _this->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n", inBytes, _this->invalidCharLength, mySource[mySourceIndex]); #endif /* Needed explicit cast for mySource on MVS to make compiler happy - JJD */ ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, (const char *)mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; _this->invalidCharLength = 0; } } } else /* End of target buffer */ { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } donefornow: *target += myTargetIndex; *source += mySourceIndex; _this->mode = ch; /*stores a partially calculated target*/ } void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this, UChar ** target, const UChar * targetLimit, const char **source, const char *sourceLimit, int32_t *offsets, bool_t flush, UErrorCode * err) { const unsigned char *mySource = (unsigned char *) *source; UChar *myTarget = *target; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - (char 
*) mySource; uint32_t ch = 0, ch2 = 0, i = 0; uint32_t inBytes = 0; int32_t* originalOffsets = offsets; if (_this->toUnicodeStatus) { i = _this->invalidCharLength; inBytes = _this->toUnicodeStatus; _this->toUnicodeStatus = 0; ch = _this->mode; goto morebytes; } while (mySourceIndex < sourceLength) { if (myTargetIndex < targetLength) { ch = mySource[mySourceIndex++]; if (ch < 0x80) /* Simple case */ { offsets[myTargetIndex] = mySourceIndex-1; myTarget[myTargetIndex++] = (UChar) ch; } else { inBytes = bytesFromUTF8[ch]; _this->invalidCharBuffer[0] = (char)ch; i = 1; morebytes: for (; i < inBytes; i++) { { if (mySourceIndex >= sourceLength) { if (flush) { if (U_SUCCESS(*err)) { *err = U_TRUNCATED_CHAR_FOUND; _this->toUnicodeStatus = 0x00; } } else { _this->toUnicodeStatus = inBytes; _this->invalidCharLength = (int8_t)i; } goto donefornow; } _this->invalidCharBuffer[i] = (char) (ch2 = mySource[mySourceIndex++]); if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */ break; } ch <<= 6; ch += ch2; } ch -= offsetsFromUTF8[inBytes]; if (i == inBytes && ch <= kMaximumUTF16) { if (ch <= kMaximumUCS2) { offsets[myTargetIndex] = mySourceIndex-3; myTarget[myTargetIndex++] = (UChar) ch; } else { ch -= halfBase; offsets[myTargetIndex] = mySourceIndex-4; myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart); ch = (ch & halfMask) + kSurrogateLowStart; if (myTargetIndex < targetLength) { offsets[myTargetIndex] = mySourceIndex-4; myTarget[myTargetIndex++] = (char)ch; } else { _this->invalidUCharBuffer[0] = (UChar) ch; _this->invalidUCharLength = 1; *err = U_INDEX_OUTOFBOUNDS_ERROR; } } } else { int32_t currentOffset = offsets[myTargetIndex-1]; *err = U_ILLEGAL_CHAR_FOUND; _this->invalidCharLength = (int8_t)i; /* Needed explicit cast for mySource on MVS to make compiler happy - JJD */ ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, (const char *)mySource, mySourceIndex, sourceLimit, offsets, flush, err); if (U_FAILURE (*err)) break; 
_this->invalidCharLength = 0; } } } else /* End of target buffer */ { *err = U_INDEX_OUTOFBOUNDS_ERROR; break; } } donefornow: *target += myTargetIndex; *source += mySourceIndex; _this->mode = ch; } /*Empties the internal unicode output buffer */ void flushInternalUnicodeBuffer (UConverter * _this, UChar * myTarget, int32_t * myTargetIndex, int32_t targetLength, int32_t** offsets, UErrorCode * err) { int32_t myUCharErrorBufferLength = _this->UCharErrorBufferLength; if (myUCharErrorBufferLength <= targetLength) { /*we have enough space *So we just copy the whole Error Buffer in to the output stream*/ icu_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * myUCharErrorBufferLength); if (offsets) { int32_t i=0; for (i=0; iUCharErrorBufferLength = 0; } else { /* We don't have enough space so we copy as much as we can * on the output stream and update the object * by updating the internal buffer*/ icu_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength); if (offsets) { int32_t i=0; for (i=0; i< targetLength;i++) (*offsets)[i] = -1; *offsets += targetLength; } icu_memmove (_this->UCharErrorBuffer, _this->UCharErrorBuffer + targetLength, sizeof (UChar) * (myUCharErrorBufferLength - targetLength)); _this->UCharErrorBufferLength -= (int8_t) targetLength; *myTargetIndex = targetLength; *err = U_INDEX_OUTOFBOUNDS_ERROR; } return; } /*Empties the internal codepage output buffer */ void flushInternalCharBuffer (UConverter * _this, char *myTarget, int32_t * myTargetIndex, int32_t targetLength, int32_t** offsets, UErrorCode * err) { int32_t myCharErrorBufferLength = _this->charErrorBufferLength; /*we have enough space */ if (myCharErrorBufferLength <= targetLength) { icu_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength); if (offsets) { int32_t i=0; for (i=0; icharErrorBufferLength = 0; } else /* We don't have enough space so we copy as much as we can * on the output stream and update the object*/ { icu_memcpy (myTarget, 
_this->charErrorBuffer, targetLength); if (offsets) { int32_t i=0; for (i=0; i< targetLength;i++) (*offsets)[i] = -1; *offsets += targetLength; } icu_memmove (_this->charErrorBuffer, _this->charErrorBuffer + targetLength, (myCharErrorBufferLength - targetLength)); _this->charErrorBufferLength -= (int8_t) targetLength; *myTargetIndex = targetLength; *err = U_INDEX_OUTOFBOUNDS_ERROR; } return; } UChar T_UConverter_getNextUChar_SBCS(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { UChar myUChar; if ((*source)+1 > sourceLimit) { *err = U_INDEX_OUTOFBOUNDS_ERROR; return 0xFFFD; } /*Gets the corresponding codepoint*/ myUChar = converter->sharedData->table->sbcs.toUnicode[(unsigned char)*((*source)++)]; if (myUChar != 0xFFFD) return myUChar; else { UChar* myUCharPtr = &myUChar; const char* sourceFinal = *source; *err = U_INVALID_CHAR_FOUND; /*Calls the ErrorFunctor after rewinding the input buffer*/ (*source)--; /*It's is very likely that the ErrorFunctor will write to the *internal buffers */ converter->fromCharErrorBehaviour(converter, &myUCharPtr, myUCharPtr + 1, &sourceFinal, sourceLimit, NULL, TRUE, err); /*makes the internal caching transparent to the user*/ if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; return myUChar; } } UChar T_UConverter_getNextUChar_LATIN_1(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { /* Empties the internal buffers if need be * In this case since ErrorFunctors are never called * (LATIN_1 is a subset of Unicode) */ if ((*source)+1 > sourceLimit) { *err = U_INDEX_OUTOFBOUNDS_ERROR; return 0xFFFD; } return (UChar)*((*source)++); } UChar T_UConverter_getNextUChar_ISO_2022(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { const char* mySourceLimit; /*Arguments Check*/ if (sourceLimit < *source) { *err = U_ILLEGAL_ARGUMENT_ERROR; return 0xFFFD; } for (;;) { mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, 
TRUE); /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ if (converter->mode == UCNV_SO) /*Already doing some conversion*/ { return ucnv_getNextUChar(((UConverterDataISO2022*)(converter->extraInfo))->currentConverter, source, mySourceLimit, err); } /*-Done with buffer with entire buffer -Error while converting */ changeState_2022(converter, source, sourceLimit, TRUE, err); (*source)++; } return 0xFFFD; } UChar T_UConverter_getNextUChar_DBCS(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { UChar myUChar; /*Checks boundaries and set appropriate error codes*/ if ((*source)+2 > sourceLimit) { if ((*source) >= sourceLimit) { /*Either caller has reached the end of the byte stream*/ *err = U_INDEX_OUTOFBOUNDS_ERROR; } else if (((*source)+1) == sourceLimit) { /* a character was cut in half*/ *err = U_TRUNCATED_CHAR_FOUND; } return 0xFFFD; } /*Gets the corresponding codepoint*/ myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, ((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))); /*update the input pointer*/ *source += 2; if (myUChar != 0xFFFD) return myUChar; else { UChar* myUCharPtr = &myUChar; const char* sourceFinal = *source; /*Calls the ErrorFunctor after rewinding the input buffer*/ (*source) -= 2; *err = U_INVALID_CHAR_FOUND; /*It's is very likely that the ErrorFunctor will write to the *internal buffers */ converter->fromCharErrorBehaviour(converter, &myUCharPtr, myUCharPtr + 1, &sourceFinal, sourceLimit, NULL, TRUE, err); /*makes the internal caching transparent to the user*/ if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; return myUChar; } } UChar T_UConverter_getNextUChar_MBCS(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { UChar myUChar; char const *sourceInitial = *source; /*safe keeps a ptr to the beginning in case we need to step back*/ /*Input boundary check*/ if ((*source)+1 > sourceLimit) { *err = U_INDEX_OUTOFBOUNDS_ERROR; 
return 0xFFFD; } /*Checks to see if the byte is a lead*/ if (converter->sharedData->table->mbcs.starters[(uint8_t)**source] == FALSE) { /*Not lead byte: we update the source ptr and get the codepoint*/ myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode, (UChar)(**source)); (*source)++; } else { /*Lead byte: we Build the codepoint and get the corresponding character * and update the source ptr*/ if ((*source + 2) > sourceLimit) { *err = U_TRUNCATED_CHAR_FOUND; return 0xFFFD; } myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode, ((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))); (*source) += 2; } if (myUChar != 0xFFFD) return myUChar; else { /*rewinds source*/ const char* sourceFinal = *source; UChar* myUCharPtr = &myUChar; *err = U_INVALID_CHAR_FOUND; *source = sourceInitial; /*It's is very likely that the ErrorFunctor will write to the *internal buffers */ converter->fromCharErrorBehaviour(converter, &myUCharPtr, myUCharPtr + 1, &sourceFinal, sourceLimit, NULL, TRUE, err); /*makes the internal caching transparent to the user*/ if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; return myUChar; } } UChar T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { UChar myUChar; char const *sourceInitial = *source; /*safe keeps a ptr to the beginning in case we need to step back*/ /*Input boundary check*/ if ((*source)+1 > sourceLimit) { *err = U_INDEX_OUTOFBOUNDS_ERROR; return 0xFFFD; } /*Checks to see if with have SI/SO shifters if we do we change the mode appropriately and we consume the byte*/ if ((**source == UCNV_SI) || (**source == UCNV_SO)) { converter->mode = **source; (*source)++; /*Rechecks boundary after consuming the shift sequence*/ if ((*source)+1 > sourceLimit) { *err = U_INDEX_OUTOFBOUNDS_ERROR; return 0xFFFD; } } if (converter->mode == UCNV_SI) { /*Not lead byte: we update the source ptr and get the codepoint*/ myUChar = 
ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, (UChar)(**source)); (*source)++; } else { /*Lead byte: we Build the codepoint and get the corresponding character * and update the source ptr*/ if ((*source + 2) > sourceLimit) { *err = U_TRUNCATED_CHAR_FOUND; return 0xFFFD; } myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, ((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))); (*source) += 2; } if (myUChar != 0xFFFD) return myUChar; else { /*rewinds source*/ const char* sourceFinal = *source; UChar* myUCharPtr = &myUChar; *err = U_INVALID_CHAR_FOUND; *source = sourceInitial; /*It's is very likely that the ErrorFunctor will write to the *internal buffers */ converter->fromCharErrorBehaviour(converter, &myUCharPtr, myUCharPtr + 1, &sourceFinal, sourceLimit, NULL, TRUE, err); /*makes the internal caching transparent to the user*/ if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; return myUChar; } } UChar T_UConverter_getNextUChar_UTF16_BE(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { UChar myUChar; /*Checks boundaries and set appropriate error codes*/ if ((*source)+2 > sourceLimit) { if ((*source) >= sourceLimit) { /*Either caller has reached the end of the byte stream*/ *err = U_INDEX_OUTOFBOUNDS_ERROR; } else if (((*source)+1) == sourceLimit) { /* a character was cut in half*/ *err = U_TRUNCATED_CHAR_FOUND; } return 0xFFFD; } /*Gets the corresponding codepoint*/ myUChar = ((uint16_t)((**source)) << 8) |((uint8_t)*((*source)+1)); *source += 2; return myUChar; } UChar T_UConverter_getNextUChar_UTF16_LE(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { UChar myUChar; /*Checks boundaries and set appropriate error codes*/ if ((*source)+2 > sourceLimit) { if ((*source) >= sourceLimit) { /*Either caller has reached the end of the byte stream*/ *err = U_INDEX_OUTOFBOUNDS_ERROR; } else if (((*source)+1) == sourceLimit) { /* a character was cut in half*/ *err 
= U_TRUNCATED_CHAR_FOUND; } return 0xFFFD; } /*Gets the corresponding codepoint*/ myUChar = ((uint16_t)*((*source)+1) << 8) |((uint8_t)((**source))); /*updates the source*/ *source += 2; return myUChar; } UChar T_UConverter_getNextUChar_UTF8(UConverter* converter, const char** source, const char* sourceLimit, UErrorCode* err) { UChar myUChar; /*safe keeps a ptr to the beginning in case we need to step back*/ char const *sourceInitial = *source; uint16_t extraBytesToWrite = 1; uint8_t myByte; uint32_t ch = 0x00000000; int8_t isLegalSequence = 1; /*Input boundary check*/ if ((*source)+1 > sourceLimit) { *err = U_INDEX_OUTOFBOUNDS_ERROR; return 0xFFFD; } extraBytesToWrite = (uint16_t)bytesFromUTF8[(uint8_t)**source]; if (extraBytesToWrite > 4) goto CALL_ERROR_FUNCTION; /*The byte sequence is longer than the buffer area passed*/ if ((*source + extraBytesToWrite) > sourceLimit) { *err = U_TRUNCATED_CHAR_FOUND; return 0xFFFD; } else { switch(extraBytesToWrite) { /* note: code falls through cases! 
(sic)*/ case 5: ch += *((*source)++); ch <<= 6; case 4: ch += (myByte = (uint8_t)*((*source)++)); ch <<= 6; if ((myByte & 0xC0) == 0) { isLegalSequence = 0; break; } case 3: ch += (myByte = *((*source)++)); ch <<= 6; if ((myByte & 0xC0) == 0) { isLegalSequence = 0; break; } case 2: ch += (myByte = *((*source)++)); ch <<= 6; if ((myByte & 0xC0) == 0) { isLegalSequence = 0; break; } case 1: ch += (myByte = *((*source)++)); ch <<= 6; if ((myByte & 0xC0) == 0) { isLegalSequence = 0; break; } case 0: ch += (myByte = *((*source)++)); if ((myByte & 0xC0) == 0) { isLegalSequence = 0; } }; } ch -= offsetsFromUTF8[extraBytesToWrite]; if (isLegalSequence == 0) goto CALL_ERROR_FUNCTION; /*we got a UCS-2 Character*/ if (ch <= kMaximumUCS2) return (UChar)ch; /*character out of bounds*/ else if (ch >= kMaximumUTF16) goto CALL_ERROR_FUNCTION; /*Surrogates found*/ else { ch -= halfBase; /*stores the 2nd surrogate inside the converter for the next call*/ converter->UCharErrorBuffer[0] = (UChar)((ch >> halfShift) + kSurrogateHighStart); converter->UCharErrorBufferLength = 1; /*returns the 1st surrogate*/ return (UChar)((ch & halfMask) + kSurrogateLowStart); } CALL_ERROR_FUNCTION: { /*rewinds source*/ const char* sourceFinal = *source; UChar* myUCharPtr = &myUChar; *err = U_ILLEGAL_CHAR_FOUND; *source = sourceInitial; /*It's is very likely that the ErrorFunctor will write to the *internal buffers */ converter->fromCharErrorBehaviour(converter, &myUCharPtr, myUCharPtr + 1, &sourceFinal, sourceLimit, NULL, TRUE, err); /*makes the internal caching transparent to the user*/ if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; return myUChar; } }