ICU-507 use common implementation for getNextUChar() where appropriate
X-SVN-Rev: 3261
This commit is contained in:
parent
76dc0104b5
commit
b7c791ad75
@ -986,8 +986,8 @@ UChar32 ucnv_getNextUChar(UConverter * converter,
|
|||||||
UTF_NEXT_CHAR(converter->UCharErrorBuffer, i, sizeof(converter->UCharErrorBuffer), myUChar);
|
UTF_NEXT_CHAR(converter->UCharErrorBuffer, i, sizeof(converter->UCharErrorBuffer), myUChar);
|
||||||
/*In this memmove we update the internal buffer by
|
/*In this memmove we update the internal buffer by
|
||||||
*popping the first character.
|
*popping the first character.
|
||||||
*Note that in the call itself we decrement
|
*Note that in the call itself we decrement
|
||||||
*UCharErrorBufferLength
|
*UCharErrorBufferLength
|
||||||
*/
|
*/
|
||||||
uprv_memmove (converter->UCharErrorBuffer,
|
uprv_memmove (converter->UCharErrorBuffer,
|
||||||
converter->UCharErrorBuffer + i,
|
converter->UCharErrorBuffer + i,
|
||||||
@ -1005,7 +1005,13 @@ UChar32 ucnv_getNextUChar(UConverter * converter,
|
|||||||
args.target = NULL;
|
args.target = NULL;
|
||||||
args.targetLimit = NULL;
|
args.targetLimit = NULL;
|
||||||
args.size = sizeof(args);
|
args.size = sizeof(args);
|
||||||
ch = converter->sharedData->impl->getNextUChar(&args, err);
|
if (converter->sharedData->impl->getNextUChar != NULL)
|
||||||
|
{
|
||||||
|
ch = converter->sharedData->impl->getNextUChar(&args, err);
|
||||||
|
} else {
|
||||||
|
/* default implementation */
|
||||||
|
ch = ucnv_getNextUCharFromToUImpl(&args, converter->sharedData->impl->toUnicode, FALSE, err);
|
||||||
|
}
|
||||||
*source = args.source;
|
*source = args.source;
|
||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
@ -152,9 +152,6 @@ U_CFUNC void UConverter_toUnicode_ISO_2022_JP(UConverterToUnicodeArgs* args,
|
|||||||
U_CFUNC void UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
|
U_CFUNC void UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
|
||||||
UErrorCode* err);
|
UErrorCode* err);
|
||||||
|
|
||||||
U_CFUNC UChar32 UConverter_getNextUChar_ISO_2022_JP (UConverterToUnicodeArgs * args,
|
|
||||||
UErrorCode * err);
|
|
||||||
|
|
||||||
/***************** ISO-2022-KR ********************************/
|
/***************** ISO-2022-KR ********************************/
|
||||||
U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args,
|
U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args,
|
||||||
UErrorCode* err);
|
UErrorCode* err);
|
||||||
@ -168,9 +165,6 @@ U_CFUNC void UConverter_toUnicode_ISO_2022_KR(UConverterToUnicodeArgs* args,
|
|||||||
U_CFUNC void UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
|
U_CFUNC void UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
|
||||||
UErrorCode* err);
|
UErrorCode* err);
|
||||||
|
|
||||||
U_CFUNC UChar32 UConverter_getNextUChar_ISO_2022_KR (UConverterToUnicodeArgs * args,
|
|
||||||
UErrorCode * err);
|
|
||||||
|
|
||||||
/***************** ISO-2022-CN ********************************/
|
/***************** ISO-2022-CN ********************************/
|
||||||
U_CFUNC void UConverter_fromUnicode_ISO_2022_CN(UConverterFromUnicodeArgs* args,
|
U_CFUNC void UConverter_fromUnicode_ISO_2022_CN(UConverterFromUnicodeArgs* args,
|
||||||
UErrorCode* err);
|
UErrorCode* err);
|
||||||
@ -184,9 +178,6 @@ U_CFUNC void UConverter_toUnicode_ISO_2022_CN(UConverterToUnicodeArgs* args,
|
|||||||
U_CFUNC void UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
|
U_CFUNC void UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
|
||||||
UErrorCode* err);
|
UErrorCode* err);
|
||||||
|
|
||||||
U_CFUNC UChar32 UConverter_getNextUChar_ISO_2022_CN (UConverterToUnicodeArgs * args,
|
|
||||||
UErrorCode * err);
|
|
||||||
|
|
||||||
#define ESC_2022 0x1B /*ESC*/
|
#define ESC_2022 0x1B /*ESC*/
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
@ -416,7 +407,7 @@ static const UConverterImpl _ISO2022JPImpl={
|
|||||||
UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
|
UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
|
||||||
UConverter_fromUnicode_ISO_2022_JP,
|
UConverter_fromUnicode_ISO_2022_JP,
|
||||||
UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
|
UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
|
||||||
UConverter_getNextUChar_ISO_2022_JP,
|
NULL,
|
||||||
|
|
||||||
NULL,
|
NULL,
|
||||||
_ISO2022getName
|
_ISO2022getName
|
||||||
@ -447,7 +438,7 @@ static const UConverterImpl _ISO2022KRImpl={
|
|||||||
UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
|
UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
|
||||||
UConverter_fromUnicode_ISO_2022_KR,
|
UConverter_fromUnicode_ISO_2022_KR,
|
||||||
UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
|
UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
|
||||||
UConverter_getNextUChar_ISO_2022_KR,
|
NULL,
|
||||||
|
|
||||||
NULL,
|
NULL,
|
||||||
_ISO2022getName
|
_ISO2022getName
|
||||||
@ -479,7 +470,7 @@ static const UConverterImpl _ISO2022CNImpl={
|
|||||||
UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
|
UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
|
||||||
UConverter_fromUnicode_ISO_2022_CN,
|
UConverter_fromUnicode_ISO_2022_CN,
|
||||||
UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
|
UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
|
||||||
UConverter_getNextUChar_ISO_2022_CN,
|
NULL,
|
||||||
|
|
||||||
NULL,
|
NULL,
|
||||||
_ISO2022getName
|
_ISO2022getName
|
||||||
@ -1899,43 +1890,6 @@ static void concatChar(UConverterFromUnicodeArgs* args, int32_t *targetIndex, in
|
|||||||
|
|
||||||
/*************** to unicode *******************/
|
/*************** to unicode *******************/
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a simple, interim implementation of GetNextUChar()
|
|
||||||
* that makes it possible to concentrate on testing one single implementation
|
|
||||||
* of the ToUnicode conversion before it gets copied to
|
|
||||||
* multiple versions that are then optimized for their needs
|
|
||||||
* (with vs. without offsets and getNextUChar).
|
|
||||||
*/
|
|
||||||
|
|
||||||
U_CFUNC UChar32
|
|
||||||
UConverter_getNextUChar_ISO_2022_JP(UConverterToUnicodeArgs *pArgs,
|
|
||||||
UErrorCode *pErrorCode) {
|
|
||||||
UChar buffer[UTF_MAX_CHAR_LENGTH];
|
|
||||||
const char *realLimit=pArgs->sourceLimit;
|
|
||||||
|
|
||||||
pArgs->target=buffer;
|
|
||||||
pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
|
|
||||||
|
|
||||||
while(pArgs->source<realLimit) {
|
|
||||||
/* feed in one byte at a time to make sure to get only one character out */
|
|
||||||
pArgs->sourceLimit=pArgs->source+1;
|
|
||||||
pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
|
|
||||||
UConverter_toUnicode_ISO_2022_JP(pArgs, pErrorCode);
|
|
||||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
return 0xffff;
|
|
||||||
} else if(pArgs->target!=buffer) {
|
|
||||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
}
|
|
||||||
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* no output because of empty input or only state changes and skipping callbacks */
|
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
||||||
return 0xffff;
|
|
||||||
}
|
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Recognized escape sequences are
|
* Recognized escape sequences are
|
||||||
* <ESC>(B ASCII
|
* <ESC>(B ASCII
|
||||||
@ -3243,43 +3197,6 @@ END_LOOP:
|
|||||||
args->source = mySource;
|
args->source = mySource;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a simple, interim implementation of GetNextUChar()
|
|
||||||
* that makes it possible to concentrate on testing one single implementation
|
|
||||||
* of the ToUnicode conversion before it gets copied to
|
|
||||||
* multiple versions that are then optimized for their needs
|
|
||||||
* (with vs. without offsets and getNextUChar).
|
|
||||||
*/
|
|
||||||
|
|
||||||
U_CFUNC UChar32
|
|
||||||
UConverter_getNextUChar_ISO_2022_KR(UConverterToUnicodeArgs *pArgs,
|
|
||||||
UErrorCode *pErrorCode) {
|
|
||||||
UChar buffer[UTF_MAX_CHAR_LENGTH];
|
|
||||||
const char *realLimit=pArgs->sourceLimit;
|
|
||||||
|
|
||||||
pArgs->target=buffer;
|
|
||||||
pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
|
|
||||||
|
|
||||||
while(pArgs->source<realLimit) {
|
|
||||||
/* feed in one byte at a time to make sure to get only one character out */
|
|
||||||
pArgs->sourceLimit=pArgs->source+1;
|
|
||||||
pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
|
|
||||||
UConverter_toUnicode_ISO_2022_KR(pArgs, pErrorCode);
|
|
||||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
return 0xffff;
|
|
||||||
} else if(pArgs->target!=buffer) {
|
|
||||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
}
|
|
||||||
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* no output because of empty input or only state changes and skipping callbacks */
|
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
||||||
return 0xffff;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************** END ISO2022-KR *********************************/
|
/*************************** END ISO2022-KR *********************************/
|
||||||
|
|
||||||
|
|
||||||
@ -4779,40 +4696,3 @@ END_LOOP:
|
|||||||
args->target = myTarget;
|
args->target = myTarget;
|
||||||
args->source = mySource;
|
args->source = mySource;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a simple, interim implementation of GetNextUChar()
|
|
||||||
* that makes it possible to concentrate on testing one single implementation
|
|
||||||
* of the ToUnicode conversion before it gets copied to
|
|
||||||
* multiple versions that are then optimized for their needs
|
|
||||||
* (with vs. without offsets and getNextUChar).
|
|
||||||
*/
|
|
||||||
|
|
||||||
U_CFUNC UChar32
|
|
||||||
UConverter_getNextUChar_ISO_2022_CN(UConverterToUnicodeArgs *pArgs,
|
|
||||||
UErrorCode *pErrorCode) {
|
|
||||||
UChar buffer[UTF_MAX_CHAR_LENGTH];
|
|
||||||
const char *realLimit=pArgs->sourceLimit;
|
|
||||||
|
|
||||||
pArgs->target=buffer;
|
|
||||||
pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
|
|
||||||
|
|
||||||
while(pArgs->source<realLimit) {
|
|
||||||
/* feed in one byte at a time to make sure to get only one character out */
|
|
||||||
pArgs->sourceLimit=pArgs->source+1;
|
|
||||||
pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
|
|
||||||
UConverter_toUnicode_ISO_2022_CN(pArgs, pErrorCode);
|
|
||||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
return 0xffff;
|
|
||||||
} else if(pArgs->target!=buffer) {
|
|
||||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
}
|
|
||||||
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* no output because of empty input or only state changes and skipping callbacks */
|
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
||||||
return 0xffff;
|
|
||||||
}
|
|
||||||
|
@ -193,3 +193,51 @@ ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is a simple implementation of ucnv_getNextUChar() that uses the
|
||||||
|
* converter's toUnicode() function. See ucnv_cnv.h for details.
|
||||||
|
*/
|
||||||
|
U_CFUNC UChar32
|
||||||
|
ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
|
||||||
|
T_ToUnicodeFunction toU,
|
||||||
|
UBool collectPairs,
|
||||||
|
UErrorCode *pErrorCode) {
|
||||||
|
UChar buffer[UTF_MAX_CHAR_LENGTH];
|
||||||
|
const char *realLimit=pArgs->sourceLimit;
|
||||||
|
|
||||||
|
pArgs->target=buffer;
|
||||||
|
pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
|
||||||
|
|
||||||
|
while(pArgs->source<realLimit) {
|
||||||
|
/* feed in one byte at a time to make sure to get only one character out */
|
||||||
|
pArgs->sourceLimit=pArgs->source+1;
|
||||||
|
pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
|
||||||
|
|
||||||
|
/* convert this byte and check the result */
|
||||||
|
toU(pArgs, pErrorCode);
|
||||||
|
if(U_SUCCESS(*pErrorCode)) {
|
||||||
|
int32_t length=pArgs->target-buffer;
|
||||||
|
|
||||||
|
/* this test is UTF-16 specific */
|
||||||
|
if(/* some output and
|
||||||
|
(source consumed or don't collect surrogate pairs or not a surrogate or a surrogate pair) */
|
||||||
|
length>0 &&
|
||||||
|
(pArgs->flush || !collectPairs || !UTF_IS_FIRST_SURROGATE(buffer[0]) || length==2)
|
||||||
|
) {
|
||||||
|
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, length);
|
||||||
|
}
|
||||||
|
/* else continue with the loop */
|
||||||
|
} else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||||
|
*pErrorCode=U_ZERO_ERROR;
|
||||||
|
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, UTF_MAX_CHAR_LENGTH);
|
||||||
|
} else {
|
||||||
|
/* U_FAILURE() */
|
||||||
|
return 0xffff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* no output because of empty input or only state changes and skipping callbacks */
|
||||||
|
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||||
|
return 0xffff;
|
||||||
|
}
|
||||||
|
@ -260,4 +260,30 @@ ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex
|
|||||||
#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || (uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000)
|
#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || (uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000)
|
||||||
#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c)
|
#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a simple implementation of ucnv_getNextUChar() that uses the
|
||||||
|
* converter's toUnicode() function.
|
||||||
|
*
|
||||||
|
* \par
|
||||||
|
* A surrogate pair from a single byte sequence is always
|
||||||
|
* combined to a supplementary code point.
|
||||||
|
* A surrogate pair from consecutive byte sequences is only combined
|
||||||
|
* if collectPairs is set. This is necessary for SCSU
|
||||||
|
* but not allowed for most legacy codepages.
|
||||||
|
*
|
||||||
|
* @param pArgs The argument structure supplied by ucnv_getNextUChar()
|
||||||
|
* @param toU A function pointer to the converter's toUnicode() function
|
||||||
|
* @param collectPairs indicates whether separate surrogate results from
|
||||||
|
* consecutive byte sequences should be combined into
|
||||||
|
* a single code point
|
||||||
|
* @param pErrorCode An ICU error code parameter
|
||||||
|
* @return The Unicode code point as a result of a conversion of a minimal
|
||||||
|
* number of input bytes, or 0xffff if no code point could be produced (*pErrorCode then holds a failure code)
|
||||||
|
*/
|
||||||
|
U_CFUNC UChar32
|
||||||
|
ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
|
||||||
|
T_ToUnicodeFunction toU,
|
||||||
|
UBool collectPairs,
|
||||||
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
#endif /* UCNV_CNV */
|
#endif /* UCNV_CNV */
|
||||||
|
@ -60,9 +60,6 @@ U_CFUNC void UConverter_fromUnicode_HZ(UConverterFromUnicodeArgs *args,
|
|||||||
U_CFUNC void UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs *args,
|
U_CFUNC void UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs *args,
|
||||||
UErrorCode *err);
|
UErrorCode *err);
|
||||||
|
|
||||||
U_CFUNC UChar32 UConverter_getNextUChar_HZ (UConverterToUnicodeArgs *pArgs,
|
|
||||||
UErrorCode *pErrorCode);
|
|
||||||
|
|
||||||
static UConverterImpl _HZImpl={
|
static UConverterImpl _HZImpl={
|
||||||
UCNV_HZ,
|
UCNV_HZ,
|
||||||
|
|
||||||
@ -77,7 +74,7 @@ static UConverterImpl _HZImpl={
|
|||||||
UConverter_toUnicode_HZ_OFFSETS_LOGIC,
|
UConverter_toUnicode_HZ_OFFSETS_LOGIC,
|
||||||
UConverter_fromUnicode_HZ,
|
UConverter_fromUnicode_HZ,
|
||||||
UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
|
UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
|
||||||
UConverter_getNextUChar_HZ,
|
NULL,
|
||||||
|
|
||||||
NULL,
|
NULL,
|
||||||
NULL
|
NULL
|
||||||
@ -998,31 +995,3 @@ CALLBACK:
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
U_CFUNC UChar32 UConverter_getNextUChar_HZ (UConverterToUnicodeArgs * pArgs,
|
|
||||||
UErrorCode *pErrorCode){
|
|
||||||
UChar buffer[UTF_MAX_CHAR_LENGTH];
|
|
||||||
const char *realLimit=pArgs->sourceLimit;
|
|
||||||
|
|
||||||
pArgs->target=buffer;
|
|
||||||
pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
|
|
||||||
|
|
||||||
while(pArgs->source<realLimit) {
|
|
||||||
/* feed in one byte at a time to make sure to get only one character out */
|
|
||||||
pArgs->sourceLimit=pArgs->source+1;
|
|
||||||
pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
|
|
||||||
UConverter_toUnicode_HZ(pArgs, pErrorCode);
|
|
||||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
return 0xffff;
|
|
||||||
} else if(pArgs->target!=buffer) {
|
|
||||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
}
|
|
||||||
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, pArgs->target-buffer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* no output because of empty input or only state changes and skipping callbacks */
|
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
||||||
return 0xffff;
|
|
||||||
}
|
|
||||||
|
@ -908,73 +908,12 @@ endloop:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a simple, interim implementation of GetNextUChar()
|
|
||||||
* that makes it possible to concentrate on testing one single implementation
|
|
||||||
* of the ToUnicode conversion before it gets copied to
|
|
||||||
* multiple versions that are then optimized for their needs
|
|
||||||
* (with vs. without offsets and getNextUChar).
|
|
||||||
* ### TODO: implement this directly similar to ToUnicode()
|
* ### TODO: implement this directly similar to ToUnicode()
|
||||||
*/
|
*/
|
||||||
U_CFUNC UChar32
|
U_CFUNC UChar32
|
||||||
_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
UChar buffer[UTF_MAX_CHAR_LENGTH];
|
return ucnv_getNextUCharFromToUImpl(pArgs, _MBCSToUnicode, FALSE, pErrorCode);
|
||||||
const char *realLimit=pArgs->sourceLimit;
|
|
||||||
|
|
||||||
pArgs->target=buffer;
|
|
||||||
pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
|
|
||||||
|
|
||||||
while(pArgs->source<realLimit) {
|
|
||||||
/* feed in one byte at a time to make sure to get only one character out */
|
|
||||||
pArgs->sourceLimit=pArgs->source+1;
|
|
||||||
pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
|
|
||||||
_MBCSToUnicode(pArgs, pErrorCode);
|
|
||||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
return 0xffff;
|
|
||||||
} else {
|
|
||||||
int32_t length=pArgs->target-buffer;
|
|
||||||
#if 0
|
|
||||||
/*
|
|
||||||
* markus 2000-oct-26
|
|
||||||
*
|
|
||||||
* This version of the exit condition is commented out because of
|
|
||||||
* a clarification of the semantics of ucnv_getNextUChar() (see updated javadoc):
|
|
||||||
*
|
|
||||||
* Codepages that provide direct encodings of supplementary Unicode code points (U+10000 and up)
|
|
||||||
* should return single surrogates without combining them into pairs if single surrogates
|
|
||||||
* are encoded. This group of codepages includes UTF-8, UTF-32, and GB 18030.
|
|
||||||
*
|
|
||||||
* Codepages that provide direct encodings only of single surrogates
|
|
||||||
* must attempt to match pairs of them into supplementary code points.
|
|
||||||
* Single surrogates are returned only if they are not part of matched pairs.
|
|
||||||
* This group of codepages includes SCSU, LMBCS, and UTF-16.
|
|
||||||
*
|
|
||||||
* Currently, there is no MBCS codepage in the second group. SCSU, LMBCS, and UTF-16
|
|
||||||
* are implemented with separate code.
|
|
||||||
*
|
|
||||||
* Therefore, this feature is removed here.
|
|
||||||
* It might need to be added back in later when some MBCS codepages are created that
|
|
||||||
* fall into the second group. In this case, a flag in the .cnv file will be necessary
|
|
||||||
* to indicate this. makeconv would need to set this flag based on whether the codepage
|
|
||||||
* contains only mappings for single surrogates but
|
|
||||||
* not directly for any supplementary code points.
|
|
||||||
*/
|
|
||||||
if(/* some output and (source consumed or not a surrogate or a surrogate pair [UTF-16 specific]) */
|
|
||||||
length>0 &&
|
|
||||||
(pArgs->flush || !UTF_IS_FIRST_SURROGATE(buffer[0]) || length==2)
|
|
||||||
#endif
|
|
||||||
if(length>0) {
|
|
||||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
|
||||||
*pErrorCode=U_ZERO_ERROR;
|
|
||||||
}
|
|
||||||
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* no output because of empty input or only state changes and skipping callbacks */
|
|
||||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
|
||||||
return 0xffff;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
Reference in New Issue
Block a user