ICU-5195 Fix handling of code points that straddle buffers.
X-SVN-Rev: 19614
This commit is contained in:
parent
e1412ad342
commit
dd2917fc9b
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2005, International Business Machines
|
||||
* Copyright (C) 2002-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u32.c
|
||||
@ -51,9 +51,10 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
|
||||
unsigned char *toUBytes = args->converter->toUBytes;
|
||||
uint32_t ch, i;
|
||||
|
||||
/* UTF-8 returns here for only non-offset, this needs to change.*/
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
||||
ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
|
||||
args->converter->toUnicodeStatus = 0;
|
||||
@ -131,8 +132,10 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
|
||||
uint32_t ch, i;
|
||||
int32_t offsetNum = 0;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
||||
ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
|
||||
args->converter->toUnicodeStatus = 0;
|
||||
@ -510,10 +513,11 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
|
||||
unsigned char *toUBytes = args->converter->toUBytes;
|
||||
uint32_t ch, i;
|
||||
|
||||
/* UTF-8 returns here for only non-offset, this needs to change.*/
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
|
||||
{
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
||||
/* Stores the previously calculated ch from a previous call*/
|
||||
ch = args->converter->toUnicodeStatus - 1;
|
||||
@ -596,10 +600,11 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
|
||||
uint32_t ch, i;
|
||||
int32_t offsetNum = 0;
|
||||
|
||||
/* UTF-8 returns here for only non-offset, this needs to change.*/
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
|
||||
{
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
||||
/* Stores the previously calculated ch from a previous call*/
|
||||
ch = args->converter->toUnicodeStatus - 1;
|
||||
|
@ -699,6 +699,39 @@ static void TestRegressionUTF8(){
|
||||
}
|
||||
free(standardForm);
|
||||
free(utf8);
|
||||
|
||||
{
|
||||
static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
|
||||
static const UChar expected[] = { 0x0301, 0x0300 };
|
||||
UConverter *conv8;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
UChar pivotBuffer[100];
|
||||
const UChar* const pivEnd = pivotBuffer + 100;
|
||||
const char* srcBeg;
|
||||
const char* srcEnd;
|
||||
UChar* pivBeg;
|
||||
|
||||
conv8 = ucnv_open("UTF-8", &err);
|
||||
|
||||
srcBeg = src8;
|
||||
pivBeg = pivotBuffer;
|
||||
srcEnd = src8 + 3;
|
||||
ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
|
||||
if (srcBeg != srcEnd) {
|
||||
log_err("Did not consume whole buffer on first call.\n");
|
||||
}
|
||||
|
||||
srcEnd = src8 + 4;
|
||||
ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
|
||||
if (srcBeg != srcEnd) {
|
||||
log_err("Did not consume whole buffer on second call.\n");
|
||||
}
|
||||
|
||||
if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
|
||||
log_err("Did not get expected results for UTF-8.\n");
|
||||
}
|
||||
ucnv_close(conv8);
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_UTF32_LEN 1
|
||||
@ -770,6 +803,71 @@ static void TestRegressionUTF32(){
|
||||
expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
|
||||
log_err("u->UTF-32LE\n");
|
||||
}
|
||||
|
||||
{
|
||||
static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
|
||||
static const UChar expected[] = { 0x0031, 0x0030 };
|
||||
UConverter *convBE;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
UChar pivotBuffer[100];
|
||||
const UChar* const pivEnd = pivotBuffer + 100;
|
||||
const char* srcBeg;
|
||||
const char* srcEnd;
|
||||
UChar* pivBeg;
|
||||
|
||||
convBE = ucnv_open("UTF-32BE", &err);
|
||||
|
||||
srcBeg = srcBE;
|
||||
pivBeg = pivotBuffer;
|
||||
srcEnd = srcBE + 5;
|
||||
ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
|
||||
if (srcBeg != srcEnd) {
|
||||
log_err("Did not consume whole buffer on first call.\n");
|
||||
}
|
||||
|
||||
srcEnd = srcBE + 8;
|
||||
ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
|
||||
if (srcBeg != srcEnd) {
|
||||
log_err("Did not consume whole buffer on second call.\n");
|
||||
}
|
||||
|
||||
if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
|
||||
log_err("Did not get expected results for UTF-32BE.\n");
|
||||
}
|
||||
ucnv_close(convBE);
|
||||
}
|
||||
{
|
||||
static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
|
||||
static const UChar expected[] = { 0x0031, 0x0030 };
|
||||
UConverter *convLE;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
UChar pivotBuffer[100];
|
||||
const UChar* const pivEnd = pivotBuffer + 100;
|
||||
const char* srcBeg;
|
||||
const char* srcEnd;
|
||||
UChar* pivBeg;
|
||||
|
||||
convLE = ucnv_open("UTF-32LE", &err);
|
||||
|
||||
srcBeg = srcLE;
|
||||
pivBeg = pivotBuffer;
|
||||
srcEnd = srcLE + 5;
|
||||
ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
|
||||
if (srcBeg != srcEnd) {
|
||||
log_err("Did not consume whole buffer on first call.\n");
|
||||
}
|
||||
|
||||
srcEnd = srcLE + 8;
|
||||
ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
|
||||
if (srcBeg != srcEnd) {
|
||||
log_err("Did not consume whole buffer on second call.\n");
|
||||
}
|
||||
|
||||
if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
|
||||
log_err("Did not get expected results for UTF-32LE.\n");
|
||||
}
|
||||
ucnv_close(convLE);
|
||||
}
|
||||
}
|
||||
|
||||
/*Walk through the available converters*/
|
||||
|
Loading…
Reference in New Issue
Block a user