ICU-469 small speed improvement for UTF-8
X-SVN-Rev: 2009
This commit is contained in:
parent
c778f68dd7
commit
1e5e8a4f36
@ -62,7 +62,7 @@ static const int8_t bytesFromUTF8[256] = {
|
|||||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
|
//static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
|
||||||
|
|
||||||
U_CFUNC void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
U_CFUNC void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
||||||
UErrorCode * err)
|
UErrorCode * err)
|
||||||
@ -86,6 +86,7 @@ U_CFUNC void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
|||||||
converter->toULength = 0;
|
converter->toULength = 0;
|
||||||
|
|
||||||
ch = converter->mode; /*Stores the previously calculated ch from a previous call*/
|
ch = converter->mode; /*Stores the previously calculated ch from a previous call*/
|
||||||
|
converter->mode = 0;
|
||||||
goto morebytes;
|
goto morebytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -131,9 +132,9 @@ morebytes:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
converter->mode = ch; /* stores a partially calculated target*/
|
|
||||||
converter->toUnicodeStatus = inBytes;
|
converter->toUnicodeStatus = inBytes;
|
||||||
converter->toULength = (int8_t)i;
|
converter->toULength = (int8_t)i;
|
||||||
|
converter->mode = ch; /* stores a partially calculated target*/
|
||||||
}
|
}
|
||||||
goto donefornow;
|
goto donefornow;
|
||||||
}
|
}
|
||||||
@ -171,9 +172,6 @@ morebytes:
|
|||||||
const char* saveSource = args->source;
|
const char* saveSource = args->source;
|
||||||
|
|
||||||
*err = U_ILLEGAL_CHAR_FOUND;
|
*err = U_ILLEGAL_CHAR_FOUND;
|
||||||
converter->toULength = 0;
|
|
||||||
converter->mode = 0;
|
|
||||||
converter->toUnicodeStatus = 0;
|
|
||||||
converter->invalidCharLength = (int8_t)i;
|
converter->invalidCharLength = (int8_t)i;
|
||||||
if (i > 0)
|
if (i > 0)
|
||||||
{
|
{
|
||||||
@ -184,9 +182,9 @@ morebytes:
|
|||||||
printf("inBytes %d\n, converter->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n",
|
printf("inBytes %d\n, converter->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n",
|
||||||
inBytes, converter->invalidCharLength, mySource[mySourceIndex]);
|
inBytes, converter->invalidCharLength, mySource[mySourceIndex]);
|
||||||
#endif
|
#endif
|
||||||
/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */
|
/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */
|
||||||
args->target = myTarget + myTargetIndex;
|
|
||||||
args->source = (const char*) mySource + mySourceIndex;
|
args->source = (const char*) mySource + mySourceIndex;
|
||||||
|
args->target = myTarget + myTargetIndex;
|
||||||
ToU_CALLBACK_MACRO(converter->toUContext,
|
ToU_CALLBACK_MACRO(converter->toUContext,
|
||||||
args,
|
args,
|
||||||
converter->invalidCharBuffer,
|
converter->invalidCharBuffer,
|
||||||
@ -194,9 +192,9 @@ morebytes:
|
|||||||
UCNV_ILLEGAL,
|
UCNV_ILLEGAL,
|
||||||
err);
|
err);
|
||||||
/* restore the state in case the callback changed it */
|
/* restore the state in case the callback changed it */
|
||||||
|
converter->toUnicodeStatus = 0;
|
||||||
converter->toULength = 0;
|
converter->toULength = 0;
|
||||||
converter->mode = 0;
|
converter->mode = 0;
|
||||||
converter->toUnicodeStatus = 0;
|
|
||||||
args->source = saveSource;
|
args->source = saveSource;
|
||||||
args->target = saveTarget;
|
args->target = saveTarget;
|
||||||
|
|
||||||
@ -223,6 +221,7 @@ U_CFUNC void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs
|
|||||||
{
|
{
|
||||||
const unsigned char *mySource = (unsigned char *) args->source;
|
const unsigned char *mySource = (unsigned char *) args->source;
|
||||||
UChar *myTarget = args->target;
|
UChar *myTarget = args->target;
|
||||||
|
int32_t *myOffsets = args->offsets;
|
||||||
UConverter *converter = args->converter;
|
UConverter *converter = args->converter;
|
||||||
int32_t mySourceIndex = 0;
|
int32_t mySourceIndex = 0;
|
||||||
int32_t myTargetIndex = 0;
|
int32_t myTargetIndex = 0;
|
||||||
@ -248,7 +247,7 @@ U_CFUNC void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs
|
|||||||
ch = mySource[mySourceIndex++];
|
ch = mySource[mySourceIndex++];
|
||||||
if (ch < 0x80) /* Simple case */
|
if (ch < 0x80) /* Simple case */
|
||||||
{
|
{
|
||||||
args->offsets[myTargetIndex] = mySourceIndex - 1;
|
myOffsets[myTargetIndex] = mySourceIndex - 1;
|
||||||
myTarget[myTargetIndex++] = (UChar) ch;
|
myTarget[myTargetIndex++] = (UChar) ch;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -289,7 +288,7 @@ morebytes:
|
|||||||
|
|
||||||
if (i == inBytes && ch <= MAXIMUM_UTF16)
|
if (i == inBytes && ch <= MAXIMUM_UTF16)
|
||||||
{
|
{
|
||||||
args->offsets[myTargetIndex] = mySourceIndex - inBytes;
|
myOffsets[myTargetIndex] = mySourceIndex - inBytes;
|
||||||
if (ch <= MAXIMUM_UCS2)
|
if (ch <= MAXIMUM_UCS2)
|
||||||
{
|
{
|
||||||
myTarget[myTargetIndex++] = (UChar) ch;
|
myTarget[myTargetIndex++] = (UChar) ch;
|
||||||
@ -301,7 +300,7 @@ morebytes:
|
|||||||
ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
|
ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
|
||||||
if (myTargetIndex < targetLength)
|
if (myTargetIndex < targetLength)
|
||||||
{
|
{
|
||||||
args->offsets[myTargetIndex] = mySourceIndex - inBytes;
|
myOffsets[myTargetIndex] = mySourceIndex - inBytes;
|
||||||
myTarget[myTargetIndex++] = (char)ch;
|
myTarget[myTargetIndex++] = (char)ch;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -314,18 +313,22 @@ morebytes:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int32_t currentOffset = args->offsets[myTargetIndex-1];
|
int32_t currentOffset = myOffsets[myTargetIndex - 1];
|
||||||
int32_t My_i = myTargetIndex;
|
int32_t My_i = myTargetIndex;
|
||||||
UChar* saveTarget = args->target;
|
UChar* saveTarget = args->target;
|
||||||
const char* saveSource = args->source;
|
const char* saveSource = args->source;
|
||||||
int32_t* saveOffsets = args->offsets;
|
int32_t* saveOffsets = myOffsets;
|
||||||
|
|
||||||
*err = U_ILLEGAL_CHAR_FOUND;
|
*err = U_ILLEGAL_CHAR_FOUND;
|
||||||
converter->invalidCharLength = (int8_t)i;
|
converter->invalidCharLength = (int8_t)i;
|
||||||
|
if (i > 0)
|
||||||
|
{
|
||||||
|
uprv_memcpy(converter->invalidCharBuffer, converter->toUBytes, i);
|
||||||
|
}
|
||||||
|
|
||||||
args->target = myTarget + myTargetIndex;
|
args->target = myTarget + myTargetIndex;
|
||||||
args->source = (const char*)mySource + mySourceIndex;
|
args->source = (const char*)mySource + mySourceIndex;
|
||||||
args->offsets = args->offsets?args->offsets+myTargetIndex:0;
|
myOffsets = myOffsets ? (myOffsets + myTargetIndex) : 0;
|
||||||
|
|
||||||
/* To do HSYS: more smarts here, including offsets */
|
/* To do HSYS: more smarts here, including offsets */
|
||||||
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(converter->toUContext,
|
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(converter->toUContext,
|
||||||
@ -335,6 +338,9 @@ morebytes:
|
|||||||
UCNV_UNASSIGNED,
|
UCNV_UNASSIGNED,
|
||||||
err);
|
err);
|
||||||
|
|
||||||
|
converter->toUnicodeStatus = 0;
|
||||||
|
converter->toULength = 0;
|
||||||
|
converter->mode = 0;
|
||||||
args->source = saveSource;
|
args->source = saveSource;
|
||||||
args->target = saveTarget;
|
args->target = saveTarget;
|
||||||
|
|
||||||
@ -469,6 +475,7 @@ U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeA
|
|||||||
{
|
{
|
||||||
const UChar *mySource = args->source;
|
const UChar *mySource = args->source;
|
||||||
unsigned char *myTarget = (unsigned char *) args->target;
|
unsigned char *myTarget = (unsigned char *) args->target;
|
||||||
|
int32_t *myOffsets = args->offsets;
|
||||||
int32_t mySourceIndex = 0;
|
int32_t mySourceIndex = 0;
|
||||||
int32_t myTargetIndex = 0;
|
int32_t myTargetIndex = 0;
|
||||||
int32_t targetLength = args->targetLimit - (char *) myTarget;
|
int32_t targetLength = args->targetLimit - (char *) myTarget;
|
||||||
@ -494,16 +501,16 @@ U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeA
|
|||||||
|
|
||||||
if (ch < 0x80) /* Single byte */
|
if (ch < 0x80) /* Single byte */
|
||||||
{
|
{
|
||||||
args->offsets[myTargetIndex] = mySourceIndex-1;
|
myOffsets[myTargetIndex] = mySourceIndex-1;
|
||||||
myTarget[myTargetIndex++] = (char) ch;
|
myTarget[myTargetIndex++] = (char) ch;
|
||||||
}
|
}
|
||||||
else if (ch < 0x800) /* Double byte */
|
else if (ch < 0x800) /* Double byte */
|
||||||
{
|
{
|
||||||
args->offsets[myTargetIndex] = mySourceIndex-1;
|
myOffsets[myTargetIndex] = mySourceIndex-1;
|
||||||
myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0);
|
myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0);
|
||||||
if (myTargetIndex < targetLength)
|
if (myTargetIndex < targetLength)
|
||||||
{
|
{
|
||||||
args->offsets[myTargetIndex] = mySourceIndex-1;
|
myOffsets[myTargetIndex] = mySourceIndex-1;
|
||||||
myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80);
|
myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -553,7 +560,7 @@ lowsurogate:
|
|||||||
{
|
{
|
||||||
if (myTargetIndex < targetLength)
|
if (myTargetIndex < targetLength)
|
||||||
{
|
{
|
||||||
args->offsets[myTargetIndex] = mySourceIndex-1;
|
myOffsets[myTargetIndex] = mySourceIndex-1;
|
||||||
myTarget[myTargetIndex++] = temp[i];
|
myTarget[myTargetIndex++] = temp[i];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
Loading…
Reference in New Issue
Block a user