ICU-469 small speed improvement for UTF-8

X-SVN-Rev: 2009
This commit is contained in:
George Rhoten 2000-07-21 20:42:04 +00:00
parent c778f68dd7
commit 1e5e8a4f36

View File

@ -62,7 +62,7 @@ static const int8_t bytesFromUTF8[256] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
}; };
static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; //static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
U_CFUNC void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args, U_CFUNC void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
UErrorCode * err) UErrorCode * err)
@ -86,6 +86,7 @@ U_CFUNC void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
converter->toULength = 0; converter->toULength = 0;
ch = converter->mode; /*Stores the previously calculated ch from a previous call*/ ch = converter->mode; /*Stores the previously calculated ch from a previous call*/
converter->mode = 0;
goto morebytes; goto morebytes;
} }
@ -131,9 +132,9 @@ morebytes:
} }
else else
{ {
converter->mode = ch; /* stores a partially calculated target*/
converter->toUnicodeStatus = inBytes; converter->toUnicodeStatus = inBytes;
converter->toULength = (int8_t)i; converter->toULength = (int8_t)i;
converter->mode = ch; /* stores a partially calculated target*/
} }
goto donefornow; goto donefornow;
} }
@ -171,9 +172,6 @@ morebytes:
const char* saveSource = args->source; const char* saveSource = args->source;
*err = U_ILLEGAL_CHAR_FOUND; *err = U_ILLEGAL_CHAR_FOUND;
converter->toULength = 0;
converter->mode = 0;
converter->toUnicodeStatus = 0;
converter->invalidCharLength = (int8_t)i; converter->invalidCharLength = (int8_t)i;
if (i > 0) if (i > 0)
{ {
@ -184,9 +182,9 @@ morebytes:
printf("inBytes %d\n, converter->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n", printf("inBytes %d\n, converter->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n",
inBytes, converter->invalidCharLength, mySource[mySourceIndex]); inBytes, converter->invalidCharLength, mySource[mySourceIndex]);
#endif #endif
/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */ /* Needed explicit cast for mySource on MVS to make compiler happy - JJD */
args->target = myTarget + myTargetIndex;
args->source = (const char*) mySource + mySourceIndex; args->source = (const char*) mySource + mySourceIndex;
args->target = myTarget + myTargetIndex;
ToU_CALLBACK_MACRO(converter->toUContext, ToU_CALLBACK_MACRO(converter->toUContext,
args, args,
converter->invalidCharBuffer, converter->invalidCharBuffer,
@ -194,9 +192,9 @@ morebytes:
UCNV_ILLEGAL, UCNV_ILLEGAL,
err); err);
/* restore the state in case the callback changed it */ /* restore the state in case the callback changed it */
converter->toUnicodeStatus = 0;
converter->toULength = 0; converter->toULength = 0;
converter->mode = 0; converter->mode = 0;
converter->toUnicodeStatus = 0;
args->source = saveSource; args->source = saveSource;
args->target = saveTarget; args->target = saveTarget;
@ -223,6 +221,7 @@ U_CFUNC void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs
{ {
const unsigned char *mySource = (unsigned char *) args->source; const unsigned char *mySource = (unsigned char *) args->source;
UChar *myTarget = args->target; UChar *myTarget = args->target;
int32_t *myOffsets = args->offsets;
UConverter *converter = args->converter; UConverter *converter = args->converter;
int32_t mySourceIndex = 0; int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0; int32_t myTargetIndex = 0;
@ -248,7 +247,7 @@ U_CFUNC void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs
ch = mySource[mySourceIndex++]; ch = mySource[mySourceIndex++];
if (ch < 0x80) /* Simple case */ if (ch < 0x80) /* Simple case */
{ {
args->offsets[myTargetIndex] = mySourceIndex - 1; myOffsets[myTargetIndex] = mySourceIndex - 1;
myTarget[myTargetIndex++] = (UChar) ch; myTarget[myTargetIndex++] = (UChar) ch;
} }
else else
@ -289,7 +288,7 @@ morebytes:
if (i == inBytes && ch <= MAXIMUM_UTF16) if (i == inBytes && ch <= MAXIMUM_UTF16)
{ {
args->offsets[myTargetIndex] = mySourceIndex - inBytes; myOffsets[myTargetIndex] = mySourceIndex - inBytes;
if (ch <= MAXIMUM_UCS2) if (ch <= MAXIMUM_UCS2)
{ {
myTarget[myTargetIndex++] = (UChar) ch; myTarget[myTargetIndex++] = (UChar) ch;
@ -301,7 +300,7 @@ morebytes:
ch = (ch & HALF_MASK) + SURROGATE_LOW_START; ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
if (myTargetIndex < targetLength) if (myTargetIndex < targetLength)
{ {
args->offsets[myTargetIndex] = mySourceIndex - inBytes; myOffsets[myTargetIndex] = mySourceIndex - inBytes;
myTarget[myTargetIndex++] = (char)ch; myTarget[myTargetIndex++] = (char)ch;
} }
else else
@ -314,18 +313,22 @@ morebytes:
} }
else else
{ {
int32_t currentOffset = args->offsets[myTargetIndex-1]; int32_t currentOffset = myOffsets[myTargetIndex - 1];
int32_t My_i = myTargetIndex; int32_t My_i = myTargetIndex;
UChar* saveTarget = args->target; UChar* saveTarget = args->target;
const char* saveSource = args->source; const char* saveSource = args->source;
int32_t* saveOffsets = args->offsets; int32_t* saveOffsets = myOffsets;
*err = U_ILLEGAL_CHAR_FOUND; *err = U_ILLEGAL_CHAR_FOUND;
converter->invalidCharLength = (int8_t)i; converter->invalidCharLength = (int8_t)i;
if (i > 0)
{
uprv_memcpy(converter->invalidCharBuffer, converter->toUBytes, i);
}
args->target = myTarget + myTargetIndex; args->target = myTarget + myTargetIndex;
args->source = (const char*)mySource + mySourceIndex; args->source = (const char*)mySource + mySourceIndex;
args->offsets = args->offsets?args->offsets+myTargetIndex:0; myOffsets = myOffsets ? (myOffsets + myTargetIndex) : 0;
/* To do HSYS: more smarts here, including offsets */ /* To do HSYS: more smarts here, including offsets */
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(converter->toUContext, ToU_CALLBACK_OFFSETS_LOGIC_MACRO(converter->toUContext,
@ -335,6 +338,9 @@ morebytes:
UCNV_UNASSIGNED, UCNV_UNASSIGNED,
err); err);
converter->toUnicodeStatus = 0;
converter->toULength = 0;
converter->mode = 0;
args->source = saveSource; args->source = saveSource;
args->target = saveTarget; args->target = saveTarget;
@ -469,6 +475,7 @@ U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeA
{ {
const UChar *mySource = args->source; const UChar *mySource = args->source;
unsigned char *myTarget = (unsigned char *) args->target; unsigned char *myTarget = (unsigned char *) args->target;
int32_t *myOffsets = args->offsets;
int32_t mySourceIndex = 0; int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0; int32_t myTargetIndex = 0;
int32_t targetLength = args->targetLimit - (char *) myTarget; int32_t targetLength = args->targetLimit - (char *) myTarget;
@ -494,16 +501,16 @@ U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeA
if (ch < 0x80) /* Single byte */ if (ch < 0x80) /* Single byte */
{ {
args->offsets[myTargetIndex] = mySourceIndex-1; myOffsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) ch; myTarget[myTargetIndex++] = (char) ch;
} }
else if (ch < 0x800) /* Double byte */ else if (ch < 0x800) /* Double byte */
{ {
args->offsets[myTargetIndex] = mySourceIndex-1; myOffsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0); myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0);
if (myTargetIndex < targetLength) if (myTargetIndex < targetLength)
{ {
args->offsets[myTargetIndex] = mySourceIndex-1; myOffsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80); myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80);
} }
else else
@ -553,7 +560,7 @@ lowsurogate:
{ {
if (myTargetIndex < targetLength) if (myTargetIndex < targetLength)
{ {
args->offsets[myTargetIndex] = mySourceIndex-1; myOffsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = temp[i]; myTarget[myTargetIndex++] = temp[i];
} }
else else