From dc3d5b5d692e16583a47cca60dfd79f0ecd9ae01 Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Tue, 4 Mar 2003 19:21:42 +0000 Subject: [PATCH] ICU-2120 Speed improvements for %g and the char * format specifier X-SVN-Rev: 11236 --- icu4c/source/extra/ustdio/sprintf.c | 75 +++++++++++++++++----------- icu4c/source/extra/ustdio/sscanf.c | 19 +++++-- icu4c/source/extra/ustdio/ufmt_cmn.c | 16 +++--- icu4c/source/extra/ustdio/ufmt_cmn.h | 23 +++++++-- icu4c/source/extra/ustdio/uprintf.c | 74 ++++++++++++++++----------- icu4c/source/extra/ustdio/uscanf.c | 19 +++++-- 6 files changed, 149 insertions(+), 77 deletions(-) diff --git a/icu4c/source/extra/ustdio/sprintf.c b/icu4c/source/extra/ustdio/sprintf.c index 11ea361267..ff4702500b 100644 --- a/icu4c/source/extra/ustdio/sprintf.c +++ b/icu4c/source/extra/ustdio/sprintf.c @@ -325,19 +325,28 @@ u_vsnprintf(UChar *buffer, { int32_t written; UChar *pattern; + UChar patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)strlen(patternSpecification) + 1; /* convert from the default codepage to Unicode */ - pattern = ufmt_defaultCPToUnicode(patternSpecification, - (int32_t)strlen(patternSpecification)); - if(pattern == 0) { - return 0; + if (size >= MAX_UCHAR_BUFFER_SIZE(patBuffer)) { + pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); + if(pattern == 0) { + return 0; + } } + else { + pattern = patBuffer; + } + ufmt_defaultCPToUnicode(patternSpecification, size, pattern, size); /* do the work */ written = u_vsnprintf_u(buffer, count, locale, pattern, ap); /* clean up */ - uprv_free(pattern); + if (pattern != patBuffer) { + uprv_free(pattern); + } return written; } @@ -471,18 +480,30 @@ u_sprintf_string_handler(u_localized_string *output, const ufmt_args *args) { UChar *s; + UChar buffer[UFMT_DEFAULT_BUFFER_SIZE]; int32_t len, written; + int32_t argSize; const char *arg = (const char*)(args[0].ptrValue); /* convert from the default codepage to Unicode */ - if (arg) - s = ufmt_defaultCPToUnicode(arg, (int32_t)strlen(arg)); + if (arg) { + argSize = (int32_t)strlen(arg) + 1; + if (argSize >= MAX_UCHAR_BUFFER_SIZE(buffer)) { + s = ufmt_defaultCPToUnicode(arg, argSize, + (UChar *)uprv_malloc(MAX_UCHAR_BUFFER_NEEDED(argSize)), + MAX_UCHAR_BUFFER_NEEDED(argSize)); + if(s == NULL) { + return 0; + } + } + else { + s = ufmt_defaultCPToUnicode(arg, argSize, buffer, + sizeof(buffer)/sizeof(UChar)); + } + } else { s = gNullStr; } - if(s == 0) { - return 0; - } len = u_strlen(s); /* width = minimum # of characters to write */ @@ -499,7 +520,7 @@ u_sprintf_string_handler(u_localized_string *output, } /* clean up */ - if (gNullStr != s) { + if (gNullStr != s && buffer != s) { uprv_free(s); } @@ -732,15 +753,12 @@ u_sprintf_char_handler(u_localized_string *output, const u_sprintf_spec_info *info, const ufmt_args *args) { - UChar *s; + UChar s[UTF_MAX_CHAR_LENGTH+1]; int32_t len, written; unsigned char arg = (unsigned char)(args[0].intValue); /* convert from default codepage to Unicode */ - s = ufmt_defaultCPToUnicode((const char *)&arg, 1); - if(s == 0) { - return 0; - } + ufmt_defaultCPToUnicode((const char *)&arg, 2, s, sizeof(s)/sizeof(UChar)); /* Remember that this may be a surrogate pair */ len = u_strlen(s); @@ -758,9 +776,6 @@ u_sprintf_char_handler(u_localized_string *output, written = u_sprintf_pad_and_justify(output, info, s, len); } - /* clean up */ - uprv_free(s); - return written; } @@ -812,9 +827,9 @@ u_sprintf_scientific_handler(u_localized_string *output, /* clone the stream's bundle if it isn't owned */ if(! output->fOwnBundle) { - output->fBundle = u_locbund_clone(output->fBundle); - output->fOwnBundle = TRUE; - format = u_locbund_getScientificFormat(output->fBundle); + output->fBundle = u_locbund_clone(output->fBundle); + output->fOwnBundle = TRUE; + format = u_locbund_getScientificFormat(output->fBundle); } srcLen = unum_getSymbol(format, @@ -877,11 +892,13 @@ u_sprintf_scientific_handler(u_localized_string *output, unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); - unum_setSymbol(format, + /* Since we clone the fBundle and we're only using the scientific + format, we don't need to save the old exponent value. */ + /*unum_setSymbol(format, UNUM_EXPONENTIAL_SYMBOL, srcExpBuf, srcLen, - &status); + &status);*/ return u_sprintf_pad_and_justify(output, info, result, u_strlen(result)); } @@ -1056,11 +1073,11 @@ u_sprintf_currency_handler(u_localized_string *output, else if(info->fAlt) { /* '#' means always show decimal point */ /* copy of printf behavior on Solaris - '#' shows 6 digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 2); } else { - /* # of decimal digits is 6 if precision not specified */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + /* # of decimal digits is 2 if precision not specified, 2 is typical */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 2); } /* set whether to show the sign */ @@ -1153,11 +1170,11 @@ u_sprintf_scidbl_handler(u_localized_string *output, /* call the double handler */ return u_sprintf_double_handler(output, &scidbl_info, args); } - else if(num < 0.0001 + else if(num < 0.0001 || (scidbl_info.fPrecision < 1 && 1000000.0 <= num) || (scidbl_info.fPrecision != -1 && num > uprv_pow10(scidbl_info.fPrecision))) { /* use 'e' or 'E' notation */ - scidbl_info.fSpec = scidbl_info.fSpec - 1; + scidbl_info.fSpec = scidbl_info.fSpec - 2; /* call the scientific handler */ return u_sprintf_scientific_handler(output, &scidbl_info, args); } diff --git a/icu4c/source/extra/ustdio/sscanf.c b/icu4c/source/extra/ustdio/sscanf.c index 0491b88526..834506e28c 100644 --- a/icu4c/source/extra/ustdio/sscanf.c +++ b/icu4c/source/extra/ustdio/sscanf.c @@ -321,19 +321,28 @@ u_vsscanf(const UChar *buffer, { int32_t converted; UChar *pattern; + UChar patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)strlen(patternSpecification) + 1; /* convert from the default codepage to Unicode */ - pattern = ufmt_defaultCPToUnicode(patternSpecification, - strlen(patternSpecification)+1); - if(pattern == 0) { - return 0; + if (size >= MAX_UCHAR_BUFFER_SIZE(patBuffer)) { + pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); + if(pattern == 0) { + return 0; + } } + else { + pattern = patBuffer; + } + ufmt_defaultCPToUnicode(patternSpecification, size, pattern, size); /* do the work */ converted = u_vsscanf_u(buffer, locale, pattern, ap); /* clean up */ - uprv_free(pattern); + if (pattern != patBuffer) { + uprv_free(pattern); + } return converted; } diff --git a/icu4c/source/extra/ustdio/ufmt_cmn.c b/icu4c/source/extra/ustdio/ufmt_cmn.c index 4f52a49c43..a74887466b 100644 --- a/icu4c/source/extra/ustdio/ufmt_cmn.c +++ b/icu4c/source/extra/ustdio/ufmt_cmn.c @@ -18,6 +18,7 @@ ****************************************************************************** */ +#include "cstring.h" #include "cmemory.h" #include "ufmt_cmn.h" #include "unicode/uchar.h" @@ -126,24 +127,25 @@ ufmt_utol(const UChar *buffer, } UChar* -ufmt_defaultCPToUnicode(const char *s, - int32_t len) +ufmt_defaultCPToUnicode(const char *s, int32_t sSize, + UChar *target, int32_t tSize) { - int32_t size; - UChar *target, *alias; + UChar *alias; UErrorCode status = U_ZERO_ERROR; UConverter *defConverter = u_getDefaultConverter(&status); if(U_FAILURE(status) || defConverter == 0) return 0; + + if(sSize <= 0) { + sSize = uprv_strlen(s) + 1; + } /* perform the conversion in one swoop */ - size = (len + 1) / ucnv_getMinCharSize(defConverter); - target = (UChar*) uprv_malloc(size * sizeof(UChar)); if(target != 0) { alias = target; - ucnv_toUnicode(defConverter, &alias, alias + size, &s, s + len, + ucnv_toUnicode(defConverter, &alias, alias + tSize, &s, s + sSize - 1, NULL, TRUE, &status); diff --git a/icu4c/source/extra/ustdio/ufmt_cmn.h b/icu4c/source/extra/ustdio/ufmt_cmn.h index 2aa3999b0e..f4b07c122e 100644 --- a/icu4c/source/extra/ustdio/ufmt_cmn.h +++ b/icu4c/source/extra/ustdio/ufmt_cmn.h @@ -22,6 +22,10 @@ #include "unicode/utypes.h" +#define UFMT_DEFAULT_BUFFER_SIZE 64 +#define MAX_UCHAR_BUFFER_SIZE(buffer) (sizeof(buffer)/(UTF_MAX_CHAR_LENGTH*sizeof(UChar))) +#define MAX_UCHAR_BUFFER_NEEDED(strLen) ((strLen+1)*UTF_MAX_CHAR_LENGTH*sizeof(UChar)) + /** * Enum representing the possible argument types for uprintf/uscanf */ @@ -119,13 +123,15 @@ ufmt_utol(const UChar *buffer, /** * Convert a string from the default codepage to Unicode. * @param s The string to convert, in the default codepage. - * @param len The number of characters in s. + * @param sSize The size of s to convert. + * @param target The buffer to convert to. + * @param tSize The size of target * @return A pointer to a newly allocated converted version of s, or 0 * on error. */ UChar* -ufmt_defaultCPToUnicode(const char *s, - int32_t len); +ufmt_defaultCPToUnicode(const char *s, int32_t sSize, + UChar *target, int32_t tSize); /** @@ -139,6 +145,17 @@ char* ufmt_unicodeToDefaultCP(const UChar *s, int32_t len); +/** + * Get the number of fraction digits based on the requested precision. + * This is a shortcoming of the formatting API, which doesn't + * support precision + * @param num The number to look at + * @param precision The requested precision + * @return The fraction digits size to use on the formatting API. + */ +int32_t +ufmt_getFractionDigits(double num, int32_t precision); + #endif diff --git a/icu4c/source/extra/ustdio/uprintf.c b/icu4c/source/extra/ustdio/uprintf.c index 9e301386f8..39c8c2a9c4 100644 --- a/icu4c/source/extra/ustdio/uprintf.c +++ b/icu4c/source/extra/ustdio/uprintf.c @@ -276,19 +276,28 @@ u_vfprintf( UFILE *f, { int32_t count; UChar *pattern; + UChar buffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)strlen(patternSpecification) + 1; /* convert from the default codepage to Unicode */ - pattern = ufmt_defaultCPToUnicode(patternSpecification, - (int32_t)strlen(patternSpecification)); - if(pattern == 0) { - return 0; + if (size >= MAX_UCHAR_BUFFER_SIZE(buffer)) { + pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); + if(pattern == 0) { + return 0; + } } + else { + pattern = buffer; + } + ufmt_defaultCPToUnicode(patternSpecification, size, pattern, size); /* do the work */ count = u_vfprintf_u(f, pattern, ap); /* clean up */ - uprv_free(pattern); + if (pattern != buffer) { + uprv_free(pattern); + } return count; } @@ -377,20 +386,31 @@ u_printf_string_handler(UFILE *stream, const u_printf_spec_info *info, const ufmt_args *args) { - UChar *s = NULL; + UChar *s; + UChar buffer[UFMT_DEFAULT_BUFFER_SIZE]; int32_t len, written; + int32_t argSize; const char *arg = (const char*)(args[0].ptrValue); /* convert from the default codepage to Unicode */ if (arg) { - s = ufmt_defaultCPToUnicode(arg, (int32_t)strlen(arg)); + argSize = (int32_t)strlen(arg) + 1; + if (argSize >= MAX_UCHAR_BUFFER_SIZE(buffer)) { + s = ufmt_defaultCPToUnicode(arg, argSize, + (UChar *)uprv_malloc(MAX_UCHAR_BUFFER_NEEDED(argSize)), + MAX_UCHAR_BUFFER_NEEDED(argSize)); + if(s == NULL) { + return 0; + } + } + else { + s = ufmt_defaultCPToUnicode(arg, argSize, buffer, + sizeof(buffer)/sizeof(UChar)); + } } else { s = gNullStr; } - if(s == NULL) { - return 0; - } len = u_strlen(s); /* width = minimum # of characters to write */ @@ -407,7 +427,7 @@ u_printf_string_handler(UFILE *stream, } /* clean up */ - if (gNullStr != s) { + if (gNullStr != s && buffer != s) { uprv_free(s); } @@ -638,15 +658,14 @@ u_printf_char_handler(UFILE *stream, const u_printf_spec_info *info, const ufmt_args *args) { - UChar *s; + UChar s[UTF_MAX_CHAR_LENGTH+1]; int32_t len = 1, written; unsigned char arg = (unsigned char)(args[0].intValue); /* convert from default codepage to Unicode */ - s = ufmt_defaultCPToUnicode((const char *)&arg, 1); - if(s == 0) { - return 0; - } + ufmt_defaultCPToUnicode((const char *)&arg, 2, s, sizeof(s)/sizeof(UChar)); + + /* Remember that this may be a surrogate pair */ if (arg != 0) { len = u_strlen(s); } @@ -664,9 +683,6 @@ u_printf_char_handler(UFILE *stream, written = u_printf_pad_and_justify(stream, info, s, len); } - /* clean up */ - uprv_free(s); - return written; } @@ -720,8 +736,8 @@ u_printf_scientific_handler(UFILE *stream, /* clone the stream's bundle if it isn't owned */ if(! stream->fOwnBundle) { stream->fBundle = u_locbund_clone(stream->fBundle); - stream->fOwnBundle = TRUE; - format = u_locbund_getScientificFormat(stream->fBundle); + stream->fOwnBundle = TRUE; + format = u_locbund_getScientificFormat(stream->fBundle); } srcLen = unum_getSymbol(format, @@ -784,11 +800,13 @@ u_printf_scientific_handler(UFILE *stream, unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); - unum_setSymbol(format, + /* Since we clone the fBundle and we're only using the scientific + format, we don't need to save the old exponent value. */ + /*unum_setSymbol(format, UNUM_EXPONENTIAL_SYMBOL, srcExpBuf, srcLen, - &status); + &status);*/ return u_printf_pad_and_justify(stream, info, result, u_strlen(result)); } @@ -963,11 +981,11 @@ u_printf_currency_handler(UFILE *stream, else if(info->fAlt) { /* '#' means always show decimal point */ /* copy of printf behavior on Solaris - '#' shows 6 digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 2); } else { - /* # of decimal digits is 6 if precision not specified */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + /* # of decimal digits is 2 if precision not specified, 2 is typical */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 2); } /* set whether to show the sign */ @@ -1060,11 +1078,11 @@ u_printf_scidbl_handler(UFILE *stream, /* call the double handler */ return u_printf_double_handler(stream, &scidbl_info, args); } - else if(num < 0.0001 + else if(num < 0.0001 || (scidbl_info.fPrecision < 1 && 1000000.0 <= num) || (scidbl_info.fPrecision != -1 && num > uprv_pow10(scidbl_info.fPrecision))) { /* use 'e' or 'E' notation */ - scidbl_info.fSpec = scidbl_info.fSpec - 1; + scidbl_info.fSpec = scidbl_info.fSpec - 2; /* call the scientific handler */ return u_printf_scientific_handler(stream, &scidbl_info, args); } diff --git a/icu4c/source/extra/ustdio/uscanf.c b/icu4c/source/extra/ustdio/uscanf.c index 016b707524..816dffe78f 100644 --- a/icu4c/source/extra/ustdio/uscanf.c +++ b/icu4c/source/extra/ustdio/uscanf.c @@ -318,19 +318,28 @@ u_vfscanf(UFILE *f, { int32_t converted; UChar *pattern; + UChar buffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)strlen(patternSpecification) + 1; /* convert from the default codepage to Unicode */ - pattern = ufmt_defaultCPToUnicode(patternSpecification, - strlen(patternSpecification)+1); - if(pattern == 0) { - return 0; + if (size >= MAX_UCHAR_BUFFER_SIZE(buffer)) { + pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); + if(pattern == 0) { + return 0; + } } + else { + pattern = buffer; + } + ufmt_defaultCPToUnicode(patternSpecification, size, pattern, size); /* do the work */ converted = u_vfscanf_u(f, pattern, ap); /* clean up */ - uprv_free(pattern); + if (pattern != buffer) { + uprv_free(pattern); + } return converted; }