From 50e27899137071a7379e0bcca3ced4097d982e5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Slav=C3=ADk?= Date: Fri, 22 Jun 2007 11:10:36 +0000 Subject: [PATCH] normalize printf/scanf format strings correctly on all platforms, while accounting for wxArgNormalizer conversions git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@46612 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- include/wx/strvararg.h | 42 +- src/common/strvararg.cpp | 466 +++++++++++++++++- src/common/wxcrt.cpp | 260 +--------- src/common/wxprintf.cpp | 34 +- tests/formatconverter/formatconvertertest.cpp | 173 ++++--- 5 files changed, 630 insertions(+), 345 deletions(-) diff --git a/include/wx/strvararg.h b/include/wx/strvararg.h index ee87423fa7..8e40bd1ff1 100644 --- a/include/wx/strvararg.h +++ b/include/wx/strvararg.h @@ -47,6 +47,9 @@ class WXDLLIMPEXP_BASE wxString; // * wchar_t* if wxUSE_UNICODE_WCHAR or if wxUSE_UNICODE_UTF8 and the current // locale is not UTF-8 // +// Note that wxFormatString *must* be used for the format parameter of these +// functions, otherwise the implementation won't work correctly. +// // Parameters: // [ there are examples in square brackets showing values of the parameters // for the wxFprintf() wrapper for fprintf() function with the following @@ -111,11 +114,12 @@ class WXDLLIMPEXP_BASE wxString; // wxFormatString // ---------------------------------------------------------------------------- -// This class should be used for format string argument of the functions +// This class must be used for format string argument of the functions // defined using WX_DEFINE_VARARG_FUNC_* macros. It converts the string to // char* or wchar_t* for passing to implementation function efficiently (i.e. // without keeping the converted string in memory for longer than necessary, -// like c_str()) +// like c_str()). 
It also converts format string to the correct form that +// accounts for string changes done by wxArgNormalizer<> // // Note that this class can _only_ be used for function arguments! class WXDLLIMPEXP_BASE wxFormatString @@ -138,7 +142,13 @@ public: operator const char*() const { return wx_const_cast(wxFormatString*, this)->AsChar(); } private: + // InputAsChar() returns the value passed to ctor, only converted + // to char, while AsChar() takes the string returned by InputAsChar() + // and does format string conversion on it as well (and similarly for + // ..AsWChar() below) + const char* InputAsChar(); const char* AsChar(); + wxCharBuffer m_convertedChar; #endif // !wxUSE_UNICODE_WCHAR #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY @@ -146,7 +156,9 @@ public: operator const wchar_t*() const { return wx_const_cast(wxFormatString*, this)->AsWChar(); } private: + const wchar_t* InputAsWChar(); const wchar_t* AsWChar(); + wxWCharBuffer m_convertedWChar; #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY private: @@ -433,28 +445,38 @@ WX_ARG_NORMALIZER_FORWARD(wxStdWideString, const wxStdWideString&); #if !wxUSE_UTF8_LOCALE_ONLY template<> struct wxArgNormalizerWchar - : public wxArgNormalizerWchar { - wxArgNormalizerWchar(const wxUniChar& s) - : wxArgNormalizerWchar((wxChar)s) {} + wxArgNormalizerWchar(const wxUniChar& s) : m_value(s) {} + + // FIXME-UTF8: use wchar_t once ANSI build is removed + wxChar get() const { return m_value; } + + wxChar m_value; }; #endif // !wxUSE_UTF8_LOCALE_ONLY #if wxUSE_UNICODE_UTF8 template<> struct wxArgNormalizerUtf8 - : public wxArgNormalizerUtf8 { - wxArgNormalizerUtf8(const wxUniChar& s) - // FIXME-UTF8: this is lossy, we need to convert to string, but that - // requires format string update - : wxArgNormalizerUtf8((const char)s) {} + wxArgNormalizerUtf8(const wxUniChar& s) : m_value(s.AsUTF8()) {} + + const wxStringCharType *get() const { return m_value; } + + wxUniChar::Utf8CharBuffer m_value; }; #endif // 
wxUSE_UNICODE_UTF8 WX_ARG_NORMALIZER_FORWARD(wxUniChar, const wxUniChar&); WX_ARG_NORMALIZER_FORWARD(const wxUniCharRef&, const wxUniChar&); WX_ARG_NORMALIZER_FORWARD(wxUniCharRef, const wxUniChar&); +// convert char/wchar_t to wxUniChar to get output in the right encoding: +WX_ARG_NORMALIZER_FORWARD(char, const wxUniChar&); +WX_ARG_NORMALIZER_FORWARD(const char&, const wxUniChar&); +WX_ARG_NORMALIZER_FORWARD(unsigned char, const wxUniChar&); +WX_ARG_NORMALIZER_FORWARD(const unsigned char&, const wxUniChar&); +WX_ARG_NORMALIZER_FORWARD(wchar_t, const wxUniChar&); +WX_ARG_NORMALIZER_FORWARD(const wchar_t&, const wxUniChar&); #undef WX_ARG_NORMALIZER_FORWARD diff --git a/src/common/strvararg.cpp b/src/common/strvararg.cpp index 36b6af060b..34cac3ca1d 100644 --- a/src/common/strvararg.cpp +++ b/src/common/strvararg.cpp @@ -82,12 +82,454 @@ wxArgNormalizedString::operator wxString() const return GetString(); } +// ---------------------------------------------------------------------------- +// wxFormatConverter: class doing the "%s" and "%c" normalization +// ---------------------------------------------------------------------------- + +/* + There are four problems with wxPrintf() etc. format strings: + + 1) The printf vararg macros convert all forms of strings into + wxStringCharType* representation. This may make the format string + incorrect: for example, if %ls was used together with a wchar_t* + variadic argument, this would no longer work, because the templates + would change wchar_t* argument to wxStringCharType* and %ls would now + be incorrect in e.g. UTF-8 build. We need to make sure only one specifier + form is used. + + 2) To complicate matters further, the meaning of %s and %c is different + under Windows and on Unix. 
The Windows/MS convention is as follows: + + In ANSI mode: + + format specifier results in + ----------------------------------- + %s, %hs, %hS char* + %ls, %S, %lS wchar_t* + + In Unicode mode: + + format specifier results in + ----------------------------------- + %hs, %S, %hS char* + %s, %ls, %lS wchar_t* + + (While on POSIX systems we have %C identical to %lc and %c always means + char (in any mode) while %lc always means wchar_t.) + + In other words, we should _only_ use %s on Windows and %ls on Unix for + wxUSE_UNICODE_WCHAR build. + + 3) To make things even worse, we need two forms in UTF-8 build: one for + passing strings to ANSI functions under UTF-8 locales (this one should + use %s) and one for widechar functions used under non-UTF-8 locales + (this one should use %ls). + + And, of course, the same should be done for %c as well. + + 4) Finally, in UTF-8 build when calling ANSI printf() function, we need to + translate %c to %s, because not every Unicode character can be + represented by a char. + + + wxScanf() family of functions is simpler, because we don't normalize their + variadic arguments and we only have to handle 2) above and only for widechar + versions. +*/ + +template +class wxFormatConverterBase +{ +public: + typedef T CharType; + + wxFormatConverterBase() + { + m_fmtOrig = NULL; + m_fmtLast = NULL; + m_nCopied = 0; + } + + wxCharTypeBuffer Convert(const CharType *format) + { + // this is reset to NULL if we modify the format string + m_fmtOrig = format; + + while ( *format ) + { + if ( CopyFmtChar(*format++) == _T('%') ) + { + // skip any flags + while ( IsFlagChar(*format) ) + CopyFmtChar(*format++); + + // and possible width + if ( *format == _T('*') ) + CopyFmtChar(*format++); + else + SkipDigits(&format); + + // precision? 
+ if ( *format == _T('.') ) + { + CopyFmtChar(*format++); + if ( *format == _T('*') ) + CopyFmtChar(*format++); + else + SkipDigits(&format); + } + + // next we can have a size modifier + SizeModifier size; + + switch ( *format ) + { + case 'h': + size = Size_Short; + format++; + break; + + case 'l': + // "ll" has a different meaning! + if ( format[1] != 'l' ) + { + size = Size_Long; + format++; + break; + } + //else: fall through + + default: + size = Size_Default; + } + + CharType outConv = *format; + SizeModifier outSize = size; + + // and finally we should have the type + switch ( *format ) + { + case _T('S'): + case _T('s'): + // all strings were converted into the same form by + // wxArgNormalizer, this form depends on the context + // in which the value is used (scanf/printf/wprintf): + HandleString(*format, size, outConv, outSize); + break; + + case _T('C'): + case _T('c'): + HandleChar(*format, size, outConv, outSize); + break; + + default: + // nothing special to do + break; + } + + if ( outConv == *format && outSize == size ) // no change + { + if ( size != Size_Default ) + CopyFmtChar(*(format - 1)); + CopyFmtChar(*format); + } + else // something changed + { + switch ( outSize ) + { + case Size_Long: + InsertFmtChar(_T('l')); + break; + + case Size_Short: + InsertFmtChar(_T('h')); + break; + + case Size_Default: + // nothing to do + break; + } + InsertFmtChar(outConv); + } + + format++; + } + } + + // notice that we only translated the string if m_fmtOrig == NULL (as + // set by CopyAllBefore()), otherwise we should simply use the original + // format + if ( m_fmtOrig ) + { + return wxCharTypeBuffer::CreateNonOwned(m_fmtOrig); + } + else + { + // NULL-terminate converted format string: + *m_fmtLast = 0; + return m_fmt; + } + } + + virtual ~wxFormatConverterBase() {} + +protected: + enum SizeModifier + { + Size_Default, + Size_Short, + Size_Long + }; + + // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's' + // respectively), 'size' 
is the preceding size modifier; the new values of + // conversion and size specifiers must be written to outConv and outSize + virtual void HandleString(CharType conv, SizeModifier size, + CharType& outConv, SizeModifier& outSize) = 0; + + // ditto for %C or %c + virtual void HandleChar(CharType conv, SizeModifier size, + CharType& outConv, SizeModifier& outSize) = 0; + +private: + // copy another character to the translated format: this function does the + // copy if we are translating but doesn't do anything at all if we don't, + // so we don't create the translated format string at all unless we really + // need to (i.e. InsertFmtChar() is called) + CharType CopyFmtChar(CharType ch) + { + if ( !m_fmtOrig ) + { + // we're translating, do copy + *(m_fmtLast++) = ch; + } + else + { + // simply increase the count which should be copied by + // CopyAllBefore() later if needed + m_nCopied++; + } + + return ch; + } + + // insert an extra character + void InsertFmtChar(CharType ch) + { + if ( m_fmtOrig ) + { + // so far we haven't translated anything yet + CopyAllBefore(); + } + + *(m_fmtLast++) = ch; + } + + void CopyAllBefore() + { + wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" ); + + // the modified format string is guaranteed to be no longer than + // 3/2 of the original (worst case: the entire format string consists + // of "%s" repeated and is expanded to "%ls" on Unix), so we can + // allocate the buffer now and not worry about running out of space if + // we over-allocate a bit: + size_t fmtLen = wxStrlen(m_fmtOrig); + // worst case is of even length, so there's no rounding error in *3/2: + m_fmt.extend(fmtLen * 3 / 2); + + if ( m_nCopied > 0 ) + wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied); + m_fmtLast = m_fmt.data() + m_nCopied; + + // we won't need it any longer and resetting it also indicates that we + // modified the format + m_fmtOrig = NULL; + } + + static bool IsFlagChar(CharType ch) + { + return ch == _T('-') || ch == _T('+') || + ch 
== _T('0') || ch == _T(' ') || ch == _T('#'); + } + + void SkipDigits(const CharType **ptpc) + { + while ( **ptpc >= _T('0') && **ptpc <= _T('9') ) + CopyFmtChar(*(*ptpc)++); + } + + // the translated format + wxCharTypeBuffer m_fmt; + CharType *m_fmtLast; + + // the original format + const CharType *m_fmtOrig; + + // the number of characters already copied (i.e. already parsed, but left + // unmodified) + size_t m_nCopied; +}; + + + +#ifdef __WINDOWS + +// on Windows, we should use %s and %c regardless of the build: +class wxPrintfFormatConverterWchar : public wxFormatConverterBase +{ + virtual void HandleString(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = Size_Default; + } + + virtual void HandleChar(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 'c'; + outSize = Size_Default; + } +}; + +#else // !__WINDOWS__ + +// on Unix, it's %s for ANSI functions and %ls for widechar: + +#if !wxUSE_UTF8_LOCALE_ONLY +class wxPrintfFormatConverterWchar : public wxFormatConverterBase +{ + virtual void HandleString(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = Size_Long; + } + + virtual void HandleChar(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 'c'; + outSize = Size_Long; + } +}; +#endif // !wxUSE_UTF8_LOCALE_ONLY + +#if wxUSE_UNICODE_UTF8 +class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase +{ + virtual void HandleString(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = Size_Default; + } + + virtual void HandleChar(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + // added complication: %c should be translated to %s in 
UTF-8 build + outConv = 's'; + outSize = Size_Default; + } +}; +#endif // wxUSE_UNICODE_UTF8 + +#endif // __WINDOWS__/!__WINDOWS__ + +#if !wxUSE_UNICODE // FIXME-UTF8: remove +class wxPrintfFormatConverterANSI : public wxFormatConverterBase +{ + virtual void HandleString(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = Size_Default; + } + + virtual void HandleChar(CharType WXUNUSED(conv), + SizeModifier WXUNUSED(size), + CharType& outConv, SizeModifier& outSize) + { + outConv = 'c'; + outSize = Size_Default; + } +}; +#endif // ANSI + +#ifndef __WINDOWS__ +/* + + wxScanf() format translation is different, we need to translate %s to %ls + and %c to %lc on Unix (but not Windows and for widechar functions only!). + + So to use native functions in order to get our semantics we must do the + following translations in Unicode mode: + + wxWidgets specifier POSIX specifier + ---------------------------------------- + + %hc, %C, %hC %c + %c %lc + + */ +class wxScanfFormatConverterWchar : public wxFormatConverterBase +{ + virtual void HandleString(CharType conv, SizeModifier size, + CharType& outConv, SizeModifier& outSize) + { + outConv = 's'; + outSize = GetOutSize(conv == 'S', size); + } + + virtual void HandleChar(CharType conv, SizeModifier size, + CharType& outConv, SizeModifier& outSize) + { + outConv = 'c'; + outSize = GetOutSize(conv == 'C', size); + } + + SizeModifier GetOutSize(bool convIsUpper, SizeModifier size) + { + // %S and %hS -> %s and %lS -> %ls + if ( convIsUpper ) + { + if ( size == Size_Long ) + return Size_Long; + else + return Size_Default; + } + else // %s or %c + { + if ( size == Size_Default ) + return Size_Long; + else + return size; + } + } +}; + +const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format) +{ + return wxScanfFormatConverterWchar().Convert(format); +} +#endif // !__WINDOWS__ + + // 
---------------------------------------------------------------------------- // wxFormatString // ---------------------------------------------------------------------------- #if !wxUSE_UNICODE_WCHAR -const char* wxFormatString::AsChar() +const char* wxFormatString::InputAsChar() { if ( m_char ) return m_char.data(); @@ -110,10 +552,22 @@ const char* wxFormatString::AsChar() return m_char.data(); } + +const char* wxFormatString::AsChar() +{ + if ( !m_convertedChar ) +#if !wxUSE_UNICODE // FIXME-UTF8: remove this + m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar()); +#else + m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar()); +#endif + + return m_convertedChar.data(); +} #endif // !wxUSE_UNICODE_WCHAR #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY -const wchar_t* wxFormatString::AsWChar() +const wchar_t* wxFormatString::InputAsWChar() { if ( m_wchar ) return m_wchar.data(); @@ -144,4 +598,12 @@ const wchar_t* wxFormatString::AsWChar() return m_wchar.data(); } + +const wchar_t* wxFormatString::AsWChar() +{ + if ( !m_convertedWChar ) + m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar()); + + return m_convertedWChar.data(); +} #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY diff --git a/src/common/wxcrt.cpp b/src/common/wxcrt.cpp index c36513f84c..27285316f9 100644 --- a/src/common/wxcrt.cpp +++ b/src/common/wxcrt.cpp @@ -261,7 +261,7 @@ static int vfwscanf(FILE *stream, const wchar_t *format, va_list argptr) return -1; } -#define vswprintf wxCRT_VsnprintfW_ +#define vswprintf wxCRT_VsnprintfW static int vfwprintf(FILE *stream, const wchar_t *format, va_list argptr) { @@ -285,223 +285,6 @@ static int vwprintf(const wchar_t *format, va_list argptr) #endif // wxNEED_WPRINTF -#ifdef wxNEED_PRINTF_CONVERSION - -// ---------------------------------------------------------------------------- -// wxFormatConverter: class doing the "%s" -> "%ls" conversion -// 
---------------------------------------------------------------------------- - -/* - Here are the gory details. We want to follow the Windows/MS conventions, - that is to have - - In ANSI mode: - - format specifier results in - ----------------------------------- - %c, %hc, %hC char - %lc, %C, %lC wchar_t - - In Unicode mode: - - format specifier results in - ----------------------------------- - %hc, %C, %hC char - %c, %lc, %lC wchar_t - - - while on POSIX systems we have %C identical to %lc and %c always means char - (in any mode) while %lc always means wchar_t, - - So to use native functions in order to get our semantics we must do the - following translations in Unicode mode (nothing to do in ANSI mode): - - wxWidgets specifier POSIX specifier - ---------------------------------------- - - %hc, %C, %hC %c - %c %lc - - - And, of course, the same should be done for %s as well. -*/ - -class wxFormatConverter -{ -public: - wxFormatConverter(const wchar_t *format); - - // notice that we only translated the string if m_fmtOrig == NULL (as set - // by CopyAllBefore()), otherwise we should simply use the original format - operator const wchar_t *() const - { return m_fmtOrig ? m_fmtOrig : m_fmt.c_str(); } - -private: - // copy another character to the translated format: this function does the - // copy if we are translating but doesn't do anything at all if we don't, - // so we don't create the translated format string at all unless we really - // need to (i.e. 
InsertFmtChar() is called) - wchar_t CopyFmtChar(wchar_t ch) - { - if ( !m_fmtOrig ) - { - // we're translating, do copy - m_fmt += ch; - } - else - { - // simply increase the count which should be copied by - // CopyAllBefore() later if needed - m_nCopied++; - } - - return ch; - } - - // insert an extra character - void InsertFmtChar(wchar_t ch) - { - if ( m_fmtOrig ) - { - // so far we haven't translated anything yet - CopyAllBefore(); - } - - m_fmt += ch; - } - - void CopyAllBefore() - { - wxASSERT_MSG( m_fmtOrig && m_fmt.empty(), _T("logic error") ); - - m_fmt = wxString(m_fmtOrig, m_nCopied); - - // we won't need it any longer - m_fmtOrig = NULL; - } - - static bool IsFlagChar(wchar_t ch) - { - return ch == _T('-') || ch == _T('+') || - ch == _T('0') || ch == _T(' ') || ch == _T('#'); - } - - void SkipDigits(const wchar_t **ptpc) - { - while ( **ptpc >= _T('0') && **ptpc <= _T('9') ) - CopyFmtChar(*(*ptpc)++); - } - - // the translated format - wxString m_fmt; - - // the original format - const wchar_t *m_fmtOrig; - - // the number of characters already copied - size_t m_nCopied; -}; - -wxFormatConverter::wxFormatConverter(const wchar_t *format) -{ - m_fmtOrig = format; - m_nCopied = 0; - - while ( *format ) - { - if ( CopyFmtChar(*format++) == _T('%') ) - { - // skip any flags - while ( IsFlagChar(*format) ) - CopyFmtChar(*format++); - - // and possible width - if ( *format == _T('*') ) - CopyFmtChar(*format++); - else - SkipDigits(&format); - - // precision? - if ( *format == _T('.') ) - { - CopyFmtChar(*format++); - if ( *format == _T('*') ) - CopyFmtChar(*format++); - else - SkipDigits(&format); - } - - // next we can have a size modifier - enum - { - Default, - Short, - Long - } size; - - switch ( *format ) - { - case _T('h'): - size = Short; - format++; - break; - - case _T('l'): - // "ll" has a different meaning! 
- if ( format[1] != _T('l') ) - { - size = Long; - format++; - break; - } - //else: fall through - - default: - size = Default; - } - - // and finally we should have the type - switch ( *format ) - { - case _T('C'): - case _T('S'): - // %C and %hC -> %c and %lC -> %lc - if ( size == Long ) - CopyFmtChar(_T('l')); - - InsertFmtChar(*format++ == _T('C') ? _T('c') : _T('s')); - break; - - case _T('c'): - case _T('s'): - // %c -> %lc but %hc stays %hc and %lc is still %lc - if ( size == Default) - InsertFmtChar(_T('l')); - // fall through - - default: - // nothing special to do - if ( size != Default ) - CopyFmtChar(*(format - 1)); - CopyFmtChar(*format++); - } - } - } -} - -#else // !wxNEED_PRINTF_CONVERSION - // no conversion necessary - #define wxFormatConverter(x) (x) -#endif // wxNEED_PRINTF_CONVERSION/!wxNEED_PRINTF_CONVERSION - -#ifdef __WXDEBUG__ -// For testing the format converter -wxString wxConvertFormat(const wchar_t *format) -{ - return wxString(wxFormatConverter(format)); -} -#endif - // ---------------------------------------------------------------------------- // wxPrintf(), wxScanf() and relatives // ---------------------------------------------------------------------------- @@ -517,7 +300,7 @@ int wxCRT_PrintfW( const wchar_t *format, ... ) va_list argptr; va_start(argptr, format); - int ret = vwprintf( wxFormatConverter(format), argptr ); + int ret = vwprintf( format, argptr ); va_end(argptr); @@ -531,7 +314,7 @@ int wxCRT_FprintfW( FILE *stream, const wchar_t *format, ... ) va_list argptr; va_start( argptr, format ); - int ret = vfwprintf( stream, wxFormatConverter(format), argptr ); + int ret = vfwprintf( stream, format, argptr ); va_end(argptr); @@ -542,29 +325,22 @@ int wxCRT_FprintfW( FILE *stream, const wchar_t *format, ... 
) #ifndef wxCRT_VfprintfW int wxCRT_VfprintfW( FILE *stream, const wchar_t *format, va_list argptr ) { - return vfwprintf( stream, wxFormatConverter(format), argptr ); + return vfwprintf( stream, format, argptr ); } #endif #ifndef wxCRT_VprintfW int wxCRT_VprintfW( const wchar_t *format, va_list argptr ) { - return vwprintf( wxFormatConverter(format), argptr ); + return vwprintf( format, argptr ); } #endif -#ifndef wxCRT_VsnprintfW -int wxCRT_VsnprintfW(wchar_t *str, size_t size, const wchar_t *format, va_list argptr ) -{ - return vswprintf( str, size, wxFormatConverter(format), argptr ); -} -#endif // !wxCRT_VsnprintfW - #ifndef wxCRT_VsprintfW int wxCRT_VsprintfW( wchar_t *str, const wchar_t *format, va_list argptr ) { // same as for wxSprintf() - return vswprintf(str, INT_MAX / 4, wxFormatConverter(format), argptr); + return vswprintf(str, INT_MAX / 4, format, argptr); } #endif @@ -576,12 +352,12 @@ int wxCRT_ScanfW(const wchar_t *format, ...) #ifdef __VMS #if (__DECCXX_VER >= 70100000) && !defined(__STD_CFRONT) && !defined( __NONAMESPACE_STD ) - int ret = std::vwscanf(wxFormatConverter(format), argptr); + int ret = std::vwscanf(format, argptr); #else - int ret = vwscanf(wxFormatConverter(format), argptr); + int ret = vwscanf(format, argptr); #endif #else - int ret = vwscanf(wxFormatConverter(format), argptr); + int ret = vwscanf(format, argptr); #endif va_end(argptr); @@ -598,12 +374,12 @@ int wxCRT_SscanfW(const wchar_t *str, const wchar_t *format, ...) 
#ifdef __VMS #if (__DECCXX_VER >= 70100000) && !defined(__STD_CFRONT) && !defined( __NONAMESPACE_STD ) - int ret = std::vswscanf(str, wxFormatConverter(format), argptr); + int ret = std::vswscanf(str, format, argptr); #else - int ret = vswscanf(str, wxFormatConverter(format), argptr); + int ret = vswscanf(str, format, argptr); #endif #else - int ret = vswscanf(str, wxFormatConverter(format), argptr); + int ret = vswscanf(str, format, argptr); #endif va_end(argptr); @@ -619,12 +395,12 @@ int wxCRT_FscanfW(FILE *stream, const wchar_t *format, ...) va_start(argptr, format); #ifdef __VMS #if (__DECCXX_VER >= 70100000) && !defined(__STD_CFRONT) && !defined( __NONAMESPACE_STD ) - int ret = std::vfwscanf(stream, wxFormatConverter(format), argptr); + int ret = std::vfwscanf(stream, format, argptr); #else - int ret = vfwscanf(stream, wxFormatConverter(format), argptr); + int ret = vfwscanf(stream, format, argptr); #endif #else - int ret = vfwscanf(stream, wxFormatConverter(format), argptr); + int ret = vfwscanf(stream, format, argptr); #endif va_end(argptr); @@ -638,12 +414,12 @@ int wxCRT_VsscanfW(const wchar_t *str, const wchar_t *format, va_list argptr) { #ifdef __VMS #if (__DECCXX_VER >= 70100000) && !defined(__STD_CFRONT) && !defined( __NONAMESPACE_STD ) - return std::vswscanf(str, wxFormatConverter(format), argptr); + return std::vswscanf(str, format, argptr); #else - return vswscanf(str, wxFormatConverter(format), argptr); + return vswscanf(str, format, argptr); #endif #else - return vswscanf(str, wxFormatConverter(format), argptr); + return vswscanf(str, format, argptr); #endif } #endif diff --git a/src/common/wxprintf.cpp b/src/common/wxprintf.cpp index 2c5a6512f8..ee415e8bac 100644 --- a/src/common/wxprintf.cpp +++ b/src/common/wxprintf.cpp @@ -44,7 +44,7 @@ using namespace std ; // special test mode: define all functions below even if we don't really need // them to be able to test them #ifdef wxTEST_PRINTF - #undef wxCRT_VsnprintfW_ + #undef wxCRT_VsnprintfW 
#undef wxCRT_VsnprintfA #endif @@ -58,9 +58,9 @@ using namespace std ; // common code for both ANSI and Unicode versions // ---------------------------------------------------------------------------- -#if !defined(wxCRT_VsnprintfW_) || !defined(wxCRT_VsnprintfA) +#if !defined(wxCRT_VsnprintfW) || !defined(wxCRT_VsnprintfA) -// wxUSE_STRUTILS says our wxCRT_VsnprintfW_ implementation to use or not to +// wxUSE_STRUTILS says our wxCRT_VsnprintfW implementation to use or not to // use wxStrlen and wxStrncpy functions over one-char processing loops. // // Some benchmarking revealed that wxUSE_STRUTILS == 1 has the following @@ -103,7 +103,7 @@ using namespace std ; namespace { -// the conversion specifiers accepted by wxCRT_VsnprintfW_ +// the conversion specifiers accepted by wxCRT_VsnprintfW enum wxPrintfArgType { wxPAT_INVALID = -1, @@ -130,7 +130,7 @@ enum wxPrintfArgType { wxPAT_NLONGINT // %ln }; -// an argument passed to wxCRT_VsnprintfW_ +// an argument passed to wxCRT_VsnprintfW typedef union { int pad_int; // %d, %i, %o, %u, %x, %X long int pad_longint; // %ld, etc @@ -172,7 +172,7 @@ template<> struct wxPrintfStringHelper // Contains parsed data relative to a conversion specifier given to -// wxCRT_VsnprintfW_ and parsed from the format string +// wxCRT_VsnprintfW and parsed from the format string // NOTE: in C++ there is almost no difference between struct & classes thus // there is no performance gain by using a struct here... template @@ -217,7 +217,7 @@ public: public: // we don't declare this as a constructor otherwise it would be called - // automatically and we don't want this: to be optimized, wxCRT_VsnprintfW_ + // automatically and we don't want this: to be optimized, wxCRT_VsnprintfW // calls this function only on really-used instances of this class. 
void Init(); @@ -868,7 +868,7 @@ static int wxDoVsnprintf(CharType *buf, size_t lenMax, // useful for debugging, to understand if we are really using this function // rather than the system implementation #if 0 - wprintf(L"Using wxCRT_VsnprintfW_\n"); + wprintf(L"Using wxCRT_VsnprintfW\n"); #endif // required memory: @@ -1023,31 +1023,31 @@ static int wxDoVsnprintf(CharType *buf, size_t lenMax, } // anonymous namespace -#endif // !defined(wxCRT_VsnprintfW_) || !defined(wxCRT_VsnprintfA) +#endif // !defined(wxCRT_VsnprintfW) || !defined(wxCRT_VsnprintfA) // ---------------------------------------------------------------------------- -// wxCRT_VsnprintfW_ +// wxCRT_VsnprintfW // ---------------------------------------------------------------------------- -#if !defined(wxCRT_VsnprintfW_) +#if !defined(wxCRT_VsnprintfW) #if !wxUSE_WXVSNPRINTFW - #error "wxUSE_WXVSNPRINTFW must be 1 if our wxCRT_VsnprintfW_ is used" + #error "wxUSE_WXVSNPRINTFW must be 1 if our wxCRT_VsnprintfW is used" #endif -int wxCRT_VsnprintfW_(wchar_t *buf, size_t len, - const wchar_t *format, va_list argptr) +int wxCRT_VsnprintfW(wchar_t *buf, size_t len, + const wchar_t *format, va_list argptr) { return wxDoVsnprintf(buf, len, format, argptr); } -#else // wxCRT_VsnprintfW_ is defined +#else // wxCRT_VsnprintfW is defined #if wxUSE_WXVSNPRINTFW - #error "wxUSE_WXVSNPRINTFW must be 0 if our wxCRT_VsnprintfW_ is not used" + #error "wxUSE_WXVSNPRINTFW must be 0 if our wxCRT_VsnprintfW is not used" #endif -#endif // !wxCRT_VsnprintfW_ +#endif // !wxCRT_VsnprintfW // ---------------------------------------------------------------------------- // wxCRT_VsnprintfA diff --git a/tests/formatconverter/formatconvertertest.cpp b/tests/formatconverter/formatconvertertest.cpp index b7bcff8873..ec629c59ef 100644 --- a/tests/formatconverter/formatconvertertest.cpp +++ b/tests/formatconverter/formatconvertertest.cpp @@ -47,12 +47,6 @@ #include "wx/wx.h" #endif -// wxFormatConverter can only be tested in a Unicode 
non-Windows debug build -// -#if defined(wxNEED_PRINTF_CONVERSION) && defined(__WXDEBUG__) -#define CAN_TEST -extern wxString wxConvertFormat(const wxChar *format); -#endif using CppUnit::TestCase; using std::string; @@ -76,7 +70,6 @@ class FormatConverterTestCase : public TestCase CPPUNIT_TEST(format_c); CPPUNIT_TEST(format_hc); CPPUNIT_TEST(format_lc); -#ifdef CAN_TEST CPPUNIT_TEST(format_S); CPPUNIT_TEST(format_hS); CPPUNIT_TEST(format_lS); @@ -84,7 +77,6 @@ class FormatConverterTestCase : public TestCase CPPUNIT_TEST(format_hC); CPPUNIT_TEST(format_lC); CPPUNIT_TEST(testLonger); -#endif CPPUNIT_TEST_SUITE_END(); void format_d(); @@ -97,7 +89,6 @@ class FormatConverterTestCase : public TestCase void format_hc(); void format_lc(); -#ifdef CAN_TEST void format_S(); void format_hS(); void format_lS(); @@ -106,16 +97,19 @@ class FormatConverterTestCase : public TestCase void format_lC(); void testLonger(); - void doTest(const wxChar *input, const wxChar *expected); - void check(const wxString& input, const wxString& expected); -#endif + void doTest(const char *input, const char *expectedScanf, + const char *expectedUtf8, + const char *expectedWcharUnix, + const char *expectedWcharWindows); + void check(const wxString& input, const wxString& expectedScanf, + const wxString& expectedUtf8, + const wxString& expectedWcharUnix, + const wxString& expectedWcharWindows); }; void FormatConverterTestCase::format_d() { -#ifdef CAN_TEST - doTest(_T("d"), _T("d")); -#endif + doTest("d", "d", "d", "d", "d"); CPPUNIT_ASSERT(wxString::Format(_T("%d"), 255) == _T("255")); CPPUNIT_ASSERT(wxString::Format(_T("%05d"), 255) == _T("00255")); CPPUNIT_ASSERT(wxString::Format(_T("% 5d"), 255) == _T(" 255")); @@ -127,27 +121,21 @@ void FormatConverterTestCase::format_d() void FormatConverterTestCase::format_hd() { -#ifdef CAN_TEST - doTest(_T("hd"), _T("hd")); -#endif + doTest("hd", "hd", "hd", "hd", "hd"); short s = 32767; CPPUNIT_ASSERT(wxString::Format(_T("%hd"), s) == _T("32767")); } 
void FormatConverterTestCase::format_ld() { -#ifdef CAN_TEST - doTest(_T("ld"), _T("ld")); -#endif + doTest("ld", "ld", "ld", "ld", "ld"); long l = 2147483647L; CPPUNIT_ASSERT(wxString::Format(_T("%ld"), l) == _T("2147483647")); } void FormatConverterTestCase::format_s() { -#ifdef CAN_TEST - doTest(_T("s"), _T("ls")); -#endif + doTest("s", "ls", "s", "ls", "s"); CPPUNIT_ASSERT(wxString::Format(_T("%s!"), _T("test")) == _T("test!")); CPPUNIT_ASSERT(wxString::Format(_T("%6s!"), _T("test")) == _T(" test!")); CPPUNIT_ASSERT(wxString::Format(_T("%-6s!"), _T("test")) == _T("test !")); @@ -157,9 +145,7 @@ void FormatConverterTestCase::format_s() void FormatConverterTestCase::format_hs() { -#ifdef CAN_TEST - doTest(_T("hs"), _T("hs")); -#endif + doTest("hs", "hs", "s", "ls", "s"); CPPUNIT_ASSERT(wxString::Format(wxString(_T("%hs!")), "test") == _T("test!")); CPPUNIT_ASSERT(wxString::Format(wxString(_T("%6hs!")), "test") == _T(" test!")); CPPUNIT_ASSERT(wxString::Format(wxString(_T("%-6hs!")), "test") == _T("test !")); @@ -169,9 +155,7 @@ void FormatConverterTestCase::format_hs() void FormatConverterTestCase::format_ls() { -#ifdef CAN_TEST - doTest(_T("ls"), _T("ls")); -#endif + doTest("ls", "ls", "s", "ls", "s"); CPPUNIT_ASSERT(wxString::Format(_T("%ls!"), L"test") == _T("test!")); CPPUNIT_ASSERT(wxString::Format(_T("%6ls!"), L"test") == _T(" test!")); CPPUNIT_ASSERT(wxString::Format(_T("%-6ls!"), L"test") == _T("test !")); @@ -181,9 +165,7 @@ void FormatConverterTestCase::format_ls() void FormatConverterTestCase::format_c() { -#ifdef CAN_TEST - doTest(_T("c"), _T("lc")); -#endif + doTest("c", "lc", "s", "lc", "c"); CPPUNIT_ASSERT(wxString::Format(_T("%c"), _T('x')) == _T("x")); CPPUNIT_ASSERT(wxString::Format(_T("%2c"), _T('x')) == _T(" x")); CPPUNIT_ASSERT(wxString::Format(_T("%-2c"), _T('x')) == _T("x ")); @@ -191,9 +173,7 @@ void FormatConverterTestCase::format_c() void FormatConverterTestCase::format_hc() { -#ifdef CAN_TEST - doTest(_T("hc"), _T("hc")); -#endif + 
doTest("hc", "hc", "s", "lc", "c"); CPPUNIT_ASSERT(wxString::Format(wxString(_T("%hc")), 'x') == _T("x")); CPPUNIT_ASSERT(wxString::Format(wxString(_T("%2hc")), 'x') == _T(" x")); CPPUNIT_ASSERT(wxString::Format(wxString(_T("%-2hc")), 'x') == _T("x ")); @@ -201,23 +181,26 @@ void FormatConverterTestCase::format_hc() void FormatConverterTestCase::format_lc() { -#ifdef CAN_TEST - doTest(_T("lc"), _T("lc")); -#endif + doTest("lc", "lc", "s", "lc", "c"); CPPUNIT_ASSERT(wxString::Format(_T("%lc"), L'x') == _T("x")); CPPUNIT_ASSERT(wxString::Format(_T("%2lc"), L'x') == _T(" x")); CPPUNIT_ASSERT(wxString::Format(_T("%-2lc"), L'x') == _T("x ")); } -#ifdef CAN_TEST -void FormatConverterTestCase::format_S() { doTest(_T("S"), _T("s")); } -void FormatConverterTestCase::format_hS() { doTest(_T("hS"), _T("s")); } -void FormatConverterTestCase::format_lS() { doTest(_T("lS"), _T("ls")); } +void FormatConverterTestCase::format_S() + { doTest("S", "s", "s", "ls", "s"); } +void FormatConverterTestCase::format_hS() + { doTest("hS", "s", "s", "ls", "s"); } +void FormatConverterTestCase::format_lS() + { doTest("lS", "ls", "s", "ls", "s"); } -void FormatConverterTestCase::format_C() { doTest(_T("C"), _T("c")); } -void FormatConverterTestCase::format_hC() { doTest(_T("hC"), _T("c")); } -void FormatConverterTestCase::format_lC() { doTest(_T("lC"), _T("lc")); } +void FormatConverterTestCase::format_C() + { doTest("C", "c", "s", "lc", "c"); } +void FormatConverterTestCase::format_hC() + { doTest("hC", "c", "s", "lc", "c"); } +void FormatConverterTestCase::format_lC() + { doTest("lC", "lc", "s", "lc", "c"); } // It's possible that although a format converts correctly alone, it leaves // the converter in a bad state that will affect subsequent formats, so @@ -226,42 +209,49 @@ void FormatConverterTestCase::format_lC() { doTest(_T("lC"), _T("lc")); } void FormatConverterTestCase::testLonger() { struct { - const wxChar *input; - const wxChar *expected; + const char *input; + const char 
*expectedScanf; + const char *expectedWcharUnix; + const char *expectedWcharWindows; + const char *expectedUtf8; } formats[] = { - { _T("%d"), _T("%d"), }, - { _T("%*hd"), _T("%*hd") }, - { _T("%.4ld"), _T("%.4ld") }, - { _T("%-.*s"), _T("%-.*ls") }, - { _T("%.*hs"), _T("%.*hs"), }, - { _T("%-.9ls"), _T("%-.9ls") }, - { _T("%-*c"), _T("%-*lc") }, - { _T("%3hc"), _T("%3hc") }, - { _T("%-5lc"), _T("%-5lc") } + { "%d", "%d", "%d", "%d", "%d" }, + { "%*hd", "%*hd", "%*hd", "%*hd", "%*hd" }, + { "%.4ld", "%.4ld", "%.4ld", "%.4ld", "%.4ld" }, + { "%-.*s", "%-.*ls", "%-.*ls", "%-.*s", "%-.*s" }, + { "%.*hs", "%.*hs", "%.*ls", "%.*s", "%.*s" }, + { "%-.9ls", "%-.9ls", "%-.9ls", "%-.9s", "%-.9s" }, + { "%-*c", "%-*lc", "%-*lc", "%-*c", "%-*s" }, + { "%3hc", "%3hc", "%3lc", "%3c", "%3s" }, + { "%-5lc", "%-5lc", "%-5lc", "%-5c", "%-5s" } }; size_t i, j; - // exclude patterns that don't translate correctly alone from the test - for (i = 0; i < WXSIZEOF(formats); i++) - if (wxConvertFormat(formats[i].input) != formats[i].expected) - formats[i].input = NULL; - // test all possible pairs of the above patterns for (i = 0; i < WXSIZEOF(formats); i++) { if (formats[i].input) { wxString input(formats[i].input); - wxString expected(formats[i].expected); + wxString expectedScanf(formats[i].expectedScanf); + wxString expectedUtf8(formats[i].expectedUtf8); + wxString expectedWcharUnix(formats[i].expectedWcharUnix); + wxString expectedWcharWindows(formats[i].expectedWcharWindows); for (j = 0; j < WXSIZEOF(formats); j++) if (formats[j].input) check(input + formats[j].input, - expected + formats[j].expected); + expectedScanf + formats[j].expectedScanf, + expectedUtf8 + formats[j].expectedUtf8, + expectedWcharUnix + formats[j].expectedWcharUnix, + expectedWcharWindows + formats[j].expectedWcharWindows); } } } -void FormatConverterTestCase::doTest(const wxChar *input, - const wxChar *expected) +void FormatConverterTestCase::doTest(const char *input, + const char *expectedScanf, + const char 
*expectedUtf8, + const char *expectedWcharUnix, + const char *expectedWcharWindows) { static const wxChar *flag_width[] = { _T(""), _T("*"), _T("10"), _T("-*"), _T("-10"), NULL }; @@ -280,20 +270,55 @@ void FormatConverterTestCase::doTest(const wxChar *input, for (const wxChar **prec = precs; *prec; prec++) for (const wxChar **width = flag_width; *width; width++) check(fmt + *width + *prec + input, - fmt + *width + *prec + expected); + fmt + *width + *prec + expectedScanf, + fmt + *width + *prec + expectedUtf8, + fmt + *width + *prec + expectedWcharUnix, + fmt + *width + *prec + expectedWcharWindows); } void FormatConverterTestCase::check(const wxString& input, - const wxString& expected) + const wxString& expectedScanf, + const wxString& expectedUtf8, + const wxString& expectedWcharUnix, + const wxString& expectedWcharWindows) { - wxString result = wxConvertFormat(input.wc_str()); - wxString msg = _T("input: '") + input + - _T("', result: '") + result + - _T("', expected: '") + expected + _T("'"); - CPPUNIT_ASSERT_MESSAGE(string(msg.mb_str()), result == expected); + wxString result, msg; + +#ifndef __WINDOWS__ + // on windows, wxScanf() string needs no modifications + result = wxScanfConvertFormatW(input.wc_str()); + + msg = _T("input: '") + input + + _T("', result (scanf): '") + result + + _T("', expected: '") + expectedScanf + _T("'"); + CPPUNIT_ASSERT_MESSAGE(string(msg.mb_str()), result == expectedScanf); +#endif // !__WINDOWS__ + +#if wxUSE_UNICODE_UTF8 + result = (const char*)wxFormatString(input); + + msg = _T("input: '") + input + + _T("', result (UTF-8): '") + result + + _T("', expected: '") + expectedUtf8 + _T("'"); + CPPUNIT_ASSERT_MESSAGE(string(msg.mb_str()), result == expectedUtf8); +#endif // wxUSE_UNICODE_UTF8 + +#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY + result = (const wchar_t*)wxFormatString(input); + +#ifdef __WINDOWS__ + wxString expectedWchar(expectedWcharWindows); +#else + wxString expectedWchar(expectedWcharUnix); +#endif + + msg = 
_T("input: '") + input + + _T("', result (wchar_t): '") + result + + _T("', expected: '") + expectedWchar + _T("'"); + CPPUNIT_ASSERT_MESSAGE(string(msg.mb_str()), result == expectedWchar); +#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY } -#endif // CAN_TEST // register in the unnamed registry so that these tests are run by default CPPUNIT_TEST_SUITE_REGISTRATION(FormatConverterTestCase);