From f6a02087565a52d5c2a2ef7652d56a6a06bfa304 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Tue, 15 Jul 2008 23:57:25 +0000 Subject: [PATCH] fix the output length returned from cWC2MB/MB2WC() to be consistent with From/ToWChar(); fix off by NUL-length bug in wxMBConv implementation of From/ToWChar() in terms of legacy WC2MB/MB2WC() (which are still used in wxMBConv_wxwin) that this uncovered; added a simple test for the bugs the old code resulted in which could be easily seen with wxConvISO8859_1 (see #9739) git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54646 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- include/wx/strconv.h | 13 +++++++---- interface/wx/strconv.h | 52 ++++++++++++++++++++---------------------- src/common/strconv.cpp | 45 +++++++++++++++++++++++------------- 3 files changed, 62 insertions(+), 48 deletions(-) diff --git a/include/wx/strconv.h b/include/wx/strconv.h index a4a18e5e43..7f5ee6c2a5 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -95,15 +95,18 @@ public: // Convenience functions for converting strings which may contain embedded // NULs and don't have to be NUL-terminated. // - // inLen is the length of the buffer including trailing NUL if any: if the - // last 4 bytes of the buffer are all NULs, these functions are more - // efficient as they avoid copying the string, but otherwise a copy is made - // internally which could be quite bad for (very) long strings. + // inLen is the length of the buffer including trailing NUL if any or + // wxNO_LEN if the input is NUL-terminated. // // outLen receives, if not NULL, the length of the converted string or 0 if // the conversion failed (returning 0 and not -1 in this case makes it // difficult to distinguish between failed conversion and empty input but - // this is done for backwards compatibility) + // this is done for backwards compatibility). 
Notice that the rules for + // whether outLen accounts or not for the last NUL are the same as for + // To/FromWChar() above: if inLen is specified, outLen is exactly the + // number of characters converted, whether the last one of them was NUL or + // not. But if inLen == wxNO_LEN then outLen doesn't account for the last + // NUL even though it is present. const wxWCharBuffer cMB2WC(const char *in, size_t inLen, size_t *outLen) const; const wxCharBuffer diff --git a/interface/wx/strconv.h b/interface/wx/strconv.h index cdda51bc0c..4bb8683042 100644 --- a/interface/wx/strconv.h +++ b/interface/wx/strconv.h @@ -155,26 +155,27 @@ public: const wchar_t* src, size_t srcLen = wxNO_LEN) const; - //@{ /** - Converts from multibyte encoding to Unicode by calling MB2WC() and + Converts from multibyte encoding to Unicode by calling ToWChar() and allocating a temporary wxWCharBuffer to hold the result. - The first overload takes a @c NUL-terminated input string. The second - one takes a string of exactly the specified length and the string may - include or not the trailing @c NUL character(s). If the string is not - @c NUL-terminated, a temporary @c NUL-terminated copy of it suitable - for passing to wxMBConv::MB2WC is made, so it is more efficient to - ensure that the string is does have the appropriate number of @c NUL - bytes (which is usually 1 but may be 2 or 4 for UTF-16 or UTF-32, see - wxMBConv::GetMBNulLen), especially for long strings. + This function is a convenient wrapper around ToWChar() as it takes care + of allocating the buffer of the necessary size itself. Its parameters + have the same meaning as for ToWChar(), in particular @a inLen can be + specified explicitly in which case exactly that many characters are + converted and @a outLen receives (if non-@NULL) exactly the + corresponding number of wide characters, whether the last one of them + is @c NUL or not. 
 However if @a inLen is @c wxNO_LEN, then @a outLen + doesn't count the trailing @c NUL even though it is always present in + this case. - If @a outLen is not-@NULL, it receives the length of the converted - string. + Finally notice that if the conversion fails, the returned buffer is + invalid and @a outLen is set to 0 (and not to @c wxCONV_FAILED, for + compatibility reasons). */ - const wxWCharBuffer cMB2WC(const char* in) const; - const wxWCharBuffer cMB2WC(const char* in, size_t inLen, size_t *outLen) const; - //@} + const wxWCharBuffer cMB2WC(const char* in, + size_t inLen = wxNO_LEN, + size_t *outLen = NULL) const; //@{ /** @@ -189,22 +190,19 @@ public: const wxWCharBuffer cMB2WX(const char* psz) const; //@} - //@{ /** - Converts from Unicode to multibyte encoding by calling WC2MB and + Converts from Unicode to multibyte encoding by calling FromWChar() and allocating a temporary wxCharBuffer to hold the result. - The second overload of this function allows to convert a string of the - given length @e inLen, whether it is @c NUL-terminated or not (for wide - character strings, unlike for the multibyte ones, a single @c NUL is - always enough). But notice that just as with @ref wxMBConv::mb2wc - cMB2WC, it is more efficient to pass an already terminated string to - this function as otherwise a copy is made internally. If @a outLen is - not-@NULL, it receives the length of the converted string. + This function is a convenient wrapper around FromWChar() as it takes + care of allocating the buffer of necessary size itself. + + Its parameters have the same meaning as the corresponding parameters of + FromWChar(); please see the description of cMB2WC() for more details. 
*/ - const wxCharBuffer cWC2MB(const wchar_t* in) const; - const wxCharBuffer cWC2MB(const wchar_t* in, size_t inLen, size_t *outLen) const; - //@} + const wxCharBuffer cWC2MB(const wchar_t* in, + size_t inLen = wxNO_LEN, + size_t *outLen = NULL) const; //@{ /** diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index b6c3ca3545..54ac2c8489 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -212,11 +212,11 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, if ( lenChunk == wxCONV_FAILED ) return wxCONV_FAILED; - lenChunk++; // for the L'\0' at the end of this chunk - dstWritten += lenChunk; + if ( !srcEnd ) + dstWritten++; - if ( lenChunk == 1 ) + if ( !lenChunk ) { // nothing left in the input string, conversion succeeded break; @@ -227,10 +227,13 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, if ( dstWritten > dstLen ) return wxCONV_FAILED; - if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED ) + // +1 is for trailing NUL + if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED ) return wxCONV_FAILED; dst += lenChunk; + if ( !srcEnd ) + dst++; } if ( !srcEnd ) @@ -269,13 +272,15 @@ wxMBConv::FromWChar(char *dst, size_t dstLen, // the number of chars [which would be] written to dst [if it were not NULL] size_t dstWritten = 0; + // if we don't know its length we have no choice but to assume that it is + // NUL-terminated (notice that it can still be NUL-terminated even if + // explicit length is given but it doesn't change our return value) + const bool isNulTerminated = srcLen == wxNO_LEN; + // make a copy of the input string unless it is already properly // NUL-terminated - // - // if we don't know its length we have no choice but to assume that it is, - // indeed, properly terminated wxWCharBuffer bufTmp; - if ( srcLen == wxNO_LEN ) + if ( isNulTerminated ) { srcLen = wxWcslen(src) + 1; } @@ -298,18 +303,21 @@ wxMBConv::FromWChar(char *dst, size_t dstLen, if ( lenChunk == wxCONV_FAILED ) return wxCONV_FAILED; - lenChunk += lenNul; 
dstWritten += lenChunk; + if ( isNulTerminated ) + dstWritten += lenNul; if ( dst ) { if ( dstWritten > dstLen ) return wxCONV_FAILED; - if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED ) + if ( WC2MB(dst, src, lenChunk + lenNul) == wxCONV_FAILED ) return wxCONV_FAILED; dst += lenChunk; + if ( isNulTerminated ) + dst += lenNul; } } @@ -391,13 +399,19 @@ wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const // because we want the buffer to always be NUL-terminated, even if the // input isn't (as otherwise the caller has no way to know its length) wxWCharBuffer wbuf(dstLen); - wbuf.data()[dstLen - 1] = L'\0'; + wbuf.data()[dstLen] = L'\0'; if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED ) { if ( outLen ) { *outLen = dstLen; - if ( wbuf[dstLen - 1] == L'\0' ) + + // we also need to handle NUL-terminated input strings + // specially: for them the output is the length of the string + // excluding the trailing NUL, however if we're asked to + // convert a specific number of characters we return the length + // of the resulting output even if it's NUL-terminated + if ( inLen == wxNO_LEN ) (*outLen)--; } @@ -429,11 +443,10 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const { *outLen = dstLen; - if ( dstLen >= nulLen && - !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) ) + if ( inLen == wxNO_LEN ) { - // in this case the output is NUL-terminated and we're not - // supposed to count NUL + // in this case both input and output are NUL-terminated + // and we're not supposed to count NUL *outLen -= nulLen; } }