Fix the output length returned from cWC2MB()/cMB2WC() to be consistent with FromWChar()/ToWChar(); fix the off-by-NUL-length bug, uncovered by this change, in the wxMBConv implementation of FromWChar()/ToWChar() in terms of the legacy WC2MB()/MB2WC() (which are still used in wxMBConv_wxwin); add a simple test for the bugs caused by the old code, which could be easily seen with wxConvISO8859_1 (see #9739)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54646 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin 2008-07-15 23:57:25 +00:00
parent 9ca15f291c
commit f6a0208756
3 changed files with 62 additions and 48 deletions

View File

@ -95,15 +95,18 @@ public:
// Convenience functions for converting strings which may contain embedded // Convenience functions for converting strings which may contain embedded
// NULs and don't have to be NUL-terminated. // NULs and don't have to be NUL-terminated.
// //
// inLen is the length of the buffer including trailing NUL if any: if the // inLen is the length of the buffer including trailing NUL if any or
// last 4 bytes of the buffer are all NULs, these functions are more // wxNO_LEN if the input is NUL-terminated.
// efficient as they avoid copying the string, but otherwise a copy is made
// internally which could be quite bad for (very) long strings.
// //
// outLen receives, if not NULL, the length of the converted string or 0 if // outLen receives, if not NULL, the length of the converted string or 0 if
// the conversion failed (returning 0 and not -1 in this case makes it // the conversion failed (returning 0 and not -1 in this case makes it
// difficult to distinguish between failed conversion and empty input but // difficult to distinguish between failed conversion and empty input but
// this is done for backwards compatibility) // this is done for backwards compatibility). Notice that the rules for
// whether outLen accounts or not for the last NUL are the same as for
// To/FromWChar() above: if inLen is specified, outLen is exactly the
// number of characters converted, whether the last one of them was NUL or
// not. But if inLen == wxNO_LEN then outLen doesn't account for the last
// NUL even though it is present.
const wxWCharBuffer const wxWCharBuffer
cMB2WC(const char *in, size_t inLen, size_t *outLen) const; cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
const wxCharBuffer const wxCharBuffer

View File

@ -155,26 +155,27 @@ public:
const wchar_t* src, const wchar_t* src,
size_t srcLen = wxNO_LEN) const; size_t srcLen = wxNO_LEN) const;
//@{
/** /**
Converts from multibyte encoding to Unicode by calling MB2WC() and Converts from multibyte encoding to Unicode by calling ToWChar() and
allocating a temporary wxWCharBuffer to hold the result. allocating a temporary wxWCharBuffer to hold the result.
The first overload takes a @c NUL-terminated input string. The second This function is a convenient wrapper around ToWChar() as it takes care
one takes a string of exactly the specified length and the string may of allocating the buffer of the necessary size itself. Its parameters
include or not the trailing @c NUL character(s). If the string is not have the same meaning as for ToWChar(), in particular @a inLen can be
@c NUL-terminated, a temporary @c NUL-terminated copy of it suitable specified explicitly in which case exactly that many characters are
for passing to wxMBConv::MB2WC is made, so it is more efficient to converted and @a outLen receives (if non-@NULL) exactly the
ensure that the string is does have the appropriate number of @c NUL corresponding number of wide characters, whether the last one of them
bytes (which is usually 1 but may be 2 or 4 for UTF-16 or UTF-32, see is @c NUL or not. However if @c inLen is @c wxNO_LEN, then @c outLen
wxMBConv::GetMBNulLen), especially for long strings. doesn't count the trailing @c NUL even if it is always present in this
case.
If @a outLen is not-@NULL, it receives the length of the converted Finally notice that if the conversion fails, the returned buffer is
string. invalid and @a outLen is set to 0 (and not @c wxCONV_FAILED for
compatibility concerns).
*/ */
const wxWCharBuffer cMB2WC(const char* in) const; const wxWCharBuffer cMB2WC(const char* in,
const wxWCharBuffer cMB2WC(const char* in, size_t inLen, size_t *outLen) const; size_t inLen = wxNO_LEN,
//@} size_t *outLen = NULL) const;
//@{ //@{
/** /**
@ -189,22 +190,19 @@ public:
const wxWCharBuffer cMB2WX(const char* psz) const; const wxWCharBuffer cMB2WX(const char* psz) const;
//@} //@}
//@{
/** /**
Converts from Unicode to multibyte encoding by calling WC2MB and Converts from Unicode to multibyte encoding by calling FromWChar() and
allocating a temporary wxCharBuffer to hold the result. allocating a temporary wxCharBuffer to hold the result.
The second overload of this function allows to convert a string of the This function is a convenient wrapper around FromWChar() as it takes
given length @e inLen, whether it is @c NUL-terminated or not (for wide care of allocating the buffer of necessary size itself.
character strings, unlike for the multibyte ones, a single @c NUL is
always enough). But notice that just as with @ref wxMBConv::mb2wc Its parameters have the same meaning as the corresponding parameters of
cMB2WC, it is more efficient to pass an already terminated string to FromWChar(), please see the description of cMB2WC() for more details.
this function as otherwise a copy is made internally. If @a outLen is
not-@NULL, it receives the length of the converted string.
*/ */
const wxCharBuffer cWC2MB(const wchar_t* in) const; const wxCharBuffer cWC2MB(const wchar_t* in,
const wxCharBuffer cWC2MB(const wchar_t* in, size_t inLen, size_t *outLen) const; size_t inLen = wxNO_LEN,
//@} size_t *outLen = NULL) const;
//@{ //@{
/** /**

View File

@ -212,11 +212,11 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
if ( lenChunk == wxCONV_FAILED ) if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED; return wxCONV_FAILED;
lenChunk++; // for the L'\0' at the end of this chunk
dstWritten += lenChunk; dstWritten += lenChunk;
if ( !srcEnd )
dstWritten++;
if ( lenChunk == 1 ) if ( !lenChunk )
{ {
// nothing left in the input string, conversion succeeded // nothing left in the input string, conversion succeeded
break; break;
@ -227,10 +227,13 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
if ( dstWritten > dstLen ) if ( dstWritten > dstLen )
return wxCONV_FAILED; return wxCONV_FAILED;
if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED ) // +1 is for trailing NUL
if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
return wxCONV_FAILED; return wxCONV_FAILED;
dst += lenChunk; dst += lenChunk;
if ( !srcEnd )
dst++;
} }
if ( !srcEnd ) if ( !srcEnd )
@ -269,13 +272,15 @@ wxMBConv::FromWChar(char *dst, size_t dstLen,
// the number of chars [which would be] written to dst [if it were not NULL] // the number of chars [which would be] written to dst [if it were not NULL]
size_t dstWritten = 0; size_t dstWritten = 0;
// if we don't know its length we have no choice but to assume that it is
// NUL-terminated (notice that it can still be NUL-terminated even if
// explicit length is given but it doesn't change our return value)
const bool isNulTerminated = srcLen == wxNO_LEN;
// make a copy of the input string unless it is already properly // make a copy of the input string unless it is already properly
// NUL-terminated // NUL-terminated
//
// if we don't know its length we have no choice but to assume that it is,
// indeed, properly terminated
wxWCharBuffer bufTmp; wxWCharBuffer bufTmp;
if ( srcLen == wxNO_LEN ) if ( isNulTerminated )
{ {
srcLen = wxWcslen(src) + 1; srcLen = wxWcslen(src) + 1;
} }
@ -298,18 +303,21 @@ wxMBConv::FromWChar(char *dst, size_t dstLen,
if ( lenChunk == wxCONV_FAILED ) if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED; return wxCONV_FAILED;
lenChunk += lenNul;
dstWritten += lenChunk; dstWritten += lenChunk;
if ( isNulTerminated )
dstWritten += lenNul;
if ( dst ) if ( dst )
{ {
if ( dstWritten > dstLen ) if ( dstWritten > dstLen )
return wxCONV_FAILED; return wxCONV_FAILED;
if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED ) if ( WC2MB(dst, src, lenChunk + lenNul) == wxCONV_FAILED )
return wxCONV_FAILED; return wxCONV_FAILED;
dst += lenChunk; dst += lenChunk;
if ( isNulTerminated )
dst += lenNul;
} }
} }
@ -391,13 +399,19 @@ wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
// because we want the buffer to always be NUL-terminated, even if the // because we want the buffer to always be NUL-terminated, even if the
// input isn't (as otherwise the caller has no way to know its length) // input isn't (as otherwise the caller has no way to know its length)
wxWCharBuffer wbuf(dstLen); wxWCharBuffer wbuf(dstLen);
wbuf.data()[dstLen - 1] = L'\0'; wbuf.data()[dstLen] = L'\0';
if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED ) if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{ {
if ( outLen ) if ( outLen )
{ {
*outLen = dstLen; *outLen = dstLen;
if ( wbuf[dstLen - 1] == L'\0' )
// we also need to handle NUL-terminated input strings
// specially: for them the output is the length of the string
// excluding the trailing NUL, however if we're asked to
// convert a specific number of characters we return the length
// of the resulting output even if it's NUL-terminated
if ( inLen == wxNO_LEN )
(*outLen)--; (*outLen)--;
} }
@ -429,11 +443,10 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
{ {
*outLen = dstLen; *outLen = dstLen;
if ( dstLen >= nulLen && if ( inLen == wxNO_LEN )
!NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
{ {
// in this case the output is NUL-terminated and we're not // in this case both input and output are NUL-terminated
// supposed to count NUL // and we're not supposed to count NUL
*outLen -= nulLen; *outLen -= nulLen;
} }
} }