Fix the output length returned from cWC2MB()/cMB2WC() to be consistent with FromWChar()/ToWChar(); fix the off-by-NUL-length bug in the wxMBConv implementation of FromWChar()/ToWChar() in terms of the legacy WC2MB()/MB2WC() (which are still used in wxMBConv_wxwin) that this uncovered; add a simple test for the bugs the old code resulted in, which could be easily seen with wxConvISO8859_1 (see #9739)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54646 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin 2008-07-15 23:57:25 +00:00
parent 9ca15f291c
commit f6a0208756
3 changed files with 62 additions and 48 deletions

View File

@ -95,15 +95,18 @@ public:
// Convenience functions for converting strings which may contain embedded
// NULs and don't have to be NUL-terminated.
//
// inLen is the length of the buffer including trailing NUL if any: if the
// last 4 bytes of the buffer are all NULs, these functions are more
// efficient as they avoid copying the string, but otherwise a copy is made
// internally which could be quite bad for (very) long strings.
// inLen is the length of the buffer including trailing NUL if any or
// wxNO_LEN if the input is NUL-terminated.
//
// outLen receives, if not NULL, the length of the converted string or 0 if
// the conversion failed (returning 0 and not -1 in this case makes it
// difficult to distinguish between failed conversion and empty input but
// this is done for backwards compatibility)
// this is done for backwards compatibility). Notice that the rules for
// whether outLen accounts or not for the last NUL are the same as for
// To/FromWChar() above: if inLen is specified, outLen is exactly the
// number of characters converted, whether the last one of them was NUL or
// not. But if inLen == wxNO_LEN then outLen doesn't account for the last
// NUL even though it is present.
const wxWCharBuffer
cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
const wxCharBuffer

View File

@ -155,26 +155,27 @@ public:
const wchar_t* src,
size_t srcLen = wxNO_LEN) const;
//@{
/**
Converts from multibyte encoding to Unicode by calling MB2WC() and
Converts from multibyte encoding to Unicode by calling ToWChar() and
allocating a temporary wxWCharBuffer to hold the result.
The first overload takes a @c NUL-terminated input string. The second
one takes a string of exactly the specified length and the string may
include or not the trailing @c NUL character(s). If the string is not
@c NUL-terminated, a temporary @c NUL-terminated copy of it suitable
for passing to wxMBConv::MB2WC is made, so it is more efficient to
ensure that the string does have the appropriate number of @c NUL
bytes (which is usually 1 but may be 2 or 4 for UTF-16 or UTF-32, see
wxMBConv::GetMBNulLen), especially for long strings.
This function is a convenient wrapper around ToWChar() as it takes care
of allocating the buffer of the necessary size itself. Its parameters
have the same meaning as for ToWChar(), in particular @a inLen can be
specified explicitly in which case exactly that many characters are
converted and @a outLen receives (if non-@NULL) exactly the
corresponding number of wide characters, whether the last one of them
is @c NUL or not. However if @a inLen is @c wxNO_LEN, then @a outLen
doesn't count the trailing @c NUL even though it is always present in this
case.
If @a outLen is not-@NULL, it receives the length of the converted
string.
Finally notice that if the conversion fails, the returned buffer is
invalid and @a outLen is set to 0 (and not @c wxCONV_FAILED for
compatibility concerns).
*/
const wxWCharBuffer cMB2WC(const char* in) const;
const wxWCharBuffer cMB2WC(const char* in, size_t inLen, size_t *outLen) const;
//@}
const wxWCharBuffer cMB2WC(const char* in,
size_t inLen = wxNO_LEN,
size_t *outLen = NULL) const;
//@{
/**
@ -189,22 +190,19 @@ public:
const wxWCharBuffer cMB2WX(const char* psz) const;
//@}
//@{
/**
Converts from Unicode to multibyte encoding by calling WC2MB and
Converts from Unicode to multibyte encoding by calling FromWChar() and
allocating a temporary wxCharBuffer to hold the result.
The second overload of this function allows converting a string of the
given length @e inLen, whether it is @c NUL-terminated or not (for wide
character strings, unlike for the multibyte ones, a single @c NUL is
always enough). But notice that just as with @ref wxMBConv::mb2wc
cMB2WC, it is more efficient to pass an already terminated string to
this function as otherwise a copy is made internally. If @a outLen is
not-@NULL, it receives the length of the converted string.
This function is a convenient wrapper around FromWChar() as it takes
care of allocating the buffer of the necessary size itself.
Its parameters have the same meaning as the corresponding parameters of
FromWChar(), please see the description of cMB2WC() for more details.
*/
const wxCharBuffer cWC2MB(const wchar_t* in) const;
const wxCharBuffer cWC2MB(const wchar_t* in, size_t inLen, size_t *outLen) const;
//@}
const wxCharBuffer cWC2MB(const wchar_t* in,
size_t inLen = wxNO_LEN,
size_t *outLen = NULL) const;
//@{
/**

View File

@ -212,11 +212,11 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
lenChunk++; // for the L'\0' at the end of this chunk
dstWritten += lenChunk;
if ( !srcEnd )
dstWritten++;
if ( lenChunk == 1 )
if ( !lenChunk )
{
// nothing left in the input string, conversion succeeded
break;
@ -227,10 +227,13 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
if ( dstWritten > dstLen )
return wxCONV_FAILED;
if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
// +1 is for trailing NUL
if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
return wxCONV_FAILED;
dst += lenChunk;
if ( !srcEnd )
dst++;
}
if ( !srcEnd )
@ -269,13 +272,15 @@ wxMBConv::FromWChar(char *dst, size_t dstLen,
// the number of chars [which would be] written to dst [if it were not NULL]
size_t dstWritten = 0;
// if we don't know its length we have no choice but to assume that it is
// NUL-terminated (notice that it can still be NUL-terminated even if
// explicit length is given but it doesn't change our return value)
const bool isNulTerminated = srcLen == wxNO_LEN;
// make a copy of the input string unless it is already properly
// NUL-terminated
//
// if we don't know its length we have no choice but to assume that it is,
// indeed, properly terminated
wxWCharBuffer bufTmp;
if ( srcLen == wxNO_LEN )
if ( isNulTerminated )
{
srcLen = wxWcslen(src) + 1;
}
@ -298,18 +303,21 @@ wxMBConv::FromWChar(char *dst, size_t dstLen,
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
lenChunk += lenNul;
dstWritten += lenChunk;
if ( isNulTerminated )
dstWritten += lenNul;
if ( dst )
{
if ( dstWritten > dstLen )
return wxCONV_FAILED;
if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
if ( WC2MB(dst, src, lenChunk + lenNul) == wxCONV_FAILED )
return wxCONV_FAILED;
dst += lenChunk;
if ( isNulTerminated )
dst += lenNul;
}
}
@ -391,13 +399,19 @@ wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
// because we want the buffer to always be NUL-terminated, even if the
// input isn't (as otherwise the caller has no way to know its length)
wxWCharBuffer wbuf(dstLen);
wbuf.data()[dstLen - 1] = L'\0';
wbuf.data()[dstLen] = L'\0';
if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
{
if ( outLen )
{
*outLen = dstLen;
if ( wbuf[dstLen - 1] == L'\0' )
// we also need to handle NUL-terminated input strings
// specially: for them the output is the length of the string
// excluding the trailing NUL, however if we're asked to
// convert a specific number of characters we return the length
// of the resulting output even if it's NUL-terminated
if ( inLen == wxNO_LEN )
(*outLen)--;
}
@ -429,11 +443,10 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
{
*outLen = dstLen;
if ( dstLen >= nulLen &&
!NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
if ( inLen == wxNO_LEN )
{
// in this case the output is NUL-terminated and we're not
// supposed to count NUL
// in this case both input and output are NUL-terminated
// and we're not supposed to count NUL
*outLen -= nulLen;
}
}