Fix the output length returned from cWC2MB()/cMB2WC() to be consistent with From/ToWChar(); fix the off-by-NUL-length bug in the wxMBConv implementation of From/ToWChar() in terms of the legacy WC2MB()/MB2WC() (which are still used in wxMBConv_wxwin) that this uncovered; added a simple test for the bugs the old code resulted in, which could be easily seen with wxConvISO8859_1 (see #9739)
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54646 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
parent
9ca15f291c
commit
f6a0208756
@ -95,15 +95,18 @@ public:
|
|||||||
// Convenience functions for converting strings which may contain embedded
|
// Convenience functions for converting strings which may contain embedded
|
||||||
// NULs and don't have to be NUL-terminated.
|
// NULs and don't have to be NUL-terminated.
|
||||||
//
|
//
|
||||||
// inLen is the length of the buffer including trailing NUL if any: if the
|
// inLen is the length of the buffer including trailing NUL if any or
|
||||||
// last 4 bytes of the buffer are all NULs, these functions are more
|
// wxNO_LEN if the input is NUL-terminated.
|
||||||
// efficient as they avoid copying the string, but otherwise a copy is made
|
|
||||||
// internally which could be quite bad for (very) long strings.
|
|
||||||
//
|
//
|
||||||
// outLen receives, if not NULL, the length of the converted string or 0 if
|
// outLen receives, if not NULL, the length of the converted string or 0 if
|
||||||
// the conversion failed (returning 0 and not -1 in this case makes it
|
// the conversion failed (returning 0 and not -1 in this case makes it
|
||||||
// difficult to distinguish between failed conversion and empty input but
|
// difficult to distinguish between failed conversion and empty input but
|
||||||
// this is done for backwards compatibility)
|
// this is done for backwards compatibility). Notice that the rules for
|
||||||
|
// whether outLen accounts or not for the last NUL are the same as for
|
||||||
|
// To/FromWChar() above: if inLen is specified, outLen is exactly the
|
||||||
|
// number of characters converted, whether the last one of them was NUL or
|
||||||
|
// not. But if inLen == wxNO_LEN then outLen doesn't account for the last
|
||||||
|
// NUL even though it is present.
|
||||||
const wxWCharBuffer
|
const wxWCharBuffer
|
||||||
cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
|
cMB2WC(const char *in, size_t inLen, size_t *outLen) const;
|
||||||
const wxCharBuffer
|
const wxCharBuffer
|
||||||
|
@ -155,26 +155,27 @@ public:
|
|||||||
const wchar_t* src,
|
const wchar_t* src,
|
||||||
size_t srcLen = wxNO_LEN) const;
|
size_t srcLen = wxNO_LEN) const;
|
||||||
|
|
||||||
//@{
|
|
||||||
/**
|
/**
|
||||||
Converts from multibyte encoding to Unicode by calling MB2WC() and
|
Converts from multibyte encoding to Unicode by calling ToWChar() and
|
||||||
allocating a temporary wxWCharBuffer to hold the result.
|
allocating a temporary wxWCharBuffer to hold the result.
|
||||||
|
|
||||||
The first overload takes a @c NUL-terminated input string. The second
|
This function is a convenient wrapper around ToWChar() as it takes care
|
||||||
one takes a string of exactly the specified length and the string may
|
of allocating the buffer of the necessary size itself. Its parameters
|
||||||
include or not the trailing @c NUL character(s). If the string is not
|
have the same meaning as for ToWChar(), in particular @a inLen can be
|
||||||
@c NUL-terminated, a temporary @c NUL-terminated copy of it suitable
|
specified explicitly in which case exactly that many characters are
|
||||||
for passing to wxMBConv::MB2WC is made, so it is more efficient to
|
converted and @a outLen receives (if non-@NULL) exactly the
|
||||||
ensure that the string is does have the appropriate number of @c NUL
|
corresponding number of wide characters, whether the last one of them
|
||||||
bytes (which is usually 1 but may be 2 or 4 for UTF-16 or UTF-32, see
|
is @c NUL or not. However if @a inLen is @c wxNO_LEN, then @a outLen
|
||||||
wxMBConv::GetMBNulLen), especially for long strings.
|
doesn't count the trailing @c NUL even though it is always present in this
|
||||||
|
case.
|
||||||
|
|
||||||
If @a outLen is not-@NULL, it receives the length of the converted
|
Finally notice that if the conversion fails, the returned buffer is
|
||||||
string.
|
invalid and @a outLen is set to 0 (and not @c wxCONV_FAILED for
|
||||||
|
compatibility concerns).
|
||||||
*/
|
*/
|
||||||
const wxWCharBuffer cMB2WC(const char* in) const;
|
const wxWCharBuffer cMB2WC(const char* in,
|
||||||
const wxWCharBuffer cMB2WC(const char* in, size_t inLen, size_t *outLen) const;
|
size_t inLen = wxNO_LEN,
|
||||||
//@}
|
size_t *outLen = NULL) const;
|
||||||
|
|
||||||
//@{
|
//@{
|
||||||
/**
|
/**
|
||||||
@ -189,22 +190,19 @@ public:
|
|||||||
const wxWCharBuffer cMB2WX(const char* psz) const;
|
const wxWCharBuffer cMB2WX(const char* psz) const;
|
||||||
//@}
|
//@}
|
||||||
|
|
||||||
//@{
|
|
||||||
/**
|
/**
|
||||||
Converts from Unicode to multibyte encoding by calling WC2MB and
|
Converts from Unicode to multibyte encoding by calling FromWChar() and
|
||||||
allocating a temporary wxCharBuffer to hold the result.
|
allocating a temporary wxCharBuffer to hold the result.
|
||||||
|
|
||||||
The second overload of this function allows to convert a string of the
|
This function is a convenient wrapper around FromWChar() as it takes
|
||||||
given length @e inLen, whether it is @c NUL-terminated or not (for wide
|
care of allocating the buffer of necessary size itself.
|
||||||
character strings, unlike for the multibyte ones, a single @c NUL is
|
|
||||||
always enough). But notice that just as with @ref wxMBConv::mb2wc
|
Its parameters have the same meaning as the corresponding parameters of
|
||||||
cMB2WC, it is more efficient to pass an already terminated string to
|
FromWChar(), please see the description of cMB2WC() for more details.
|
||||||
this function as otherwise a copy is made internally. If @a outLen is
|
|
||||||
not-@NULL, it receives the length of the converted string.
|
|
||||||
*/
|
*/
|
||||||
const wxCharBuffer cWC2MB(const wchar_t* in) const;
|
const wxCharBuffer cWC2MB(const wchar_t* in,
|
||||||
const wxCharBuffer cWC2MB(const wchar_t* in, size_t inLen, size_t *outLen) const;
|
size_t inLen = wxNO_LEN,
|
||||||
//@}
|
size_t *outLen = NULL) const;
|
||||||
|
|
||||||
//@{
|
//@{
|
||||||
/**
|
/**
|
||||||
|
@ -212,11 +212,11 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
|
|||||||
if ( lenChunk == wxCONV_FAILED )
|
if ( lenChunk == wxCONV_FAILED )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
lenChunk++; // for the L'\0' at the end of this chunk
|
|
||||||
|
|
||||||
dstWritten += lenChunk;
|
dstWritten += lenChunk;
|
||||||
|
if ( !srcEnd )
|
||||||
|
dstWritten++;
|
||||||
|
|
||||||
if ( lenChunk == 1 )
|
if ( !lenChunk )
|
||||||
{
|
{
|
||||||
// nothing left in the input string, conversion succeeded
|
// nothing left in the input string, conversion succeeded
|
||||||
break;
|
break;
|
||||||
@ -227,10 +227,13 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
|
|||||||
if ( dstWritten > dstLen )
|
if ( dstWritten > dstLen )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
|
// +1 is for trailing NUL
|
||||||
|
if ( MB2WC(dst, src, lenChunk + 1) == wxCONV_FAILED )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
dst += lenChunk;
|
dst += lenChunk;
|
||||||
|
if ( !srcEnd )
|
||||||
|
dst++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !srcEnd )
|
if ( !srcEnd )
|
||||||
@ -269,13 +272,15 @@ wxMBConv::FromWChar(char *dst, size_t dstLen,
|
|||||||
// the number of chars [which would be] written to dst [if it were not NULL]
|
// the number of chars [which would be] written to dst [if it were not NULL]
|
||||||
size_t dstWritten = 0;
|
size_t dstWritten = 0;
|
||||||
|
|
||||||
|
// if we don't know its length we have no choice but to assume that it is
|
||||||
|
// NUL-terminated (notice that it can still be NUL-terminated even if
|
||||||
|
// explicit length is given but it doesn't change our return value)
|
||||||
|
const bool isNulTerminated = srcLen == wxNO_LEN;
|
||||||
|
|
||||||
// make a copy of the input string unless it is already properly
|
// make a copy of the input string unless it is already properly
|
||||||
// NUL-terminated
|
// NUL-terminated
|
||||||
//
|
|
||||||
// if we don't know its length we have no choice but to assume that it is,
|
|
||||||
// indeed, properly terminated
|
|
||||||
wxWCharBuffer bufTmp;
|
wxWCharBuffer bufTmp;
|
||||||
if ( srcLen == wxNO_LEN )
|
if ( isNulTerminated )
|
||||||
{
|
{
|
||||||
srcLen = wxWcslen(src) + 1;
|
srcLen = wxWcslen(src) + 1;
|
||||||
}
|
}
|
||||||
@ -298,18 +303,21 @@ wxMBConv::FromWChar(char *dst, size_t dstLen,
|
|||||||
if ( lenChunk == wxCONV_FAILED )
|
if ( lenChunk == wxCONV_FAILED )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
lenChunk += lenNul;
|
|
||||||
dstWritten += lenChunk;
|
dstWritten += lenChunk;
|
||||||
|
if ( isNulTerminated )
|
||||||
|
dstWritten += lenNul;
|
||||||
|
|
||||||
if ( dst )
|
if ( dst )
|
||||||
{
|
{
|
||||||
if ( dstWritten > dstLen )
|
if ( dstWritten > dstLen )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
|
if ( WC2MB(dst, src, lenChunk + lenNul) == wxCONV_FAILED )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
dst += lenChunk;
|
dst += lenChunk;
|
||||||
|
if ( isNulTerminated )
|
||||||
|
dst += lenNul;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -391,13 +399,19 @@ wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
|
|||||||
// because we want the buffer to always be NUL-terminated, even if the
|
// because we want the buffer to always be NUL-terminated, even if the
|
||||||
// input isn't (as otherwise the caller has no way to know its length)
|
// input isn't (as otherwise the caller has no way to know its length)
|
||||||
wxWCharBuffer wbuf(dstLen);
|
wxWCharBuffer wbuf(dstLen);
|
||||||
wbuf.data()[dstLen - 1] = L'\0';
|
wbuf.data()[dstLen] = L'\0';
|
||||||
if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
|
if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
|
||||||
{
|
{
|
||||||
if ( outLen )
|
if ( outLen )
|
||||||
{
|
{
|
||||||
*outLen = dstLen;
|
*outLen = dstLen;
|
||||||
if ( wbuf[dstLen - 1] == L'\0' )
|
|
||||||
|
// we also need to handle NUL-terminated input strings
|
||||||
|
// specially: for them the output is the length of the string
|
||||||
|
// excluding the trailing NUL, however if we're asked to
|
||||||
|
// convert a specific number of characters we return the length
|
||||||
|
// of the resulting output even if it's NUL-terminated
|
||||||
|
if ( inLen == wxNO_LEN )
|
||||||
(*outLen)--;
|
(*outLen)--;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -429,11 +443,10 @@ wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
|
|||||||
{
|
{
|
||||||
*outLen = dstLen;
|
*outLen = dstLen;
|
||||||
|
|
||||||
if ( dstLen >= nulLen &&
|
if ( inLen == wxNO_LEN )
|
||||||
!NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
|
|
||||||
{
|
{
|
||||||
// in this case the output is NUL-terminated and we're not
|
// in this case both input and output are NUL-terminated
|
||||||
// supposed to count NUL
|
// and we're not supposed to count NUL
|
||||||
*outLen -= nulLen;
|
*outLen -= nulLen;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user