Merge branch 'utf8-stdstring-interop' of https://github.com/minoki/wxWidgets

Make it easier to interoperate with code that uses UTF-8-encoded std::string objects.

Closes #17461.
This commit is contained in:
Vadim Zeitlin 2016-03-28 21:43:36 +02:00
commit 4e4286f0e2
4 changed files with 62 additions and 8 deletions

View File

@ -62,6 +62,7 @@ All:
- wxLogInfo() now logs messages if the log level is high enough, even without - wxLogInfo() now logs messages if the log level is high enough, even without
wxLog::SetVerbose() which now only affects wxLogVerbose(). wxLog::SetVerbose() which now only affects wxLogVerbose().
- Add wxFileType::GetExpandedCommand() (troelsk). - Add wxFileType::GetExpandedCommand() (troelsk).
- Make it easier to convert to/from UTF-8-encoded std::string (ARATA Mizuki).
All (GUI): All (GUI):

View File

@ -1241,12 +1241,17 @@ public:
// wxStringImpl is std::string in the encoding we want // wxStringImpl is std::string in the encoding we want
#define wxStringToStdStringRetType const std::string& #define wxStringToStdStringRetType const std::string&
const std::string& ToStdString() const { return m_impl; } const std::string& ToStdString() const { return m_impl; }
std::string ToStdString(const wxMBConv& conv) const
{
wxScopedCharBuffer buf(mb_str(conv));
return std::string(buf.data(), buf.length());
}
#else #else
// wxStringImpl is either not std::string or needs conversion // wxStringImpl is either not std::string or needs conversion
#define wxStringToStdStringRetType std::string #define wxStringToStdStringRetType std::string
std::string ToStdString() const std::string ToStdString(const wxMBConv& conv = wxConvLibc) const
{ {
wxScopedCharBuffer buf(mb_str()); wxScopedCharBuffer buf(mb_str(conv));
return std::string(buf.data(), buf.length()); return std::string(buf.data(), buf.length());
} }
#endif #endif
@ -1611,6 +1616,24 @@ public:
return FromImpl(wxStringImpl(utf8, len)); return FromImpl(wxStringImpl(utf8, len));
} }
#if wxUSE_STD_STRING
// Build a wxString from a std::string that the caller promises is valid
// UTF-8.
//
// utf8: the bytes to adopt as the string contents.
//
// The only validation is the wxASSERT below; nothing else in this function
// rejects malformed input.
static wxString FromUTF8Unchecked(const std::string& utf8)
{
    wxASSERT( wxStringOperations::IsValidUtf8String(utf8.c_str(),
                                                    utf8.size()) );

    // With wxUSE_UNICODE_UTF8 and wxUSE_STD_STRING both enabled, a
    // wxStringImpl can be constructed from a std::string regardless of
    // the value of wxUSE_STL_BASED_WXSTRING, so the argument can be
    // handed to FromImpl() directly.
    return FromImpl(utf8);
}
// Build a wxString from a UTF-8 encoded std::string, validating it first.
//
// utf8: the bytes to convert.
//
// Returns a default-constructed wxString when the input is empty or does
// not pass IsValidUtf8String(); otherwise returns the result of FromImpl().
static wxString FromUTF8(const std::string& utf8)
{
    // The emptiness test comes first so that the validator is never run
    // on an empty input.
    const bool usable =
        !utf8.empty() &&
        wxStringOperations::IsValidUtf8String(utf8.c_str(), utf8.size());

    return usable ? FromImpl(utf8) : wxString();
}
#endif
const wxScopedCharBuffer utf8_str() const const wxScopedCharBuffer utf8_str() const
{ return wxCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length()); } { return wxCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length()); }
@ -1627,6 +1650,12 @@ public:
"string must be valid UTF-8" ); "string must be valid UTF-8" );
return s; return s;
} }
#if wxUSE_STD_STRING
// std::string convenience overload: forwards the string's buffer and its
// explicit length to the (pointer, length) overload of FromUTF8().
static wxString FromUTF8(const std::string& utf8)
{
    return FromUTF8(utf8.c_str(), utf8.size());
}
// std::string convenience overload: forwards the string's buffer and its
// explicit length to the (pointer, length) overload of FromUTF8Unchecked().
static wxString FromUTF8Unchecked(const std::string& utf8)
{
    return FromUTF8Unchecked(utf8.c_str(), utf8.size());
}
#endif
const wxScopedCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); } const wxScopedCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); }
#else // ANSI #else // ANSI
static wxString FromUTF8(const char *utf8) static wxString FromUTF8(const char *utf8)
@ -1654,6 +1683,12 @@ public:
return wxString(buf.data(), wlen); return wxString(buf.data(), wlen);
} }
#if wxUSE_STD_STRING
// std::string convenience overload: forwards the string's buffer and its
// explicit length to the (pointer, length) overload of FromUTF8().
static wxString FromUTF8(const std::string& utf8)
{
    return FromUTF8(utf8.c_str(), utf8.size());
}
// std::string convenience overload: forwards the string's buffer and its
// explicit length to the (pointer, length) overload of FromUTF8Unchecked().
static wxString FromUTF8Unchecked(const std::string& utf8)
{
    return FromUTF8Unchecked(utf8.c_str(), utf8.size());
}
#endif
const wxScopedCharBuffer utf8_str() const const wxScopedCharBuffer utf8_str() const
{ return wxMBConvUTF8().cWC2MB(wc_str()); } { return wxMBConvUTF8().cWC2MB(wc_str()); }
#endif #endif

View File

@ -85,8 +85,8 @@
- String in UTF-8 encoding using wxString::utf8_str(). - String in UTF-8 encoding using wxString::utf8_str().
- String in any given encoding using mb_str() with the appropriate - String in any given encoding using mb_str() with the appropriate
wxMBConv object. This is also a potentially destructive operation. wxMBConv object. This is also a potentially destructive operation.
- Standard @c std::string using wxString::ToStdString(). The contents - Standard @c std::string using wxString::ToStdString(). The encoding
of the returned string use the current locale encoding, so this of the returned string is specified with a wxMBConv object, so this
conversion is potentially destructive as well. conversion is potentially destructive as well.
- Wide C string using wxString::wc_str(). - Wide C string using wxString::wc_str().
- Standard @c std::wstring using wxString::ToStdWstring(). - Standard @c std::wstring using wxString::ToStdWstring().
@ -745,10 +745,10 @@ public:
const TYPE ToAscii(char replaceWith = '_') const; const TYPE ToAscii(char replaceWith = '_') const;
/** /**
Return the string as an std::string in current locale encoding. Return the string as an std::string using @e conv's wxMBConv::cWC2MB method.
Note that if the conversion of (Unicode) string contents to the current Note that if the conversion of (Unicode) string contents using @e conv
locale fails, the return string will be empty. Be sure to check for fails, the return string will be empty. Be sure to check for
this to avoid silent data loss. this to avoid silent data loss.
Instead of using this function it's also possible to write Instead of using this function it's also possible to write
@ -760,9 +760,12 @@ public:
@endcode @endcode
but using ToStdString() may make the code more clear. but using ToStdString() may make the code more clear.
@param conv
The converter to be used. This parameter is new in wxWidgets 3.1.1.
@since 2.9.1 @since 2.9.1
*/ */
std::string ToStdString() const; std::string ToStdString(const wxMBConv& conv = wxConvLibc) const;
/** /**
Return the string as an std::wstring. Return the string as an std::wstring.
@ -1813,10 +1816,14 @@ public:
alternative to this function called FromUTF8Unchecked() which, unlike alternative to this function called FromUTF8Unchecked() which, unlike
this one, doesn't check that the input string is valid. this one, doesn't check that the input string is valid.
The overload taking @c std::string is only available starting with
wxWidgets 3.1.1.
@since 2.8.4 @since 2.8.4
*/ */
static wxString FromUTF8(const char* s); static wxString FromUTF8(const char* s);
static wxString FromUTF8(const char* s, size_t len); static wxString FromUTF8(const char* s, size_t len);
static wxString FromUTF8(const std::string& s);
//@} //@}
//@{ //@{
@ -1833,10 +1840,14 @@ public:
string to this function will result in creating a corrupted wxString string to this function will result in creating a corrupted wxString
and all the subsequent operations on it will be undefined. and all the subsequent operations on it will be undefined.
The overload taking @c std::string is only available starting with
wxWidgets 3.1.1.
@since 2.8.9 @since 2.8.9
*/ */
static wxString FromUTF8Unchecked(const char* s); static wxString FromUTF8Unchecked(const char* s);
static wxString FromUTF8Unchecked(const char* s, size_t len); static wxString FromUTF8Unchecked(const char* s, size_t len);
static wxString FromUTF8Unchecked(const std::string& s);
//@} //@}
}; };

View File

@ -608,5 +608,12 @@ void StdStringTestCase::StdConversion()
wxStdWideString s8(s4); wxStdWideString s8(s4);
CPPUNIT_ASSERT( s8 == "hello" ); CPPUNIT_ASSERT( s8 == "hello" );
std::string s9("\xF0\x9F\x90\xB1\0\xE7\x8C\xAB", 9); /* U+1F431 U+0000 U+732B */
wxString s10 = wxString::FromUTF8(s9);
CPPUNIT_ASSERT_EQUAL( s9, s10.ToStdString(wxConvUTF8) );
std::string s11("xyz\0\xFF", 5); /* an invalid UTF-8 sequence */
CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8(s11), "" );
} }
#endif // wxUSE_STD_STRING #endif // wxUSE_STD_STRING