diff --git a/build/bakefiles/files.bkl b/build/bakefiles/files.bkl index 0c01325b7f..6257d1f853 100644 --- a/build/bakefiles/files.bkl +++ b/build/bakefiles/files.bkl @@ -290,6 +290,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file! src/common/clntdata.cpp src/common/cmdline.cpp src/common/config.cpp + src/common/convauto.cpp src/common/datetime.cpp src/common/datstrm.cpp src/common/dircmn.cpp @@ -362,6 +363,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file! wx/cmdline.h wx/confbase.h wx/config.h + wx/convauto.h wx/containr.h wx/datetime.h wx/datstrm.h diff --git a/docs/changes.txt b/docs/changes.txt index cc5d1829fb..e24ae287c8 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -27,6 +27,7 @@ Deprecated methods since 2.6.x and their replacements All: +- wx(F)File, wxTextFile and wxInputStreams recognize Unicode BOM now - wxLaunchDefaultBrowser() now supports wxBROWSER_NEW_WINDOW flag. - Added wxStandardPaths::GetResourcesDir() and GetLocalizedResourcesDir() - Added wxStringTokenizer::GetLastDelimiter(); improved documentation. 
diff --git a/include/wx/convauto.h b/include/wx/convauto.h
new file mode 100644
index 0000000000..ec32b93c43
--- /dev/null
+++ b/include/wx/convauto.h
@@ -0,0 +1,123 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        wx/convauto.h
+// Purpose:     wxConvAuto class declaration
+// Author:      Vadim Zeitlin
+// Created:     2006-04-03
+// RCS-ID:      $Id$
+// Copyright:   (c) 2006 Vadim Zeitlin
+// Licence:     wxWindows licence
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _WX_CONVAUTO_H_
+#define _WX_CONVAUTO_H_
+
+#include "wx/strconv.h"
+
+#if wxUSE_WCHAR_T
+
+// ----------------------------------------------------------------------------
+// wxConvAuto: uses BOM to automatically detect input encoding
+// ----------------------------------------------------------------------------
+
+class WXDLLIMPEXP_BASE wxConvAuto : public wxMBConv
+{
+public:
+    // default ctor: the real conversion is created on demand, but all the
+    // members are given well-defined values here so that no code path (e.g.
+    // FromWChar() followed by ToWChar()) can read an uninitialized one
+    wxConvAuto()
+        : m_conv(NULL),
+          m_bomType(BOM_None),
+          m_ownsConv(false),
+          m_consumedBOM(false)
+    {
+    }
+
+    // copy ctor doesn't copy anything from the other object either as the
+    // conversion can only be deduced on first use
+    wxConvAuto(const wxConvAuto& WXUNUSED(other))
+        : m_conv(NULL),
+          m_bomType(BOM_None),
+          m_ownsConv(false),
+          m_consumedBOM(false)
+    {
+    }
+
+    // only delete the conversion if we allocated it ourselves
+    virtual ~wxConvAuto() { if ( m_ownsConv ) delete m_conv; }
+
+    // override the base class virtual function(s) to use our m_conv
+    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
+                           const char *src, size_t srcLen = -1) const;
+
+    virtual size_t FromWChar(char *dst, size_t dstLen,
+                             const wchar_t *src, size_t srcLen = -1) const;
+
+    // return the size of the NUL terminator of the multibyte encoding: until
+    // the real conversion is known (m_conv is still NULL) this is the one of
+    // the default UTF-8 conversion used by InitWithDefault()
+    virtual size_t GetMBNulLen() const
+    {
+        return m_conv ? m_conv->GetMBNulLen() : wxConvUTF8.GetMBNulLen();
+    }
+
+private:
+    // all currently recognized BOM values
+    enum BOMType
+    {
+        BOM_None,
+        BOM_UTF32BE,
+        BOM_UTF32LE,
+        BOM_UTF16BE,
+        BOM_UTF16LE,
+        BOM_UTF8
+    };
+
+    // return the BOM type of this buffer
+    static BOMType DetectBOM(const char *src, size_t srcLen);
+
+    // initialize m_conv with the conversion to use by default (UTF-8); there
+    // is no BOM to skip in this case
+    void InitWithDefault()
+    {
+        m_conv = &wxConvUTF8;
+        m_ownsConv = false;
+        m_bomType = BOM_None;
+        m_consumedBOM = true;
+    }
+
+    // create the correct conversion object for the given BOM type
+    void InitFromBOM(BOMType bomType);
+
+    // create the correct conversion object for the BOM present in the
+    // beginning of the buffer; adjust the buffer to skip the BOM if found
+    void InitFromInput(const char **src, size_t *len);
+
+    // adjust src and len to skip over the BOM (identified by m_bomType) at the
+    // start of the buffer
+    void SkipBOM(const char **src, size_t *len) const;
+
+
+    // conversion object which we really use, NULL until the first call to
+    // either ToWChar() or FromWChar()
+    wxMBConv *m_conv;
+
+    // our BOM type
+    BOMType m_bomType;
+
+    // true if we allocated m_conv ourselves, false if we just use an existing
+    // global conversion
+    bool m_ownsConv;
+
+    // true if we already skipped BOM when converting (and not just calculating
+    // the size)
+    bool m_consumedBOM;
+
+
+    DECLARE_NO_ASSIGN_CLASS(wxConvAuto);
+};
+
+#endif // wxUSE_WCHAR_T
+
+#endif // _WX_CONVAUTO_H_
+
diff --git a/include/wx/datstrm.h b/include/wx/datstrm.h index f49620a6ae..80fa7e285e 100644 --- a/include/wx/datstrm.h +++ b/include/wx/datstrm.h @@ -14,7 +14,7 @@ #include "wx/stream.h" #include "wx/longlong.h" -#include "wx/strconv.h" +#include "wx/convauto.h" #if wxUSE_STREAMS @@ -22,7 +22,7 @@ class WXDLLIMPEXP_BASE wxDataInputStream { public: #if wxUSE_UNICODE - wxDataInputStream(wxInputStream& s, wxMBConv& conv = wxConvUTF8); + wxDataInputStream(wxInputStream& s, const wxMBConv& conv = wxConvAuto()); #else wxDataInputStream(wxInputStream& s); #endif @@ -83,7 +83,7 @@ protected: wxInputStream *m_input; bool m_be_order; #if wxUSE_UNICODE - wxMBConv& m_conv; + wxMBConv m_conv; #endif DECLARE_NO_COPY_CLASS(wxDataInputStream) @@ -93,7 +93,7 @@ class WXDLLIMPEXP_BASE wxDataOutputStream { public: #if wxUSE_UNICODE - wxDataOutputStream(wxOutputStream& s, wxMBConv& conv = wxConvUTF8); + wxDataOutputStream(wxOutputStream& s, const
wxMBConv& conv = wxConvAuto()); #else wxDataOutputStream(wxOutputStream& s); #endif @@ -157,7 +157,7 @@ protected: wxOutputStream *m_output; bool m_be_order; #if wxUSE_UNICODE - wxMBConv& m_conv; + wxMBConv m_conv; #endif DECLARE_NO_COPY_CLASS(wxDataOutputStream) diff --git a/include/wx/ffile.h b/include/wx/ffile.h index c254ed04ed..58aca44eaf 100644 --- a/include/wx/ffile.h +++ b/include/wx/ffile.h @@ -18,6 +18,7 @@ #include "wx/string.h" #include "wx/filefn.h" +#include "wx/convauto.h" #include @@ -54,14 +55,14 @@ public: // read/write (unbuffered) // read all data from the file into a string (useful for text files) - bool ReadAll(wxString *str, wxMBConv& conv = wxConvUTF8); + bool ReadAll(wxString *str, const wxMBConv& conv = wxConvAuto()); // returns number of bytes read - use Eof() and Error() to see if an error // occurred or not size_t Read(void *pBuf, size_t nCount); // returns the number of bytes written size_t Write(const void *pBuf, size_t nCount); // returns true on success - bool Write(const wxString& s, wxMBConv& conv = wxConvUTF8) + bool Write(const wxString& s, const wxMBConv& conv = wxConvAuto()) { const wxWX2MBbuf buf = s.mb_str(conv); size_t size = strlen(buf); diff --git a/include/wx/file.h b/include/wx/file.h index 243d2b83f8..7f3f0e8184 100644 --- a/include/wx/file.h +++ b/include/wx/file.h @@ -97,7 +97,7 @@ public: // returns the number of bytes written size_t Write(const void *pBuf, size_t nCount); // returns true on success - bool Write(const wxString& s, wxMBConv& conv = wxConvUTF8) + bool Write(const wxString& s, const wxMBConv& conv = wxConvUTF8) { const wxWX2MBbuf buf = s.mb_str(conv); size_t size = strlen(buf); @@ -172,7 +172,7 @@ public: // I/O (both functions return true on success, false on failure) bool Write(const void *p, size_t n) { return m_file.Write(p, n) == n; } - bool Write(const wxString& str, wxMBConv& conv = wxConvUTF8) + bool Write(const wxString& str, const wxMBConv& conv = wxConvUTF8) { return m_file.Write(str, conv); 
} // different ways to close the file diff --git a/include/wx/fileconf.h b/include/wx/fileconf.h index 79828da710..ebb4ab5b29 100644 --- a/include/wx/fileconf.h +++ b/include/wx/fileconf.h @@ -122,11 +122,11 @@ public: const wxString& localFilename = wxEmptyString, const wxString& globalFilename = wxEmptyString, long style = wxCONFIG_USE_LOCAL_FILE | wxCONFIG_USE_GLOBAL_FILE, - wxMBConv& conv = wxConvUTF8); + const wxMBConv& conv = wxConvAuto()); #if wxUSE_STREAMS // ctor that takes an input stream. - wxFileConfig(wxInputStream &inStream, wxMBConv& conv = wxConvUTF8); + wxFileConfig(wxInputStream &inStream, const wxMBConv& conv = wxConvAuto()); #endif // wxUSE_STREAMS // dtor will save unsaved data @@ -169,7 +169,7 @@ public: // save the entire config file text to the given stream, note that the text // won't be saved again in dtor when Flush() is called if you use this method // as it won't be "changed" any more - virtual bool Save(wxOutputStream& os, wxMBConv& conv = wxConvUTF8); + virtual bool Save(wxOutputStream& os, const wxMBConv& conv = wxConvAuto()); #endif // wxUSE_STREAMS public: @@ -227,7 +227,7 @@ private: wxFileConfigGroup *m_pRootGroup, // the top (unnamed) group *m_pCurrentGroup; // the current group - wxMBConv &m_conv; + wxMBConv m_conv; #ifdef __UNIX__ int m_umask; // the umask to use for file creation diff --git a/include/wx/memtext.h b/include/wx/memtext.h index aa2df7b637..a093f40b04 100644 --- a/include/wx/memtext.h +++ b/include/wx/memtext.h @@ -37,11 +37,11 @@ protected: virtual bool OnClose() { return true; } - virtual bool OnRead(wxMBConv& WXUNUSED(conv)) + virtual bool OnRead(const wxMBConv& WXUNUSED(conv)) { return true; } virtual bool OnWrite(wxTextFileType WXUNUSED(typeNew), - wxMBConv& WXUNUSED(conv) = wxConvUTF8) + const wxMBConv& WXUNUSED(conv) = wxConvUTF8) { return true; } private: diff --git a/include/wx/string.h b/include/wx/string.h index 5f9bdf042c..6cf19b5022 100644 --- a/include/wx/string.h +++ b/include/wx/string.h @@ -325,7 
+325,9 @@ public: { InitWith(psz, 0, npos); } wxStringBase(const wxChar *psz, size_t nLength) { InitWith(psz, 0, nLength); } - wxStringBase(const wxChar *psz, wxMBConv& WXUNUSED(conv), size_t nLength = npos) + wxStringBase(const wxChar *psz, + const wxMBConv& WXUNUSED(conv), + size_t nLength = npos) { InitWith(psz, 0, nLength); } // take nLen chars starting at nPos wxStringBase(const wxStringBase& str, size_t nPos, size_t nLen) @@ -650,7 +652,9 @@ public: : wxStringBase(psz ? psz : wxT("")) { } wxString(const wxChar *psz, size_t nLength) : wxStringBase(psz, nLength) { } - wxString(const wxChar *psz, wxMBConv& WXUNUSED(conv), size_t nLength = npos) + wxString(const wxChar *psz, + const wxMBConv& WXUNUSED(conv), + size_t nLength = npos) : wxStringBase(psz, nLength == npos ? wxStrlen(psz) : nLength) { } // even if we're not built with wxUSE_STL == 1 it is very convenient to allow @@ -666,7 +670,7 @@ public: #if wxUSE_UNICODE // from multibyte string - wxString(const char *psz, wxMBConv& conv, size_t nLength = npos); + wxString(const char *psz, const wxMBConv& conv, size_t nLength = npos); // from wxWCharBuffer (i.e. return from wxGetString) wxString(const wxWCharBuffer& psz) : wxStringBase(psz.data()) { } #else // ANSI @@ -679,7 +683,9 @@ public: #if wxUSE_WCHAR_T // from wide (Unicode) string - wxString(const wchar_t *pwz, wxMBConv& conv = wxConvLibc, size_t nLength = npos); + wxString(const wchar_t *pwz, + const wxMBConv& conv = wxConvLibc, + size_t nLength = npos); #endif // !wxUSE_WCHAR_T // from wxCharBuffer @@ -809,14 +815,14 @@ public: // type differs because a function may either return pointer to the buffer // directly or have to use intermediate buffer for translation. 
#if wxUSE_UNICODE - const wxCharBuffer mb_str(wxMBConv& conv = wxConvLibc) const; + const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const; const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); } const wxChar* wc_str() const { return c_str(); } // for compatibility with !wxUSE_UNICODE version - const wxChar* wc_str(wxMBConv& WXUNUSED(conv)) const { return c_str(); } + const wxChar* wc_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); } #if wxMBFILES const wxCharBuffer fn_str() const { return mb_str(wxConvFile); } @@ -827,12 +833,12 @@ public: const wxChar* mb_str() const { return c_str(); } // for compatibility with wxUSE_UNICODE version - const wxChar* mb_str(wxMBConv& WXUNUSED(conv)) const { return c_str(); } + const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); } const wxWX2MBbuf mbc_str() const { return mb_str(); } #if wxUSE_WCHAR_T - const wxWCharBuffer wc_str(wxMBConv& conv) const; + const wxWCharBuffer wc_str(const wxMBConv& conv) const; #endif // wxUSE_WCHAR_T #ifdef __WXOSX__ const wxCharBuffer fn_str() const { return wxConvFile.cWC2WX( wc_str( wxConvLocal ) ); } diff --git a/include/wx/textbuf.h b/include/wx/textbuf.h index 5749dd4134..f768a56324 100644 --- a/include/wx/textbuf.h +++ b/include/wx/textbuf.h @@ -14,6 +14,7 @@ #include "wx/defs.h" #include "wx/arrstr.h" +#include "wx/convauto.h" // ---------------------------------------------------------------------------- // constants @@ -80,10 +81,10 @@ public: bool Create(const wxString& strBufferName); // Open() also loads buffer in memory on success - bool Open(wxMBConv& conv = wxConvUTF8); + bool Open(const wxMBConv& conv = wxConvAuto()); // same as Open() but with (another) buffer name - bool Open(const wxString& strBufferName, wxMBConv& conv = wxConvUTF8); + bool Open(const wxString& strBufferName, const wxMBConv& conv = wxConvAuto()); // closes the buffer and frees memory, losing all changes bool Close(); @@ -161,7 +162,7 @@ public: // 
change the buffer (default argument means "don't change type") // possibly in another format bool Write(wxTextFileType typeNew = wxTextFileType_None, - wxMBConv& conv = wxConvUTF8); + const wxMBConv& conv = wxConvAuto()); // dtor virtual ~wxTextBuffer(); @@ -183,8 +184,8 @@ protected: virtual bool OnOpen(const wxString &strBufferName, wxTextBufferOpenMode openmode) = 0; virtual bool OnClose() = 0; - virtual bool OnRead(wxMBConv& conv) = 0; - virtual bool OnWrite(wxTextFileType typeNew, wxMBConv& conv) = 0; + virtual bool OnRead(const wxMBConv& conv) = 0; + virtual bool OnWrite(wxTextFileType typeNew, const wxMBConv& conv) = 0; static wxString ms_eof; // dummy string returned at EOF wxString m_strBufferName; // name of the buffer diff --git a/include/wx/textfile.h b/include/wx/textfile.h index f5ceaedb26..588aedd24a 100644 --- a/include/wx/textfile.h +++ b/include/wx/textfile.h @@ -39,8 +39,8 @@ protected: virtual bool OnOpen(const wxString &strBufferName, wxTextBufferOpenMode OpenMode); virtual bool OnClose(); - virtual bool OnRead(wxMBConv& conv); - virtual bool OnWrite(wxTextFileType typeNew, wxMBConv& conv); + virtual bool OnRead(const wxMBConv& conv); + virtual bool OnWrite(wxTextFileType typeNew, const wxMBConv& conv); private: diff --git a/include/wx/txtstrm.h b/include/wx/txtstrm.h index 6344c8ff62..d8011a8e49 100644 --- a/include/wx/txtstrm.h +++ b/include/wx/txtstrm.h @@ -13,6 +13,7 @@ #define _WX_TXTSTREAM_H_ #include "wx/stream.h" +#include "wx/convauto.h" #if wxUSE_STREAMS @@ -36,9 +37,11 @@ class WXDLLIMPEXP_BASE wxTextInputStream { public: #if wxUSE_UNICODE - wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t"), wxMBConv& conv = wxConvUTF8 ); + wxTextInputStream(wxInputStream& s, + const wxString &sep=wxT(" \t"), + const wxMBConv& conv = wxConvAuto()); #else - wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t") ); + wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t")); #endif ~wxTextInputStream(); @@ -78,7 
+81,7 @@ protected: char m_lastBytes[10]; // stores the bytes that were read for the last character #if wxUSE_UNICODE - wxMBConv &m_conv; + wxMBConv m_conv; #endif bool EatEOL(const wxChar &c); @@ -102,9 +105,11 @@ class WXDLLIMPEXP_BASE wxTextOutputStream { public: #if wxUSE_UNICODE - wxTextOutputStream( wxOutputStream& s, wxEOL mode = wxEOL_NATIVE, wxMBConv& conv = wxConvUTF8 ); + wxTextOutputStream(wxOutputStream& s, + wxEOL mode = wxEOL_NATIVE, + const wxMBConv& conv = wxConvAuto()); #else - wxTextOutputStream( wxOutputStream& s, wxEOL mode = wxEOL_NATIVE ); + wxTextOutputStream(wxOutputStream& s, wxEOL mode = wxEOL_NATIVE); #endif virtual ~wxTextOutputStream(); @@ -139,7 +144,7 @@ protected: wxEOL m_mode; #if wxUSE_UNICODE - wxMBConv &m_conv; + wxMBConv m_conv; #endif DECLARE_NO_COPY_CLASS(wxTextOutputStream)
diff --git a/src/common/convauto.cpp b/src/common/convauto.cpp
new file mode 100644
index 0000000000..d43bb6d1ba
--- /dev/null
+++ b/src/common/convauto.cpp
@@ -0,0 +1,234 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        src/common/convauto.cpp
+// Purpose:     implementation of wxConvAuto
+// Author:      Vadim Zeitlin
+// Created:     2006-04-04
+// RCS-ID:      $Id$
+// Copyright:   (c) 2006 Vadim Zeitlin
+// Licence:     wxWindows licence
+///////////////////////////////////////////////////////////////////////////////
+
+// ============================================================================
+// declarations
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// headers
+// ----------------------------------------------------------------------------
+
+// for compilers that support precompilation, includes "wx.h".
+#include "wx/wxprec.h"
+
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif
+
+#if wxUSE_WCHAR_T
+
+#ifndef WX_PRECOMP
+#endif //WX_PRECOMP
+
+#include "wx/convauto.h"
+
+#include <string.h>     // for strlen()
+
+// ============================================================================
+// implementation
+// ============================================================================
+
+// Identify the BOM, if any, at the start of the given buffer.
+//
+// srcLen must be the real number of bytes which can be read from src: callers
+// having only a NUL-terminated string must compute its length first because
+// passing (size_t)-1 here would make the length checks below meaningless.
+/* static */
+wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
+{
+    if ( srcLen < 2 )
+    {
+        // minimal BOM is 2 bytes so bail out immediately and simplify the code
+        // below which wouldn't need to check for length for UTF-16 cases
+        return BOM_None;
+    }
+
+    // examine the buffer for BOM presence
+    //
+    // see http://www.unicode.org/faq/utf_bom.html#BOM
+    switch ( *src++ )
+    {
+        case '\0':
+            // could only be big endian UTF-32 (00 00 FE FF)
+            if ( srcLen >= 4 &&
+                    src[0] == '\0' &&
+                        src[1] == '\xfe' &&
+                            src[2] == '\xff' )
+            {
+                return BOM_UTF32BE;
+            }
+            break;
+
+        case '\xfe':
+            // could only be big endian UTF-16 (FE FF)
+            if ( *src++ == '\xff' )
+            {
+                return BOM_UTF16BE;
+            }
+            break;
+
+        case '\xff':
+            // could be either little endian UTF-16 or UTF-32, both start
+            // with FF FE
+            if ( *src++ == '\xfe' )
+            {
+                return srcLen >= 4 && src[0] == '\0' && src[1] == '\0'
+                            ? BOM_UTF32LE
+                            : BOM_UTF16LE;
+            }
+            break;
+
+        case '\xef':
+            // is this UTF-8 BOM (EF BB BF)?
+            if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' )
+            {
+                return BOM_UTF8;
+            }
+            break;
+    }
+
+    return BOM_None;
+}
+
+// Create the conversion corresponding to the given BOM type; m_consumedBOM is
+// reset so that ToWChar() knows it still has to skip the BOM when it is first
+// called with a real output buffer.
+void wxConvAuto::InitFromBOM(BOMType bomType)
+{
+    m_consumedBOM = false;
+
+    switch ( bomType )
+    {
+        case BOM_UTF32BE:
+            m_conv = new wxMBConvUTF32BE;
+            m_ownsConv = true;
+            break;
+
+        case BOM_UTF32LE:
+            m_conv = new wxMBConvUTF32LE;
+            m_ownsConv = true;
+            break;
+
+        case BOM_UTF16BE:
+            m_conv = new wxMBConvUTF16BE;
+            m_ownsConv = true;
+            break;
+
+        case BOM_UTF16LE:
+            m_conv = new wxMBConvUTF16LE;
+            m_ownsConv = true;
+            break;
+
+        case BOM_UTF8:
+            m_conv = &wxConvUTF8;
+            m_ownsConv = false;
+            break;
+
+        default:
+            wxFAIL_MSG( _T("unexpected BOM type") );
+            // fall through: still need to create something
+
+        case BOM_None:
+            InitWithDefault();
+            m_consumedBOM = true; // as there is nothing to consume
+    }
+}
+
+// Advance *src, and decrease *len if it is known, by the size of the BOM
+// identified by m_bomType.
+void wxConvAuto::SkipBOM(const char **src, size_t *len) const
+{
+    int ofs;
+    switch ( m_bomType )
+    {
+        case BOM_UTF32BE:
+        case BOM_UTF32LE:
+            ofs = 4;
+            break;
+
+        case BOM_UTF16BE:
+        case BOM_UTF16LE:
+            ofs = 2;
+            break;
+
+        case BOM_UTF8:
+            ofs = 3;
+            break;
+
+        default:
+            wxFAIL_MSG( _T("unexpected BOM type") );
+            // fall through: there is nothing to skip then
+
+        case BOM_None:
+            ofs = 0;
+    }
+
+    *src += ofs;
+    if ( *len != (size_t)-1 )
+        *len -= ofs;
+}
+
+// Detect the BOM at the start of the buffer, create the matching conversion
+// and skip over the BOM.
+void wxConvAuto::InitFromInput(const char **src, size_t *len)
+{
+    // a length of -1 means that the input is NUL-terminated: compute the real
+    // number of bytes available as DetectBOM() would otherwise read past the
+    // end of short (or empty) strings; as a consequence, UTF-32 BOMs, which
+    // contain NUL bytes, can only be recognized if the length is specified
+    const size_t lenKnown = *len == (size_t)-1 ? strlen(*src) : *len;
+
+    m_bomType = DetectBOM(*src, lenKnown);
+    InitFromBOM(m_bomType);
+    SkipBOM(src, len);
+}
+
+size_t
+wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
+                    const char *src, size_t srcLen) const
+{
+    // we check BOM and create the appropriate conversion the first time we're
+    // called but we also need to ensure that the BOM is skipped not only
+    // during this initial call but also during the first call with non-NULL
+    // dst as typically we're first called with NULL dst to calculate the
+    // needed buffer size
+    wxConvAuto *self = wx_const_cast(wxConvAuto *, this);
+    if ( !m_conv )
+    {
+        self->InitFromInput(&src, &srcLen);
+        if ( dst )
+            self->m_consumedBOM = true;
+    }
+
+    if ( !m_consumedBOM && dst )
+    {
+        self->m_consumedBOM = true;
+        SkipBOM(&src, &srcLen);
+    }
+
+    return m_conv->ToWChar(dst, dstLen, src, srcLen);
+}
+
+size_t
+wxConvAuto::FromWChar(char *dst, size_t dstLen,
+                      const wchar_t *src, size_t srcLen) const
+{
+    if ( !m_conv )
+    {
+        // default to UTF-8 for the multibyte output
+        wx_const_cast(wxConvAuto *, this)->InitWithDefault();
+    }
+
+    return m_conv->FromWChar(dst, dstLen, src, srcLen);
+}
+
+#endif // wxUSE_WCHAR_T
+
diff --git a/src/common/datstrm.cpp b/src/common/datstrm.cpp index a1961c3f02..67a2e45805 100644 --- a/src/common/datstrm.cpp +++ b/src/common/datstrm.cpp @@ -26,7 +26,7 @@ // --------------------------------------------------------------------------- #if wxUSE_UNICODE -wxDataInputStream::wxDataInputStream(wxInputStream& s, wxMBConv& conv) +wxDataInputStream::wxDataInputStream(wxInputStream& s, const wxMBConv& conv) : m_input(&s), m_be_order(false), m_conv(conv) #else wxDataInputStream::wxDataInputStream(wxInputStream& s) @@ -445,7 +445,7 @@ wxDataInputStream& wxDataInputStream::operator>>(float& f) // --------------------------------------------------------------------------- #if wxUSE_UNICODE -wxDataOutputStream::wxDataOutputStream(wxOutputStream& s, wxMBConv& conv) +wxDataOutputStream::wxDataOutputStream(wxOutputStream& s, const wxMBConv& conv) : m_output(&s), m_be_order(false), m_conv(conv) #else wxDataOutputStream::wxDataOutputStream(wxOutputStream& s) diff --git a/src/common/ffile.cpp b/src/common/ffile.cpp index d34756376a..997bf03d2b 100644 --- a/src/common/ffile.cpp +++ b/src/common/ffile.cpp @@ -103,7 +103,7 @@ bool wxFFile::Close() // read/write // ---------------------------------------------------------------------------- -bool wxFFile::ReadAll(wxString *str, wxMBConv& conv) +bool wxFFile::ReadAll(wxString *str, const wxMBConv& conv) { wxCHECK_MSG( str, false, wxT("invalid parameter") ); wxCHECK_MSG(
IsOpened(), false, wxT("can't read from closed file") ); diff --git a/src/common/fileconf.cpp b/src/common/fileconf.cpp index d492e6466d..5e84cf2c41 100644 --- a/src/common/fileconf.cpp +++ b/src/common/fileconf.cpp @@ -426,7 +426,8 @@ void wxFileConfig::Init() // constructor supports creation of wxFileConfig objects of any type wxFileConfig::wxFileConfig(const wxString& appName, const wxString& vendorName, const wxString& strLocal, const wxString& strGlobal, - long style, wxMBConv& conv) + long style, + const wxMBConv& conv) : wxConfigBase(::GetAppName(appName), vendorName, strLocal, strGlobal, style), @@ -474,7 +475,7 @@ wxFileConfig::wxFileConfig(const wxString& appName, const wxString& vendorName, #if wxUSE_STREAMS -wxFileConfig::wxFileConfig(wxInputStream &inStream, wxMBConv& conv) +wxFileConfig::wxFileConfig(wxInputStream &inStream, const wxMBConv& conv) : m_conv(conv) { // always local_file when this constructor is called (?) @@ -1036,7 +1037,7 @@ bool wxFileConfig::Flush(bool /* bCurrentOnly */) #if wxUSE_STREAMS -bool wxFileConfig::Save(wxOutputStream& os, wxMBConv& conv) +bool wxFileConfig::Save(wxOutputStream& os, const wxMBConv& conv) { // save unconditionally, even if not dirty for ( wxFileConfigLineList *p = m_linesHead; p != NULL; p = p->Next() ) diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 01e0dc358c..3670582926 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -203,21 +203,16 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, size_t lenChunk = MB2WC(NULL, src, 0); if ( lenChunk == 0 ) { - // nothing left in the input string, conversion succeeded + // nothing left in the input string, conversion succeeded; but + // still account for the trailing NULL + dstWritten++; break; } if ( lenChunk == wxCONV_FAILED ) return wxCONV_FAILED; - // if we already have a previous chunk, leave the NUL separating it - // from this one - if ( dstWritten ) - { - dstWritten++; - if ( dst ) - dst++; - } + lenChunk++; // for trailing 
NUL dstWritten += lenChunk; @@ -226,8 +221,7 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, if ( dstWritten > dstLen ) return wxCONV_FAILED; - lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */); - if ( lenChunk == wxCONV_FAILED ) + if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED ) return wxCONV_FAILED; dst += lenChunk; @@ -390,11 +384,11 @@ wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const const size_t dstLen = ToWChar(NULL, 0, in, inLen); if ( dstLen != wxCONV_FAILED ) { - wxWCharBuffer wbuf(dstLen); + wxWCharBuffer wbuf(dstLen - 1); if ( ToWChar(wbuf.data(), dstLen, in, inLen) ) { if ( outLen ) - *outLen = dstLen; + *outLen = dstLen - 1; return wbuf; } } @@ -411,11 +405,11 @@ wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const const size_t dstLen = FromWChar(NULL, 0, in, inLen); if ( dstLen != wxCONV_FAILED ) { - wxCharBuffer buf(dstLen); + wxCharBuffer buf(dstLen - 1); if ( FromWChar(buf.data(), dstLen, in, inLen) ) { if ( outLen ) - *outLen = dstLen; + *outLen = dstLen - 1; return buf; } } @@ -1825,35 +1819,27 @@ public: // wouldn't work if reading an incomplete MB char didn't result in an // error // - // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in - // an error (tested under Windows Server 2003) and apparently it is - // done on purpose, i.e. the function accepts any input in this case - // and although I'd prefer to return error on ill-formed output, our - // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is - // explicitly ill-formed according to RFC 2152) neither so we don't - // even have any fallback here... - // // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or - // Win XP or newer and if it is specified on older versions, conversion - // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS) - // fails. So we can only use the flag on newer Windows versions. 
- // Additionally, the flag is not supported by UTF7, symbol and CJK - // encodings. See here: + // Win XP or newer and it is not supported for UTF-[78] so we always + // use our own conversions in this case. See // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp + if ( m_CodePage == CP_UTF8 ) + { + return wxConvUTF8.MB2WC(buf, psz, n); + } + + if ( m_CodePage == CP_UTF7 ) + { + return wxConvUTF7.MB2WC(buf, psz, n); + } + int flags = 0; - if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL && - m_CodePage < 50000 && - IsAtLeastWin2kSP4() ) + if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) && + IsAtLeastWin2kSP4() ) { flags = MB_ERR_INVALID_CHARS; } - else if ( m_CodePage == CP_UTF8 ) - { - // Avoid round-trip in the special case of UTF-8 by using our - // own UTF-8 conversion code: - return wxMBConvUTF8().MB2WC(buf, psz, n); - } const size_t len = ::MultiByteToWideChar ( diff --git a/src/common/string.cpp b/src/common/string.cpp index 4e128a124f..de7500d0b2 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -1006,7 +1006,7 @@ int STRINGCLASS::compare(size_t nStart, size_t nLen, #if wxUSE_UNICODE // from multibyte string -wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength) +wxString::wxString(const char *psz, const wxMBConv& conv, size_t nLength) { // anything to do? 
if ( psz && nLength != 0 ) @@ -1031,7 +1031,7 @@ wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength) } //Convert wxString in Unicode mode to a multi-byte string -const wxCharBuffer wxString::mb_str(wxMBConv& conv) const +const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const { return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL); } @@ -1041,7 +1041,7 @@ const wxCharBuffer wxString::mb_str(wxMBConv& conv) const #if wxUSE_WCHAR_T // from wide string -wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength) +wxString::wxString(const wchar_t *pwz, const wxMBConv& conv, size_t nLength) { // anything to do? if ( pwz && nLength != 0 ) @@ -1067,7 +1067,7 @@ wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength) //Converts this string to a wide character string if unicode //mode is not enabled and wxUSE_WCHAR_T is enabled -const wxWCharBuffer wxString::wc_str(wxMBConv& conv) const +const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const { return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL); } diff --git a/src/common/textbuf.cpp b/src/common/textbuf.cpp index b1950d24bc..2aa823d9b3 100644 --- a/src/common/textbuf.cpp +++ b/src/common/textbuf.cpp @@ -181,14 +181,14 @@ bool wxTextBuffer::Create() return true; } -bool wxTextBuffer::Open(const wxString& strBufferName, wxMBConv& conv) +bool wxTextBuffer::Open(const wxString& strBufferName, const wxMBConv& conv) { m_strBufferName = strBufferName; return Open(conv); } -bool wxTextBuffer::Open(wxMBConv& conv) +bool wxTextBuffer::Open(const wxMBConv& conv) { // buffer name must be either given in ctor or in Open(const wxString&) wxASSERT( !m_strBufferName.empty() ); @@ -276,7 +276,7 @@ bool wxTextBuffer::Close() return true; } -bool wxTextBuffer::Write(wxTextFileType typeNew, wxMBConv& conv) +bool wxTextBuffer::Write(wxTextFileType typeNew, const wxMBConv& conv) { return OnWrite(typeNew, conv); } diff --git a/src/common/textfile.cpp 
b/src/common/textfile.cpp index 40f5d4b44d..3334375b44 100644 --- a/src/common/textfile.cpp +++ b/src/common/textfile.cpp @@ -86,7 +86,7 @@ bool wxTextFile::OnClose() } -bool wxTextFile::OnRead(wxMBConv& conv) +bool wxTextFile::OnRead(const wxMBConv& conv) { // file should be opened and we must be in it's beginning wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 ); @@ -114,18 +114,8 @@ bool wxTextFile::OnRead(wxMBConv& conv) return false; } - eof = nRead == 0; - if ( eof ) - { - // append 4 trailing NUL bytes: this is needed to ensure that the - // string is going to be NUL-terminated, whatever is the encoding - // used (even UTF-32) - block[0] = - block[1] = - block[2] = - block[3] = '\0'; - nRead = 4; - } + if ( nRead == 0 ) + break; // this shouldn't happen but don't overwrite the buffer if it does wxCHECK_MSG( bufPos + nRead <= bufSize, false, @@ -136,7 +126,7 @@ bool wxTextFile::OnRead(wxMBConv& conv) bufPos += nRead; } - const wxString str(buf, conv); + const wxString str(buf, conv, bufPos); // this doesn't risk to happen in ANSI build #if wxUSE_UNICODE @@ -211,7 +201,7 @@ bool wxTextFile::OnRead(wxMBConv& conv) } -bool wxTextFile::OnWrite(wxTextFileType typeNew, wxMBConv& conv) +bool wxTextFile::OnWrite(wxTextFileType typeNew, const wxMBConv& conv) { wxFileName fn = m_strBufferName; diff --git a/src/common/txtstrm.cpp b/src/common/txtstrm.cpp index 61260824e2..884c4b84c0 100644 --- a/src/common/txtstrm.cpp +++ b/src/common/txtstrm.cpp @@ -35,7 +35,9 @@ // ---------------------------------------------------------------------------- #if wxUSE_UNICODE -wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv) +wxTextInputStream::wxTextInputStream(wxInputStream &s, + const wxString &sep, + const wxMBConv& conv) : m_input(s), m_separators(sep), m_conv(conv) { memset((void*)m_lastBytes, 0, 10); @@ -298,7 +300,9 @@ wxTextInputStream& wxTextInputStream::operator>>(float& f) #if wxUSE_UNICODE 
-wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode, wxMBConv& conv) +wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, + wxEOL mode, + const wxMBConv& conv) : m_output(s), m_conv(conv) #else wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode)