1. changed all "wxMBConv& conv" parameters to "const wxMBConv&"

2. this allows using wxConvAuto() instead of wxConvUTF8 as the default value for this parameter in the classes which read text from a file: wxConvAuto automatically recognizes the BOM at the start of the file and uses the correct conversion

3. don't use Windows for UTF-7 conversions as there is no way to make it fail on invalid UTF-7 strings; use our own wxMBConvUTF7 instead

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38570 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2006-04-05 14:37:47 +00:00 · 2006-04-05 14:37:47 +00:00 · 830f8f11bc
commit 830f8f11bc
parent cc845a6142
21 changed files with 413 additions and 103 deletions
--- a/build/bakefiles/files.bkl
+++ b/build/bakefiles/files.bkl
@ -290,6 +290,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file!
    src/common/clntdata.cpp
    src/common/cmdline.cpp
    src/common/config.cpp
+    src/common/convauto.cpp
    src/common/datetime.cpp
    src/common/datstrm.cpp
    src/common/dircmn.cpp
@ -362,6 +363,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file!
    wx/cmdline.h
    wx/confbase.h
    wx/config.h
+    wx/convauto.h
    wx/containr.h
    wx/datetime.h
    wx/datstrm.h
--- a/docs/changes.txt
+++ b/docs/changes.txt
@ -27,6 +27,7 @@ Deprecated methods since 2.6.x and their replacements

 All:

+- wx(F)File, wxTextFile and wxInputStreams recognize Unicode BOM now
 - wxLaunchDefaultBrowser() now supports wxBROWSER_NEW_WINDOW flag.
 - Added wxStandardPaths::GetResourcesDir() and GetLocalizedResourcesDir()
 - Added wxStringTokenizer::GetLastDelimiter(); improved documentation.
--- a/include/wx/convauto.h
+++ b/include/wx/convauto.h
@ -0,0 +1,99 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        wx/convauto.h
+// Purpose:     wxConvAuto class declaration
+// Author:      Vadim Zeitlin
+// Created:     2006-04-03
+// RCS-ID:      $Id$
+// Copyright:   (c) 2006 Vadim Zeitlin
+// Licence:     wxWindows licence
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _WX_CONVAUTO_H_
+#define _WX_CONVAUTO_H_
+
+#include "wx/strconv.h"
+
+#if wxUSE_WCHAR_T
+
+// ----------------------------------------------------------------------------
+// wxConvAuto: uses BOM to automatically detect input encoding
+// ----------------------------------------------------------------------------
+
+class WXDLLIMPEXP_BASE wxConvAuto : public wxMBConv
+{
+public:
+    // default ctor: the real conversion is only created on demand, by the
+    // first call to ToWChar() or FromWChar(), but all the other members are
+    // nevertheless given well-defined values so that no code path can ever
+    // read an indeterminate flag or BOM type
+    wxConvAuto()
+        : m_conv(NULL),
+          m_bomType(BOM_None),
+          m_ownsConv(false),
+          m_consumedBOM(false)
+    {
+    }
+
+    // copy ctor doesn't copy anything from the other object either, as the
+    // conversion can only be deduced on first use; it starts from the same
+    // well-defined "nothing detected yet" state as the default ctor
+    wxConvAuto(const wxConvAuto& WXUNUSED(other))
+        : m_conv(NULL),
+          m_bomType(BOM_None),
+          m_ownsConv(false),
+          m_consumedBOM(false)
+    {
+    }
+
+    // only delete the conversion if we allocated it ourselves
+    virtual ~wxConvAuto() { if ( m_conv && m_ownsConv ) delete m_conv; }
+
+    // override the base class virtual function(s) to use our m_conv
+    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
+                           const char *src, size_t srcLen = -1) const;
+
+    virtual size_t FromWChar(char *dst, size_t dstLen,
+                             const wchar_t *src, size_t srcLen = -1) const;
+
+    // return the NUL length of the conversion really used
+    //
+    // m_conv is still NULL if this is called before the first call to
+    // ToWChar() or FromWChar(): don't dereference it in this case but report
+    // the NUL length of UTF-8, which is the conversion used by default (see
+    // InitWithDefault()), without committing to it so that the BOM can still
+    // be detected later
+    virtual size_t GetMBNulLen() const
+    {
+        return m_conv ? m_conv->GetMBNulLen() : wxConvUTF8.GetMBNulLen();
+    }
+
+private:
+    // all currently recognized BOM values
+    enum BOMType
+    {
+        BOM_None,
+        BOM_UTF32BE,
+        BOM_UTF32LE,
+        BOM_UTF16BE,
+        BOM_UTF16LE,
+        BOM_UTF8
+    };
+
+    // return the BOM type of this buffer
+    static BOMType DetectBOM(const char *src, size_t srcLen);
+
+    // initialize m_conv with the conversion to use by default (UTF-8); this
+    // is a global object and so is not owned by us
+    void InitWithDefault()
+    {
+        m_conv = &wxConvUTF8;
+        m_ownsConv = false;
+    }
+
+    // create the correct conversion object for the given BOM type
+    void InitFromBOM(BOMType bomType);
+
+    // create the correct conversion object for the BOM present in the
+    // beginning of the buffer; adjust the buffer to skip the BOM if found
+    void InitFromInput(const char **src, size_t *len);
+
+    // adjust src and len to skip over the BOM (identified by m_bomType) at the
+    // start of the buffer
+    void SkipBOM(const char **src, size_t *len) const;
+
+
+    // conversion object which we really use, NULL until the first call to
+    // either ToWChar() or FromWChar()
+    wxMBConv *m_conv;
+
+    // our BOM type, BOM_None until a BOM has been detected
+    BOMType m_bomType;
+
+    // true if we allocated m_conv ourselves, false if we just use an existing
+    // global conversion
+    bool m_ownsConv;
+
+    // true if we already skipped BOM when converting (and not just calculating
+    // the size)
+    bool m_consumedBOM;
+
+
+    DECLARE_NO_ASSIGN_CLASS(wxConvAuto);
+};
+
+#endif // wxUSE_WCHAR_T
+
+#endif // _WX_CONVAUTO_H_
+
--- a/include/wx/datstrm.h
+++ b/include/wx/datstrm.h
@ -14,7 +14,7 @@

 #include "wx/stream.h"
 #include "wx/longlong.h"
-#include "wx/strconv.h"
+#include "wx/convauto.h"

 #if wxUSE_STREAMS

@ -22,7 +22,7 @@ class WXDLLIMPEXP_BASE wxDataInputStream
 {
 public:
 #if wxUSE_UNICODE
-    wxDataInputStream(wxInputStream& s, wxMBConv& conv = wxConvUTF8);
+    wxDataInputStream(wxInputStream& s, const wxMBConv& conv = wxConvAuto());
 #else
    wxDataInputStream(wxInputStream& s);
 #endif
@ -83,7 +83,7 @@ protected:
    wxInputStream *m_input;
    bool m_be_order;
 #if wxUSE_UNICODE
-    wxMBConv& m_conv;
+    wxMBConv m_conv;
 #endif

    DECLARE_NO_COPY_CLASS(wxDataInputStream)
@ -93,7 +93,7 @@ class WXDLLIMPEXP_BASE wxDataOutputStream
 {
 public:
 #if wxUSE_UNICODE
-    wxDataOutputStream(wxOutputStream& s, wxMBConv& conv = wxConvUTF8);
+    wxDataOutputStream(wxOutputStream& s, const wxMBConv& conv = wxConvAuto());
 #else
    wxDataOutputStream(wxOutputStream& s);
 #endif
@ -157,7 +157,7 @@ protected:
    wxOutputStream *m_output;
    bool m_be_order;
 #if wxUSE_UNICODE
-    wxMBConv& m_conv;
+    wxMBConv m_conv;
 #endif

    DECLARE_NO_COPY_CLASS(wxDataOutputStream)
--- a/include/wx/ffile.h
+++ b/include/wx/ffile.h
@ -18,6 +18,7 @@

 #include  "wx/string.h"
 #include  "wx/filefn.h"
+#include  "wx/convauto.h"

 #include <stdio.h>

@ -54,14 +55,14 @@ public:

  // read/write (unbuffered)
    // read all data from the file into a string (useful for text files)
-  bool ReadAll(wxString *str, wxMBConv& conv = wxConvUTF8);
+  bool ReadAll(wxString *str, const wxMBConv& conv = wxConvAuto());
    // returns number of bytes read - use Eof() and Error() to see if an error
    // occurred or not
  size_t Read(void *pBuf, size_t nCount);
    // returns the number of bytes written
  size_t Write(const void *pBuf, size_t nCount);
    // returns true on success
-  bool Write(const wxString& s, wxMBConv& conv = wxConvUTF8)
+  bool Write(const wxString& s, const wxMBConv& conv = wxConvAuto())
  {
      const wxWX2MBbuf buf = s.mb_str(conv);
      size_t size = strlen(buf);
--- a/include/wx/file.h
+++ b/include/wx/file.h
@ -97,7 +97,7 @@ public:
    // returns the number of bytes written
  size_t Write(const void *pBuf, size_t nCount);
    // returns true on success
-  bool Write(const wxString& s, wxMBConv& conv = wxConvUTF8)
+  bool Write(const wxString& s, const wxMBConv& conv = wxConvUTF8)
  {
      const wxWX2MBbuf buf = s.mb_str(conv);
      size_t size = strlen(buf);
@ -172,7 +172,7 @@ public:

  // I/O (both functions return true on success, false on failure)
  bool Write(const void *p, size_t n) { return m_file.Write(p, n) == n; }
-  bool Write(const wxString& str, wxMBConv& conv = wxConvUTF8)
+  bool Write(const wxString& str, const wxMBConv& conv = wxConvUTF8)
    { return m_file.Write(str, conv); }

  // different ways to close the file
--- a/include/wx/fileconf.h
+++ b/include/wx/fileconf.h
@ -122,11 +122,11 @@ public:
               const wxString& localFilename = wxEmptyString,
               const wxString& globalFilename = wxEmptyString,
               long style = wxCONFIG_USE_LOCAL_FILE | wxCONFIG_USE_GLOBAL_FILE,
-               wxMBConv& conv = wxConvUTF8);
+               const wxMBConv& conv = wxConvAuto());

 #if wxUSE_STREAMS
    // ctor that takes an input stream.
-  wxFileConfig(wxInputStream &inStream, wxMBConv& conv = wxConvUTF8);
+  wxFileConfig(wxInputStream &inStream, const wxMBConv& conv = wxConvAuto());
 #endif // wxUSE_STREAMS

    // dtor will save unsaved data
@ -169,7 +169,7 @@ public:
  // save the entire config file text to the given stream, note that the text
  // won't be saved again in dtor when Flush() is called if you use this method
  // as it won't be "changed" any more
-  virtual bool Save(wxOutputStream& os, wxMBConv& conv = wxConvUTF8);
+  virtual bool Save(wxOutputStream& os, const wxMBConv& conv = wxConvAuto());
 #endif // wxUSE_STREAMS

 public:
@ -227,7 +227,7 @@ private:
  wxFileConfigGroup *m_pRootGroup,      // the top (unnamed) group
                    *m_pCurrentGroup;   // the current group

-  wxMBConv   &m_conv;
+  wxMBConv    m_conv;

 #ifdef __UNIX__
  int m_umask;                          // the umask to use for file creation
--- a/include/wx/memtext.h
+++ b/include/wx/memtext.h
@ -37,11 +37,11 @@ protected:
    virtual bool OnClose()
        { return true; }

-    virtual bool OnRead(wxMBConv& WXUNUSED(conv))
+    virtual bool OnRead(const wxMBConv& WXUNUSED(conv))
        { return true; }

    virtual bool OnWrite(wxTextFileType WXUNUSED(typeNew),
-                         wxMBConv& WXUNUSED(conv) = wxConvUTF8)
+                         const wxMBConv& WXUNUSED(conv) = wxConvUTF8)
        { return true; }

 private:
--- a/include/wx/string.h
+++ b/include/wx/string.h
@ -325,7 +325,9 @@ public:
      { InitWith(psz, 0, npos); }
  wxStringBase(const wxChar *psz, size_t nLength)
      { InitWith(psz, 0, nLength); }
-  wxStringBase(const wxChar *psz, wxMBConv& WXUNUSED(conv), size_t nLength = npos)
+  wxStringBase(const wxChar *psz,
+               const wxMBConv& WXUNUSED(conv),
+               size_t nLength = npos)
      { InitWith(psz, 0, nLength); }
    // take nLen chars starting at nPos
  wxStringBase(const wxStringBase& str, size_t nPos, size_t nLen)
@ -650,7 +652,9 @@ public:
      : wxStringBase(psz ? psz : wxT("")) { }
  wxString(const wxChar *psz, size_t nLength)
      : wxStringBase(psz, nLength) { }
-  wxString(const wxChar *psz, wxMBConv& WXUNUSED(conv), size_t nLength = npos)
+  wxString(const wxChar *psz,
+           const wxMBConv& WXUNUSED(conv),
+           size_t nLength = npos)
      : wxStringBase(psz, nLength == npos ? wxStrlen(psz) : nLength) { }

  // even if we're not built with wxUSE_STL == 1 it is very convenient to allow
@ -666,7 +670,7 @@ public:

 #if wxUSE_UNICODE
    // from multibyte string
-  wxString(const char *psz, wxMBConv& conv, size_t nLength = npos);
+  wxString(const char *psz, const wxMBConv& conv, size_t nLength = npos);
    // from wxWCharBuffer (i.e. return from wxGetString)
  wxString(const wxWCharBuffer& psz) : wxStringBase(psz.data()) { }
 #else // ANSI
@ -679,7 +683,9 @@ public:

 #if wxUSE_WCHAR_T
    // from wide (Unicode) string
-  wxString(const wchar_t *pwz, wxMBConv& conv = wxConvLibc, size_t nLength = npos);
+  wxString(const wchar_t *pwz,
+           const wxMBConv& conv = wxConvLibc,
+           size_t nLength = npos);
 #endif // !wxUSE_WCHAR_T

    // from wxCharBuffer
@ -809,14 +815,14 @@ public:
    // type differs because a function may either return pointer to the buffer
    // directly or have to use intermediate buffer for translation.
 #if wxUSE_UNICODE
-    const wxCharBuffer mb_str(wxMBConv& conv = wxConvLibc) const;
+    const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const;

    const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); }

    const wxChar* wc_str() const { return c_str(); }

    // for compatibility with !wxUSE_UNICODE version
-    const wxChar* wc_str(wxMBConv& WXUNUSED(conv)) const { return c_str(); }
+    const wxChar* wc_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }

 #if wxMBFILES
    const wxCharBuffer fn_str() const { return mb_str(wxConvFile); }
@ -827,12 +833,12 @@ public:
    const wxChar* mb_str() const { return c_str(); }

    // for compatibility with wxUSE_UNICODE version
-    const wxChar* mb_str(wxMBConv& WXUNUSED(conv)) const { return c_str(); }
+    const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }

    const wxWX2MBbuf mbc_str() const { return mb_str(); }

 #if wxUSE_WCHAR_T
-    const wxWCharBuffer wc_str(wxMBConv& conv) const;
+    const wxWCharBuffer wc_str(const wxMBConv& conv) const;
 #endif // wxUSE_WCHAR_T
 #ifdef __WXOSX__
    const wxCharBuffer fn_str() const { return wxConvFile.cWC2WX( wc_str( wxConvLocal ) ); }
--- a/include/wx/textbuf.h
+++ b/include/wx/textbuf.h
@ -14,6 +14,7 @@

 #include "wx/defs.h"
 #include "wx/arrstr.h"
+#include "wx/convauto.h"

 // ----------------------------------------------------------------------------
 // constants
@ -80,10 +81,10 @@ public:
    bool Create(const wxString& strBufferName);

    // Open() also loads buffer in memory on success
-    bool Open(wxMBConv& conv = wxConvUTF8);
+    bool Open(const wxMBConv& conv = wxConvAuto());

    // same as Open() but with (another) buffer name
-    bool Open(const wxString& strBufferName, wxMBConv& conv = wxConvUTF8);
+    bool Open(const wxString& strBufferName, const wxMBConv& conv = wxConvAuto());

    // closes the buffer and frees memory, losing all changes
    bool Close();
@ -161,7 +162,7 @@ public:
    // change the buffer (default argument means "don't change type")
    // possibly in another format
    bool Write(wxTextFileType typeNew = wxTextFileType_None,
-               wxMBConv& conv = wxConvUTF8);
+               const wxMBConv& conv = wxConvAuto());

    // dtor
    virtual ~wxTextBuffer();
@ -183,8 +184,8 @@ protected:
    virtual bool OnOpen(const wxString &strBufferName,
                        wxTextBufferOpenMode openmode) = 0;
    virtual bool OnClose() = 0;
-    virtual bool OnRead(wxMBConv& conv) = 0;
-    virtual bool OnWrite(wxTextFileType typeNew, wxMBConv& conv) = 0;
+    virtual bool OnRead(const wxMBConv& conv) = 0;
+    virtual bool OnWrite(wxTextFileType typeNew, const wxMBConv& conv) = 0;

    static wxString ms_eof;     // dummy string returned at EOF
    wxString m_strBufferName;   // name of the buffer
--- a/include/wx/textfile.h
+++ b/include/wx/textfile.h
@ -39,8 +39,8 @@ protected:
    virtual bool OnOpen(const wxString &strBufferName,
                        wxTextBufferOpenMode OpenMode);
    virtual bool OnClose();
-    virtual bool OnRead(wxMBConv& conv);
-    virtual bool OnWrite(wxTextFileType typeNew, wxMBConv& conv);
+    virtual bool OnRead(const wxMBConv& conv);
+    virtual bool OnWrite(wxTextFileType typeNew, const wxMBConv& conv);

 private:

--- a/include/wx/txtstrm.h
+++ b/include/wx/txtstrm.h
@ -13,6 +13,7 @@
 #define _WX_TXTSTREAM_H_

 #include "wx/stream.h"
+#include "wx/convauto.h"

 #if wxUSE_STREAMS

@ -36,9 +37,11 @@ class WXDLLIMPEXP_BASE wxTextInputStream
 {
 public:
 #if wxUSE_UNICODE
-    wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t"), wxMBConv& conv = wxConvUTF8 );
+    wxTextInputStream(wxInputStream& s,
+                      const wxString &sep=wxT(" \t"),
+                      const wxMBConv& conv = wxConvAuto());
 #else
-    wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t") );
+    wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t"));
 #endif
    ~wxTextInputStream();

@ -78,7 +81,7 @@ protected:
    char m_lastBytes[10]; // stores the bytes that were read for the last character

 #if wxUSE_UNICODE
-    wxMBConv &m_conv;
+    wxMBConv m_conv;
 #endif

    bool   EatEOL(const wxChar &c);
@ -102,9 +105,11 @@ class WXDLLIMPEXP_BASE wxTextOutputStream
 {
 public:
 #if wxUSE_UNICODE
-    wxTextOutputStream( wxOutputStream& s, wxEOL mode = wxEOL_NATIVE, wxMBConv& conv = wxConvUTF8  );
+    wxTextOutputStream(wxOutputStream& s,
+                       wxEOL mode = wxEOL_NATIVE,
+                       const wxMBConv& conv = wxConvAuto());
 #else
-    wxTextOutputStream( wxOutputStream& s, wxEOL mode = wxEOL_NATIVE );
+    wxTextOutputStream(wxOutputStream& s, wxEOL mode = wxEOL_NATIVE);
 #endif
    virtual ~wxTextOutputStream();

@ -139,7 +144,7 @@ protected:
    wxEOL           m_mode;

 #if wxUSE_UNICODE
-    wxMBConv &m_conv;
+    wxMBConv m_conv;
 #endif

    DECLARE_NO_COPY_CLASS(wxTextOutputStream)
--- a/src/common/convauto.cpp
+++ b/src/common/convauto.cpp
@ -0,0 +1,214 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        src/common/convauto.cpp
+// Purpose:     implementation of wxConvAuto
+// Author:      Vadim Zeitlin
+// Created:     2006-04-04
+// RCS-ID:      $Id$
+// Copyright:   (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
+// Licence:     wxWindows licence
+///////////////////////////////////////////////////////////////////////////////
+
+// ============================================================================
+// declarations
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// headers
+// ----------------------------------------------------------------------------
+
+// for compilers that support precompilation, includes "wx.h".
+#include "wx/wxprec.h"
+
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif
+
+#if wxUSE_WCHAR_T
+
+#ifndef WX_PRECOMP
+#endif //WX_PRECOMP
+
+#include "wx/convauto.h"
+
+// ============================================================================
+// implementation
+// ============================================================================
+
+/* static */
+wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
+{
+    // Determine which BOM, if any, the buffer starts with.
+    //
+    // src is the start of the input and srcLen its length in bytes or
+    // (size_t)-1 if the length was not specified, in which case the input is
+    // taken to be NUL-terminated. Returns BOM_None if no BOM is recognized.
+
+    if ( srcLen == (size_t)-1 )
+    {
+        // the length checks below would all trivially pass for this huge
+        // value and let us read beyond the terminating NUL of a short string,
+        // so find out how many bytes (up to 4, the longest BOM) we may really
+        // examine; note that this means that a BOM containing NUL bytes
+        // (UTF-32) can't be recognized in input of unspecified length
+        srcLen = 0;
+        while ( srcLen < 4 && src[srcLen] != '\0' )
+            srcLen++;
+    }
+
+    if ( srcLen < 2 )
+    {
+        // minimal BOM is 2 bytes so bail out immediately and simplify the code
+        // below which wouldn't need to check for length for UTF-16 cases
+        return BOM_None;
+    }
+
+    // examine the buffer for BOM presence
+    //
+    // see http://www.unicode.org/faq/utf_bom.html#BOM
+    switch ( *src++ )
+    {
+        case '\0':
+            // could only be big endian UTF-32 (00 00 FE FF); src now points
+            // to the second byte of the input
+            if ( srcLen >= 4 &&
+                    src[0] == '\0' &&
+                        src[1] == '\xfe' &&
+                            src[2] == '\xff' )
+            {
+                return BOM_UTF32BE;
+            }
+            break;
+
+        case '\xfe':
+            // could only be big endian UTF-16 (FE FF)
+            if ( *src++ == '\xff' )
+            {
+                return BOM_UTF16BE;
+            }
+            break;
+
+        case '\xff':
+            // could be either little endian UTF-16 or UTF-32, both start
+            // with FF FE
+            if ( *src++ == '\xfe' )
+            {
+                // src now points to the third byte: two NUL bytes following
+                // FF FE mean UTF-32, anything else UTF-16
+                return srcLen >= 4 && src[0] == '\0' && src[1] == '\0'
+                            ? BOM_UTF32LE
+                            : BOM_UTF16LE;
+            }
+            break;
+
+        case '\xef':
+            // is this UTF-8 BOM (EF BB BF)?
+            if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' )
+            {
+                return BOM_UTF8;
+            }
+            break;
+    }
+
+    return BOM_None;
+}
+
+void wxConvAuto::InitFromBOM(BOMType bomType)
+{
+    // Create the conversion corresponding to the given BOM type and remember
+    // whether we own it and whether a BOM still remains to be skipped.
+
+    // until proven otherwise there is a BOM which hasn't been skipped yet...
+    m_consumedBOM = false;
+
+    // ... and the conversion is allocated on the heap and so owned by us: this
+    // is the case for all UTF-16 and UTF-32 variants, the other branches
+    // override it
+    m_ownsConv = true;
+
+    switch ( bomType )
+    {
+        case BOM_UTF16LE:
+            m_conv = new wxMBConvUTF16LE;
+            break;
+
+        case BOM_UTF16BE:
+            m_conv = new wxMBConvUTF16BE;
+            break;
+
+        case BOM_UTF32LE:
+            m_conv = new wxMBConvUTF32LE;
+            break;
+
+        case BOM_UTF32BE:
+            m_conv = new wxMBConvUTF32BE;
+            break;
+
+        case BOM_UTF8:
+            // reuse the global UTF-8 conversion object instead of creating
+            // our own one
+            m_conv = &wxConvUTF8;
+            m_ownsConv = false;
+            break;
+
+        default:
+            wxFAIL_MSG( _T("unexpected BOM type") );
+            // fall through: we still need some conversion to work with
+
+        case BOM_None:
+            // this resets m_ownsConv to false as the default conversion is a
+            // global object too
+            InitWithDefault();
+
+            // without a BOM there is nothing left to consume
+            m_consumedBOM = true;
+    }
+}
+
+void wxConvAuto::SkipBOM(const char **src, size_t *len) const
+{
+    // Advance *src past the BOM identified by m_bomType and reduce *len
+    // accordingly, unless it is (size_t)-1 in which case it is left alone.
+
+    // size in bytes of the BOM to skip, 0 if there is none
+    int bomSize = 0;
+
+    switch ( m_bomType )
+    {
+        case BOM_None:
+            // nothing to skip
+            break;
+
+        case BOM_UTF16BE:
+        case BOM_UTF16LE:
+            bomSize = 2;
+            break;
+
+        case BOM_UTF8:
+            bomSize = 3;
+            break;
+
+        case BOM_UTF32BE:
+        case BOM_UTF32LE:
+            bomSize = 4;
+            break;
+
+        default:
+            wxFAIL_MSG( _T("unexpected BOM type") );
+            // carry on without skipping anything, as if there were no BOM
+    }
+
+    *src += bomSize;
+
+    // the special "length not specified" value must be preserved as is
+    if ( *len != (size_t)-1 )
+        *len -= bomSize;
+}
+
+void wxConvAuto::InitFromInput(const char **src, size_t *len)
+{
+    // look at the beginning of the buffer to find out which BOM, if any, it
+    // starts with and remember it: SkipBOM() relies on m_bomType
+    const BOMType bomDetected = DetectBOM(*src, *len);
+    m_bomType = bomDetected;
+
+    // create the conversion matching this BOM
+    InitFromBOM(bomDetected);
+
+    // and finally adjust the caller's buffer so that it starts after the BOM
+    SkipBOM(src, len);
+}
+
+size_t
+wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
+                    const char *src, size_t srcLen) const
+{
+    // The BOM is examined and the matching conversion created the first time
+    // we're called. The BOM must however be skipped not only during this
+    // first call but also during the first call with non-NULL dst: typically
+    // we're called with NULL dst first, just to compute the required buffer
+    // size, and only then with the real output buffer.
+
+    // this function is const but has to update our state
+    wxConvAuto * const self = wx_const_cast(wxConvAuto *, this);
+
+    if ( m_conv )
+    {
+        // the conversion already exists, we only have to skip the BOM if
+        // this is the first real (i.e. not size-only) conversion
+        if ( dst && !m_consumedBOM )
+        {
+            SkipBOM(&src, &srcLen);
+            self->m_consumedBOM = true;
+        }
+    }
+    else // first call
+    {
+        // this detects the BOM, creates m_conv and skips the BOM in src
+        self->InitFromInput(&src, &srcLen);
+
+        // the BOM only counts as consumed if we really convert something
+        if ( dst )
+            self->m_consumedBOM = true;
+    }
+
+    return m_conv->ToWChar(dst, dstLen, src, srcLen);
+}
+
+size_t
+wxConvAuto::FromWChar(char *dst, size_t dstLen,
+                      const wchar_t *src, size_t srcLen) const
+{
+    // Convert from wide characters using the conversion selected so far,
+    // choosing the default one if none has been selected yet.
+    if ( !m_conv )
+    {
+        // this function is const but has to update our state
+        wxConvAuto * const self = wx_const_cast(wxConvAuto *, this);
+
+        // default to UTF-8 for the multibyte output
+        self->InitWithDefault();
+
+        // InitWithDefault() only sets m_conv and m_ownsConv, so also record
+        // that there is no BOM and hence nothing to skip: otherwise a
+        // subsequent ToWChar() call would find m_conv already set and could
+        // skip a BOM using m_bomType and m_consumedBOM values which were
+        // never assigned
+        self->m_bomType = BOM_None;
+        self->m_consumedBOM = true;
+    }
+
+    return m_conv->FromWChar(dst, dstLen, src, srcLen);
+}
+
+#endif // wxUSE_WCHAR_T
+
--- a/src/common/datstrm.cpp
+++ b/src/common/datstrm.cpp
@ -26,7 +26,7 @@
 // ---------------------------------------------------------------------------

 #if wxUSE_UNICODE
-wxDataInputStream::wxDataInputStream(wxInputStream& s, wxMBConv& conv)
+wxDataInputStream::wxDataInputStream(wxInputStream& s, const wxMBConv& conv)
  : m_input(&s), m_be_order(false), m_conv(conv)
 #else
 wxDataInputStream::wxDataInputStream(wxInputStream& s)
@ -445,7 +445,7 @@ wxDataInputStream& wxDataInputStream::operator>>(float& f)
 // ---------------------------------------------------------------------------

 #if wxUSE_UNICODE
-wxDataOutputStream::wxDataOutputStream(wxOutputStream& s, wxMBConv& conv)
+wxDataOutputStream::wxDataOutputStream(wxOutputStream& s, const wxMBConv& conv)
  : m_output(&s), m_be_order(false), m_conv(conv)
 #else
 wxDataOutputStream::wxDataOutputStream(wxOutputStream& s)
--- a/src/common/ffile.cpp
+++ b/src/common/ffile.cpp
@ -103,7 +103,7 @@ bool wxFFile::Close()
 // read/write
 // ----------------------------------------------------------------------------

-bool wxFFile::ReadAll(wxString *str, wxMBConv& conv)
+bool wxFFile::ReadAll(wxString *str, const wxMBConv& conv)
 {
    wxCHECK_MSG( str, false, wxT("invalid parameter") );
    wxCHECK_MSG( IsOpened(), false, wxT("can't read from closed file") );
--- a/src/common/fileconf.cpp
+++ b/src/common/fileconf.cpp
@ -426,7 +426,8 @@ void wxFileConfig::Init()
 // constructor supports creation of wxFileConfig objects of any type
 wxFileConfig::wxFileConfig(const wxString& appName, const wxString& vendorName,
                           const wxString& strLocal, const wxString& strGlobal,
-                           long style, wxMBConv& conv)
+                           long style,
+                           const wxMBConv& conv)
            : wxConfigBase(::GetAppName(appName), vendorName,
                           strLocal, strGlobal,
                           style),
@ -474,7 +475,7 @@ wxFileConfig::wxFileConfig(const wxString& appName, const wxString& vendorName,

 #if wxUSE_STREAMS

-wxFileConfig::wxFileConfig(wxInputStream &inStream, wxMBConv& conv)
+wxFileConfig::wxFileConfig(wxInputStream &inStream, const wxMBConv& conv)
            : m_conv(conv)
 {
    // always local_file when this constructor is called (?)
@ -1036,7 +1037,7 @@ bool wxFileConfig::Flush(bool /* bCurrentOnly */)

 #if wxUSE_STREAMS

-bool wxFileConfig::Save(wxOutputStream& os, wxMBConv& conv)
+bool wxFileConfig::Save(wxOutputStream& os, const wxMBConv& conv)
 {
    // save unconditionally, even if not dirty
    for ( wxFileConfigLineList *p = m_linesHead; p != NULL; p = p->Next() )
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@ -203,21 +203,16 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
        size_t lenChunk = MB2WC(NULL, src, 0);
        if ( lenChunk == 0 )
        {
-            // nothing left in the input string, conversion succeeded
+            // nothing left in the input string, conversion succeeded; but
+            // still account for the trailing NULL
+            dstWritten++;
            break;
        }

        if ( lenChunk == wxCONV_FAILED )
            return wxCONV_FAILED;

-        // if we already have a previous chunk, leave the NUL separating it
-        // from this one
-        if ( dstWritten )
-        {
-            dstWritten++;
-            if ( dst )
-                dst++;
-        }
+        lenChunk++; // for trailing NUL

        dstWritten += lenChunk;

@ -226,8 +221,7 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
            if ( dstWritten > dstLen )
                return wxCONV_FAILED;

-            lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
-            if ( lenChunk == wxCONV_FAILED )
+            if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
                return wxCONV_FAILED;

            dst += lenChunk;
@ -390,11 +384,11 @@ wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
    const size_t dstLen = ToWChar(NULL, 0, in, inLen);
    if ( dstLen != wxCONV_FAILED )
    {
-        wxWCharBuffer wbuf(dstLen);
+        wxWCharBuffer wbuf(dstLen - 1);
        if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
        {
            if ( outLen )
-                *outLen = dstLen;
+                *outLen = dstLen - 1;
            return wbuf;
        }
    }
@ -411,11 +405,11 @@ wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
    const size_t dstLen = FromWChar(NULL, 0, in, inLen);
    if ( dstLen != wxCONV_FAILED )
    {
-        wxCharBuffer buf(dstLen);
+        wxCharBuffer buf(dstLen - 1);
        if ( FromWChar(buf.data(), dstLen, in, inLen) )
        {
            if ( outLen )
-                *outLen = dstLen;
+                *outLen = dstLen - 1;
            return buf;
        }
    }
@ -1825,35 +1819,27 @@ public:
        // wouldn't work if reading an incomplete MB char didn't result in an
        // error
        //
-        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
-        // an error (tested under Windows Server 2003) and apparently it is
-        // done on purpose, i.e. the function accepts any input in this case
-        // and although I'd prefer to return error on ill-formed output, our
-        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
-        // explicitly ill-formed according to RFC 2152) neither so we don't
-        // even have any fallback here...
-        //
        // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
-        // Win XP or newer and if it is specified on older versions, conversion
-        // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
-        // fails. So we can only use the flag on newer Windows versions.
-        // Additionally, the flag is not supported by UTF7, symbol and CJK
-        // encodings. See here:
+        // Win XP or newer and it is not supported for UTF-[78] so we always
+        // use our own conversions in this case. See
        //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
        //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
+        if ( m_CodePage == CP_UTF8 )
+        {
+            return wxConvUTF8.MB2WC(buf, psz, n);
+        }
+
+        if ( m_CodePage == CP_UTF7 )
+        {
+            return wxConvUTF7.MB2WC(buf, psz, n);
+        }
+
        int flags = 0;
-        if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
-             m_CodePage < 50000 &&
-             IsAtLeastWin2kSP4() )
+        if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
+                IsAtLeastWin2kSP4() )
        {
            flags = MB_ERR_INVALID_CHARS;
        }
-        else if ( m_CodePage == CP_UTF8 )
-        {
-            // Avoid round-trip in the special case of UTF-8 by using our
-            // own UTF-8 conversion code:
-            return wxMBConvUTF8().MB2WC(buf, psz, n);
-        }

        const size_t len = ::MultiByteToWideChar
                             (
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@ -1006,7 +1006,7 @@ int STRINGCLASS::compare(size_t nStart, size_t nLen,
 #if wxUSE_UNICODE

 // from multibyte string
-wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength)
+wxString::wxString(const char *psz, const wxMBConv& conv, size_t nLength)
 {
    // anything to do?
    if ( psz && nLength != 0 )
@ -1031,7 +1031,7 @@ wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength)
 }

 //Convert wxString in Unicode mode to a multi-byte string
-const wxCharBuffer wxString::mb_str(wxMBConv& conv) const
+const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
 {
    return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
 }
@ -1041,7 +1041,7 @@ const wxCharBuffer wxString::mb_str(wxMBConv& conv) const
 #if wxUSE_WCHAR_T

 // from wide string
-wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength)
+wxString::wxString(const wchar_t *pwz, const wxMBConv& conv, size_t nLength)
 {
    // anything to do?
    if ( pwz && nLength != 0 )
@ -1067,7 +1067,7 @@ wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength)

 //Converts this string to a wide character string if unicode
 //mode is not enabled and wxUSE_WCHAR_T is enabled
-const wxWCharBuffer wxString::wc_str(wxMBConv& conv) const
+const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
 {
    return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
 }
--- a/src/common/textbuf.cpp
+++ b/src/common/textbuf.cpp
@ -181,14 +181,14 @@ bool wxTextBuffer::Create()
    return true;
 }

-bool wxTextBuffer::Open(const wxString& strBufferName, wxMBConv& conv)
+bool wxTextBuffer::Open(const wxString& strBufferName, const wxMBConv& conv)
 {
    m_strBufferName = strBufferName;

    return Open(conv);
 }

-bool wxTextBuffer::Open(wxMBConv& conv)
+bool wxTextBuffer::Open(const wxMBConv& conv)
 {
    // buffer name must be either given in ctor or in Open(const wxString&)
    wxASSERT( !m_strBufferName.empty() );
@ -276,7 +276,7 @@ bool wxTextBuffer::Close()
    return true;
 }

-bool wxTextBuffer::Write(wxTextFileType typeNew, wxMBConv& conv)
+bool wxTextBuffer::Write(wxTextFileType typeNew, const wxMBConv& conv)
 {
    return OnWrite(typeNew, conv);
 }
--- a/src/common/textfile.cpp
+++ b/src/common/textfile.cpp
@ -86,7 +86,7 @@ bool wxTextFile::OnClose()
 }


-bool wxTextFile::OnRead(wxMBConv& conv)
+bool wxTextFile::OnRead(const wxMBConv& conv)
 {
    // file should be opened and we must be in it's beginning
    wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
@ -114,18 +114,8 @@ bool wxTextFile::OnRead(wxMBConv& conv)
            return false;
        }

-        eof = nRead == 0;
-        if ( eof )
-        {
-            // append 4 trailing NUL bytes: this is needed to ensure that the
-            // string is going to be NUL-terminated, whatever is the encoding
-            // used (even UTF-32)
-            block[0] =
-            block[1] =
-            block[2] =
-            block[3] = '\0';
-            nRead = 4;
-        }
+        if ( nRead == 0 )
+            break;

        // this shouldn't happen but don't overwrite the buffer if it does
        wxCHECK_MSG( bufPos + nRead <= bufSize, false,
@ -136,7 +126,7 @@ bool wxTextFile::OnRead(wxMBConv& conv)
        bufPos += nRead;
    }

-    const wxString str(buf, conv);
+    const wxString str(buf, conv, bufPos);

    // this doesn't risk to happen in ANSI build
 #if wxUSE_UNICODE
@ -211,7 +201,7 @@ bool wxTextFile::OnRead(wxMBConv& conv)
 }


-bool wxTextFile::OnWrite(wxTextFileType typeNew, wxMBConv& conv)
+bool wxTextFile::OnWrite(wxTextFileType typeNew, const wxMBConv& conv)
 {
    wxFileName fn = m_strBufferName;

--- a/src/common/txtstrm.cpp
+++ b/src/common/txtstrm.cpp
@ -35,7 +35,9 @@
 // ----------------------------------------------------------------------------

 #if wxUSE_UNICODE
-wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv)
+wxTextInputStream::wxTextInputStream(wxInputStream &s,
+                                     const wxString &sep,
+                                     const wxMBConv& conv)
  : m_input(s), m_separators(sep), m_conv(conv)
 {
    memset((void*)m_lastBytes, 0, 10);
@ -298,7 +300,9 @@ wxTextInputStream& wxTextInputStream::operator>>(float& f)


 #if wxUSE_UNICODE
-wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode, wxMBConv& conv)
+wxTextOutputStream::wxTextOutputStream(wxOutputStream& s,
+                                       wxEOL mode,
+                                       const wxMBConv& conv)
  : m_output(s), m_conv(conv)
 #else
 wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode)