1. changed all "wxMBConv& conv" parameters to "const wxMBConv&"
2. this allows using wxConvAuto() instead of wxConvUTF8 as the default value for this parameter in the classes which read text from a file: wxConvAuto automatically recognizes the BOM at the start of the file and uses the correct conversion 3. don't use Windows for UTF-7 conversions as there is no way to make it fail on invalid UTF-7 strings; use our own wxMBConvUTF7 instead git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38570 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
parent
cc845a6142
commit
830f8f11bc
@ -290,6 +290,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file!
|
||||
src/common/clntdata.cpp
|
||||
src/common/cmdline.cpp
|
||||
src/common/config.cpp
|
||||
src/common/convauto.cpp
|
||||
src/common/datetime.cpp
|
||||
src/common/datstrm.cpp
|
||||
src/common/dircmn.cpp
|
||||
@ -362,6 +363,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file!
|
||||
wx/cmdline.h
|
||||
wx/confbase.h
|
||||
wx/config.h
|
||||
wx/convauto.h
|
||||
wx/containr.h
|
||||
wx/datetime.h
|
||||
wx/datstrm.h
|
||||
|
@ -27,6 +27,7 @@ Deprecated methods since 2.6.x and their replacements
|
||||
|
||||
All:
|
||||
|
||||
- wx(F)File, wxTextFile and wxInputStreams recognize Unicode BOM now
|
||||
- wxLaunchDefaultBrowser() now supports wxBROWSER_NEW_WINDOW flag.
|
||||
- Added wxStandardPaths::GetResourcesDir() and GetLocalizedResourcesDir()
|
||||
- Added wxStringTokenizer::GetLastDelimiter(); improved documentation.
|
||||
|
99
include/wx/convauto.h
Normal file
99
include/wx/convauto.h
Normal file
@ -0,0 +1,99 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Name: wx/convauto.h
|
||||
// Purpose: wxConvAuto class declaration
|
||||
// Author: Vadim Zeitlin
|
||||
// Created: 2006-04-03
|
||||
// RCS-ID: $Id$
|
||||
// Copyright: (c) 2006 Vadim Zeitlin
|
||||
// Licence: wxWindows licence
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WX_CONVAUTO_H_
|
||||
#define _WX_CONVAUTO_H_
|
||||
|
||||
#include "wx/strconv.h"
|
||||
|
||||
#if wxUSE_WCHAR_T
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// wxConvAuto: uses BOM to automatically detect input encoding
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
class WXDLLIMPEXP_BASE wxConvAuto : public wxMBConv
{
public:
    // default ctor: the real conversion is only created on first use, but
    // all the members are given well-defined values right away so that no
    // code path (e.g. FromWChar() followed by ToWChar()) can ever read an
    // uninitialized flag or BOM type
    wxConvAuto()
        : m_conv(NULL),
          m_bomType(BOM_None),
          m_ownsConv(false),
          m_consumedBOM(false)
    {
    }

    // copy ctor doesn't copy anything from the other object either, as the
    // conversion can only be deduced on first use: the new object starts in
    // the same pristine state as a default-constructed one
    wxConvAuto(const wxConvAuto& WXUNUSED(other))
        : wxMBConv(),
          m_conv(NULL),
          m_bomType(BOM_None),
          m_ownsConv(false),
          m_consumedBOM(false)
    {
    }

    // only delete the conversion if we allocated it ourselves (it may also
    // point to the global wxConvUTF8 object, see InitWithDefault())
    virtual ~wxConvAuto() { if ( m_conv && m_ownsConv ) delete m_conv; }

    // override the base class virtual function(s) to use our m_conv
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
                           const char *src, size_t srcLen = -1) const;

    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = -1) const;

    // m_conv is still NULL if no conversion has been done yet: don't
    // dereference it in this case but answer for the conversion which
    // InitWithDefault() would select (UTF-8) instead
    virtual size_t GetMBNulLen() const
    {
        return m_conv ? m_conv->GetMBNulLen() : wxConvUTF8.GetMBNulLen();
    }

private:
    // all currently recognized BOM values
    enum BOMType
    {
        BOM_None,
        BOM_UTF32BE,
        BOM_UTF32LE,
        BOM_UTF16BE,
        BOM_UTF16LE,
        BOM_UTF8
    };

    // return the BOM type of this buffer
    static BOMType DetectBOM(const char *src, size_t srcLen);

    // initialize m_conv with the conversion to use by default (UTF-8); this
    // uses the global wxConvUTF8 object and so we don't own the conversion
    void InitWithDefault()
    {
        m_conv = &wxConvUTF8;
        m_ownsConv = false;
    }

    // create the correct conversion object for the given BOM type
    void InitFromBOM(BOMType bomType);

    // create the correct conversion object for the BOM present in the
    // beginning of the buffer; adjust the buffer to skip the BOM if found
    void InitFromInput(const char **src, size_t *len);

    // adjust src and len to skip over the BOM (identified by m_bomType) at the
    // start of the buffer
    void SkipBOM(const char **src, size_t *len) const;


    // conversion object which we really use, NULL until the first call to
    // either ToWChar() or FromWChar()
    wxMBConv *m_conv;

    // our BOM type
    BOMType m_bomType;

    // true if we allocated m_conv ourselves, false if we just use an existing
    // global conversion
    bool m_ownsConv;

    // true if we already skipped BOM when converting (and not just calculating
    // the size)
    bool m_consumedBOM;


    DECLARE_NO_ASSIGN_CLASS(wxConvAuto);
};
|
||||
|
||||
#endif // wxUSE_WCHAR_T
|
||||
|
||||
#endif // _WX_CONVAUTO_H_
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#include "wx/stream.h"
|
||||
#include "wx/longlong.h"
|
||||
#include "wx/strconv.h"
|
||||
#include "wx/convauto.h"
|
||||
|
||||
#if wxUSE_STREAMS
|
||||
|
||||
@ -22,7 +22,7 @@ class WXDLLIMPEXP_BASE wxDataInputStream
|
||||
{
|
||||
public:
|
||||
#if wxUSE_UNICODE
|
||||
wxDataInputStream(wxInputStream& s, wxMBConv& conv = wxConvUTF8);
|
||||
wxDataInputStream(wxInputStream& s, const wxMBConv& conv = wxConvAuto());
|
||||
#else
|
||||
wxDataInputStream(wxInputStream& s);
|
||||
#endif
|
||||
@ -83,7 +83,7 @@ protected:
|
||||
wxInputStream *m_input;
|
||||
bool m_be_order;
|
||||
#if wxUSE_UNICODE
|
||||
wxMBConv& m_conv;
|
||||
wxMBConv m_conv;
|
||||
#endif
|
||||
|
||||
DECLARE_NO_COPY_CLASS(wxDataInputStream)
|
||||
@ -93,7 +93,7 @@ class WXDLLIMPEXP_BASE wxDataOutputStream
|
||||
{
|
||||
public:
|
||||
#if wxUSE_UNICODE
|
||||
wxDataOutputStream(wxOutputStream& s, wxMBConv& conv = wxConvUTF8);
|
||||
wxDataOutputStream(wxOutputStream& s, const wxMBConv& conv = wxConvAuto());
|
||||
#else
|
||||
wxDataOutputStream(wxOutputStream& s);
|
||||
#endif
|
||||
@ -157,7 +157,7 @@ protected:
|
||||
wxOutputStream *m_output;
|
||||
bool m_be_order;
|
||||
#if wxUSE_UNICODE
|
||||
wxMBConv& m_conv;
|
||||
wxMBConv m_conv;
|
||||
#endif
|
||||
|
||||
DECLARE_NO_COPY_CLASS(wxDataOutputStream)
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
#include "wx/string.h"
|
||||
#include "wx/filefn.h"
|
||||
#include "wx/convauto.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
@ -54,14 +55,14 @@ public:
|
||||
|
||||
// read/write (unbuffered)
|
||||
// read all data from the file into a string (useful for text files)
|
||||
bool ReadAll(wxString *str, wxMBConv& conv = wxConvUTF8);
|
||||
bool ReadAll(wxString *str, const wxMBConv& conv = wxConvAuto());
|
||||
// returns number of bytes read - use Eof() and Error() to see if an error
|
||||
// occurred or not
|
||||
size_t Read(void *pBuf, size_t nCount);
|
||||
// returns the number of bytes written
|
||||
size_t Write(const void *pBuf, size_t nCount);
|
||||
// returns true on success
|
||||
bool Write(const wxString& s, wxMBConv& conv = wxConvUTF8)
|
||||
bool Write(const wxString& s, const wxMBConv& conv = wxConvAuto())
|
||||
{
|
||||
const wxWX2MBbuf buf = s.mb_str(conv);
|
||||
size_t size = strlen(buf);
|
||||
|
@ -97,7 +97,7 @@ public:
|
||||
// returns the number of bytes written
|
||||
size_t Write(const void *pBuf, size_t nCount);
|
||||
// returns true on success
|
||||
bool Write(const wxString& s, wxMBConv& conv = wxConvUTF8)
|
||||
bool Write(const wxString& s, const wxMBConv& conv = wxConvUTF8)
|
||||
{
|
||||
const wxWX2MBbuf buf = s.mb_str(conv);
|
||||
size_t size = strlen(buf);
|
||||
@ -172,7 +172,7 @@ public:
|
||||
|
||||
// I/O (both functions return true on success, false on failure)
|
||||
bool Write(const void *p, size_t n) { return m_file.Write(p, n) == n; }
|
||||
bool Write(const wxString& str, wxMBConv& conv = wxConvUTF8)
|
||||
bool Write(const wxString& str, const wxMBConv& conv = wxConvUTF8)
|
||||
{ return m_file.Write(str, conv); }
|
||||
|
||||
// different ways to close the file
|
||||
|
@ -122,11 +122,11 @@ public:
|
||||
const wxString& localFilename = wxEmptyString,
|
||||
const wxString& globalFilename = wxEmptyString,
|
||||
long style = wxCONFIG_USE_LOCAL_FILE | wxCONFIG_USE_GLOBAL_FILE,
|
||||
wxMBConv& conv = wxConvUTF8);
|
||||
const wxMBConv& conv = wxConvAuto());
|
||||
|
||||
#if wxUSE_STREAMS
|
||||
// ctor that takes an input stream.
|
||||
wxFileConfig(wxInputStream &inStream, wxMBConv& conv = wxConvUTF8);
|
||||
wxFileConfig(wxInputStream &inStream, const wxMBConv& conv = wxConvAuto());
|
||||
#endif // wxUSE_STREAMS
|
||||
|
||||
// dtor will save unsaved data
|
||||
@ -169,7 +169,7 @@ public:
|
||||
// save the entire config file text to the given stream, note that the text
|
||||
// won't be saved again in dtor when Flush() is called if you use this method
|
||||
// as it won't be "changed" any more
|
||||
virtual bool Save(wxOutputStream& os, wxMBConv& conv = wxConvUTF8);
|
||||
virtual bool Save(wxOutputStream& os, const wxMBConv& conv = wxConvAuto());
|
||||
#endif // wxUSE_STREAMS
|
||||
|
||||
public:
|
||||
@ -227,7 +227,7 @@ private:
|
||||
wxFileConfigGroup *m_pRootGroup, // the top (unnamed) group
|
||||
*m_pCurrentGroup; // the current group
|
||||
|
||||
wxMBConv &m_conv;
|
||||
wxMBConv m_conv;
|
||||
|
||||
#ifdef __UNIX__
|
||||
int m_umask; // the umask to use for file creation
|
||||
|
@ -37,11 +37,11 @@ protected:
|
||||
virtual bool OnClose()
|
||||
{ return true; }
|
||||
|
||||
virtual bool OnRead(wxMBConv& WXUNUSED(conv))
|
||||
virtual bool OnRead(const wxMBConv& WXUNUSED(conv))
|
||||
{ return true; }
|
||||
|
||||
virtual bool OnWrite(wxTextFileType WXUNUSED(typeNew),
|
||||
wxMBConv& WXUNUSED(conv) = wxConvUTF8)
|
||||
const wxMBConv& WXUNUSED(conv) = wxConvUTF8)
|
||||
{ return true; }
|
||||
|
||||
private:
|
||||
|
@ -325,7 +325,9 @@ public:
|
||||
{ InitWith(psz, 0, npos); }
|
||||
wxStringBase(const wxChar *psz, size_t nLength)
|
||||
{ InitWith(psz, 0, nLength); }
|
||||
wxStringBase(const wxChar *psz, wxMBConv& WXUNUSED(conv), size_t nLength = npos)
|
||||
wxStringBase(const wxChar *psz,
|
||||
const wxMBConv& WXUNUSED(conv),
|
||||
size_t nLength = npos)
|
||||
{ InitWith(psz, 0, nLength); }
|
||||
// take nLen chars starting at nPos
|
||||
wxStringBase(const wxStringBase& str, size_t nPos, size_t nLen)
|
||||
@ -650,7 +652,9 @@ public:
|
||||
: wxStringBase(psz ? psz : wxT("")) { }
|
||||
wxString(const wxChar *psz, size_t nLength)
|
||||
: wxStringBase(psz, nLength) { }
|
||||
wxString(const wxChar *psz, wxMBConv& WXUNUSED(conv), size_t nLength = npos)
|
||||
wxString(const wxChar *psz,
|
||||
const wxMBConv& WXUNUSED(conv),
|
||||
size_t nLength = npos)
|
||||
: wxStringBase(psz, nLength == npos ? wxStrlen(psz) : nLength) { }
|
||||
|
||||
// even if we're not built with wxUSE_STL == 1 it is very convenient to allow
|
||||
@ -666,7 +670,7 @@ public:
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
// from multibyte string
|
||||
wxString(const char *psz, wxMBConv& conv, size_t nLength = npos);
|
||||
wxString(const char *psz, const wxMBConv& conv, size_t nLength = npos);
|
||||
// from wxWCharBuffer (i.e. return from wxGetString)
|
||||
wxString(const wxWCharBuffer& psz) : wxStringBase(psz.data()) { }
|
||||
#else // ANSI
|
||||
@ -679,7 +683,9 @@ public:
|
||||
|
||||
#if wxUSE_WCHAR_T
|
||||
// from wide (Unicode) string
|
||||
wxString(const wchar_t *pwz, wxMBConv& conv = wxConvLibc, size_t nLength = npos);
|
||||
wxString(const wchar_t *pwz,
|
||||
const wxMBConv& conv = wxConvLibc,
|
||||
size_t nLength = npos);
|
||||
#endif // !wxUSE_WCHAR_T
|
||||
|
||||
// from wxCharBuffer
|
||||
@ -809,14 +815,14 @@ public:
|
||||
// type differs because a function may either return pointer to the buffer
|
||||
// directly or have to use intermediate buffer for translation.
|
||||
#if wxUSE_UNICODE
|
||||
const wxCharBuffer mb_str(wxMBConv& conv = wxConvLibc) const;
|
||||
const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const;
|
||||
|
||||
const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); }
|
||||
|
||||
const wxChar* wc_str() const { return c_str(); }
|
||||
|
||||
// for compatibility with !wxUSE_UNICODE version
|
||||
const wxChar* wc_str(wxMBConv& WXUNUSED(conv)) const { return c_str(); }
|
||||
const wxChar* wc_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }
|
||||
|
||||
#if wxMBFILES
|
||||
const wxCharBuffer fn_str() const { return mb_str(wxConvFile); }
|
||||
@ -827,12 +833,12 @@ public:
|
||||
const wxChar* mb_str() const { return c_str(); }
|
||||
|
||||
// for compatibility with wxUSE_UNICODE version
|
||||
const wxChar* mb_str(wxMBConv& WXUNUSED(conv)) const { return c_str(); }
|
||||
const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }
|
||||
|
||||
const wxWX2MBbuf mbc_str() const { return mb_str(); }
|
||||
|
||||
#if wxUSE_WCHAR_T
|
||||
const wxWCharBuffer wc_str(wxMBConv& conv) const;
|
||||
const wxWCharBuffer wc_str(const wxMBConv& conv) const;
|
||||
#endif // wxUSE_WCHAR_T
|
||||
#ifdef __WXOSX__
|
||||
const wxCharBuffer fn_str() const { return wxConvFile.cWC2WX( wc_str( wxConvLocal ) ); }
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include "wx/defs.h"
|
||||
#include "wx/arrstr.h"
|
||||
#include "wx/convauto.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// constants
|
||||
@ -80,10 +81,10 @@ public:
|
||||
bool Create(const wxString& strBufferName);
|
||||
|
||||
// Open() also loads buffer in memory on success
|
||||
bool Open(wxMBConv& conv = wxConvUTF8);
|
||||
bool Open(const wxMBConv& conv = wxConvAuto());
|
||||
|
||||
// same as Open() but with (another) buffer name
|
||||
bool Open(const wxString& strBufferName, wxMBConv& conv = wxConvUTF8);
|
||||
bool Open(const wxString& strBufferName, const wxMBConv& conv = wxConvAuto());
|
||||
|
||||
// closes the buffer and frees memory, losing all changes
|
||||
bool Close();
|
||||
@ -161,7 +162,7 @@ public:
|
||||
// change the buffer (default argument means "don't change type")
|
||||
// possibly in another format
|
||||
bool Write(wxTextFileType typeNew = wxTextFileType_None,
|
||||
wxMBConv& conv = wxConvUTF8);
|
||||
const wxMBConv& conv = wxConvAuto());
|
||||
|
||||
// dtor
|
||||
virtual ~wxTextBuffer();
|
||||
@ -183,8 +184,8 @@ protected:
|
||||
virtual bool OnOpen(const wxString &strBufferName,
|
||||
wxTextBufferOpenMode openmode) = 0;
|
||||
virtual bool OnClose() = 0;
|
||||
virtual bool OnRead(wxMBConv& conv) = 0;
|
||||
virtual bool OnWrite(wxTextFileType typeNew, wxMBConv& conv) = 0;
|
||||
virtual bool OnRead(const wxMBConv& conv) = 0;
|
||||
virtual bool OnWrite(wxTextFileType typeNew, const wxMBConv& conv) = 0;
|
||||
|
||||
static wxString ms_eof; // dummy string returned at EOF
|
||||
wxString m_strBufferName; // name of the buffer
|
||||
|
@ -39,8 +39,8 @@ protected:
|
||||
virtual bool OnOpen(const wxString &strBufferName,
|
||||
wxTextBufferOpenMode OpenMode);
|
||||
virtual bool OnClose();
|
||||
virtual bool OnRead(wxMBConv& conv);
|
||||
virtual bool OnWrite(wxTextFileType typeNew, wxMBConv& conv);
|
||||
virtual bool OnRead(const wxMBConv& conv);
|
||||
virtual bool OnWrite(wxTextFileType typeNew, const wxMBConv& conv);
|
||||
|
||||
private:
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
#define _WX_TXTSTREAM_H_
|
||||
|
||||
#include "wx/stream.h"
|
||||
#include "wx/convauto.h"
|
||||
|
||||
#if wxUSE_STREAMS
|
||||
|
||||
@ -36,9 +37,11 @@ class WXDLLIMPEXP_BASE wxTextInputStream
|
||||
{
|
||||
public:
|
||||
#if wxUSE_UNICODE
|
||||
wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t"), wxMBConv& conv = wxConvUTF8 );
|
||||
wxTextInputStream(wxInputStream& s,
|
||||
const wxString &sep=wxT(" \t"),
|
||||
const wxMBConv& conv = wxConvAuto());
|
||||
#else
|
||||
wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t") );
|
||||
wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t"));
|
||||
#endif
|
||||
~wxTextInputStream();
|
||||
|
||||
@ -78,7 +81,7 @@ protected:
|
||||
char m_lastBytes[10]; // stores the bytes that were read for the last character
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
wxMBConv &m_conv;
|
||||
wxMBConv m_conv;
|
||||
#endif
|
||||
|
||||
bool EatEOL(const wxChar &c);
|
||||
@ -102,9 +105,11 @@ class WXDLLIMPEXP_BASE wxTextOutputStream
|
||||
{
|
||||
public:
|
||||
#if wxUSE_UNICODE
|
||||
wxTextOutputStream( wxOutputStream& s, wxEOL mode = wxEOL_NATIVE, wxMBConv& conv = wxConvUTF8 );
|
||||
wxTextOutputStream(wxOutputStream& s,
|
||||
wxEOL mode = wxEOL_NATIVE,
|
||||
const wxMBConv& conv = wxConvAuto());
|
||||
#else
|
||||
wxTextOutputStream( wxOutputStream& s, wxEOL mode = wxEOL_NATIVE );
|
||||
wxTextOutputStream(wxOutputStream& s, wxEOL mode = wxEOL_NATIVE);
|
||||
#endif
|
||||
virtual ~wxTextOutputStream();
|
||||
|
||||
@ -139,7 +144,7 @@ protected:
|
||||
wxEOL m_mode;
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
wxMBConv &m_conv;
|
||||
wxMBConv m_conv;
|
||||
#endif
|
||||
|
||||
DECLARE_NO_COPY_CLASS(wxTextOutputStream)
|
||||
|
214
src/common/convauto.cpp
Normal file
214
src/common/convauto.cpp
Normal file
@ -0,0 +1,214 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Name: src/common/convauto.cpp
|
||||
// Purpose: implementation of wxConvAuto
|
||||
// Author: Vadim Zeitlin
|
||||
// Created: 2006-04-04
|
||||
// RCS-ID: $Id$
|
||||
// Copyright: (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
|
||||
// Licence: wxWindows licence
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// ============================================================================
|
||||
// declarations
|
||||
// ============================================================================
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// headers
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// for compilers that support precompilation, includes "wx.h".
|
||||
#include "wx/wxprec.h"
|
||||
|
||||
#ifdef __BORLANDC__
|
||||
#pragma hdrstop
|
||||
#endif
|
||||
|
||||
#if wxUSE_WCHAR_T
|
||||
|
||||
#ifndef WX_PRECOMP
|
||||
#endif //WX_PRECOMP
|
||||
|
||||
#include "wx/convauto.h"
|
||||
|
||||
// ============================================================================
|
||||
// implementation
|
||||
// ============================================================================
|
||||
|
||||
/* static */
|
||||
wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
{
    // Identify the byte order mark, if any, at the start of the buffer.
    //
    // src points to the bytes to examine and srcLen is the number of bytes
    // which may be looked at; returns the matching BOM_XXX value or BOM_None
    // if the buffer doesn't start with any of the recognized marks.
    //
    // see http://www.unicode.org/faq/utf_bom.html#BOM

    // the shortest BOM (UTF-16) takes 2 bytes, so with less than that there
    // is nothing to detect -- and knowing that we have at least 2 bytes
    // allows us to read both of them unconditionally below
    if ( srcLen < 2 )
        return BOM_None;

    const char first = src[0];
    const char second = src[1];

    if ( first == '\0' )
    {
        // 00 00 FE FF: the only BOM starting with NUL is big endian UTF-32
        if ( srcLen >= 4 &&
                second == '\0' &&
                    src[2] == '\xfe' &&
                        src[3] == '\xff' )
        {
            return BOM_UTF32BE;
        }
    }
    else if ( first == '\xfe' )
    {
        // FE FF: big endian UTF-16
        if ( second == '\xff' )
            return BOM_UTF16BE;
    }
    else if ( first == '\xff' )
    {
        // FF FE starts both the little endian UTF-16 and UTF-32 BOMs, the
        // latter continues with 2 NUL bytes
        if ( second == '\xfe' )
        {
            if ( srcLen >= 4 && src[2] == '\0' && src[3] == '\0' )
                return BOM_UTF32LE;

            return BOM_UTF16LE;
        }
    }
    else if ( first == '\xef' )
    {
        // EF BB BF: UTF-8
        if ( srcLen >= 3 && second == '\xbb' && src[2] == '\xbf' )
            return BOM_UTF8;
    }

    return BOM_None;
}
|
||||
|
||||
void wxConvAuto::InitFromBOM(BOMType bomType)
{
    // Select the conversion corresponding to the given BOM type, storing it
    // in m_conv and recording in m_ownsConv whether we must delete it.

    // start by assuming the most common case: there is a BOM which still has
    // to be skipped and the conversion is allocated by us; the cases below
    // which differ override these values
    m_consumedBOM = false;
    m_ownsConv = true;

    switch ( bomType )
    {
        case BOM_UTF32BE:
            m_conv = new wxMBConvUTF32BE;
            return;

        case BOM_UTF32LE:
            m_conv = new wxMBConvUTF32LE;
            return;

        case BOM_UTF16BE:
            m_conv = new wxMBConvUTF16BE;
            return;

        case BOM_UTF16LE:
            m_conv = new wxMBConvUTF16LE;
            return;

        case BOM_UTF8:
            // reuse the global UTF-8 conversion instead of creating our own
            m_conv = &wxConvUTF8;
            m_ownsConv = false;
            return;

        case BOM_None:
            break;

        default:
            wxFAIL_MSG( _T("unexpected BOM type") );
            // we still need some conversion, so use the default one too
            break;
    }

    // no (known) BOM: use the default conversion, this also resets m_ownsConv
    InitWithDefault();
    m_consumedBOM = true; // as there is nothing to consume
}
|
||||
|
||||
void wxConvAuto::SkipBOM(const char **src, size_t *len) const
|
||||
{
|
||||
int ofs;
|
||||
switch ( m_bomType )
|
||||
{
|
||||
case BOM_UTF32BE:
|
||||
case BOM_UTF32LE:
|
||||
ofs = 4;
|
||||
break;
|
||||
|
||||
case BOM_UTF16BE:
|
||||
case BOM_UTF16LE:
|
||||
ofs = 2;
|
||||
break;
|
||||
|
||||
case BOM_UTF8:
|
||||
ofs = 3;
|
||||
break;
|
||||
|
||||
default:
|
||||
wxFAIL_MSG( _T("unexpected BOM type") );
|
||||
// fall through: still need to create something
|
||||
|
||||
case BOM_None:
|
||||
ofs = 0;
|
||||
}
|
||||
|
||||
*src += ofs;
|
||||
if ( *len != (size_t)-1 )
|
||||
*len -= ofs;
|
||||
}
|
||||
|
||||
void wxConvAuto::InitFromInput(const char **src, size_t *len)
|
||||
{
|
||||
m_bomType = DetectBOM(*src, *len);
|
||||
InitFromBOM(m_bomType);
|
||||
SkipBOM(src, len);
|
||||
}
|
||||
|
||||
size_t
wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
                    const char *src, size_t srcLen) const
{
    // This function is const but has to lazily initialize the object on
    // first use, hence the cast.
    wxConvAuto * const self = wx_const_cast(wxConvAuto *, this);

    if ( m_conv )
    {
        // The conversion was already chosen by a previous call. Typically
        // that call was made with NULL dst just to compute the required
        // buffer size, in which case the BOM wasn't counted as consumed and
        // has to be skipped again now that we really convert.
        if ( dst && !m_consumedBOM )
        {
            self->m_consumedBOM = true;
            SkipBOM(&src, &srcLen);
        }
    }
    else // first call
    {
        // detect the BOM, create the conversion and skip the BOM in the
        // input; it only counts as consumed if we are really converting
        self->InitFromInput(&src, &srcLen);
        if ( dst )
            self->m_consumedBOM = true;
    }

    return m_conv->ToWChar(dst, dstLen, src, srcLen);
}
|
||||
|
||||
size_t
wxConvAuto::FromWChar(char *dst, size_t dstLen,
                      const wchar_t *src, size_t srcLen) const
{
    // there is no input BOM to examine when producing multibyte output, so
    // if no conversion was selected yet just fall back to the default one
    // (UTF-8); the cast is needed because this function is const
    if ( m_conv == NULL )
    {
        wxConvAuto * const self = wx_const_cast(wxConvAuto *, this);
        self->InitWithDefault();
    }

    return m_conv->FromWChar(dst, dstLen, src, srcLen);
}
|
||||
|
||||
#endif // wxUSE_WCHAR_T
|
||||
|
@ -26,7 +26,7 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
wxDataInputStream::wxDataInputStream(wxInputStream& s, wxMBConv& conv)
|
||||
wxDataInputStream::wxDataInputStream(wxInputStream& s, const wxMBConv& conv)
|
||||
: m_input(&s), m_be_order(false), m_conv(conv)
|
||||
#else
|
||||
wxDataInputStream::wxDataInputStream(wxInputStream& s)
|
||||
@ -445,7 +445,7 @@ wxDataInputStream& wxDataInputStream::operator>>(float& f)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
wxDataOutputStream::wxDataOutputStream(wxOutputStream& s, wxMBConv& conv)
|
||||
wxDataOutputStream::wxDataOutputStream(wxOutputStream& s, const wxMBConv& conv)
|
||||
: m_output(&s), m_be_order(false), m_conv(conv)
|
||||
#else
|
||||
wxDataOutputStream::wxDataOutputStream(wxOutputStream& s)
|
||||
|
@ -103,7 +103,7 @@ bool wxFFile::Close()
|
||||
// read/write
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
bool wxFFile::ReadAll(wxString *str, wxMBConv& conv)
|
||||
bool wxFFile::ReadAll(wxString *str, const wxMBConv& conv)
|
||||
{
|
||||
wxCHECK_MSG( str, false, wxT("invalid parameter") );
|
||||
wxCHECK_MSG( IsOpened(), false, wxT("can't read from closed file") );
|
||||
|
@ -426,7 +426,8 @@ void wxFileConfig::Init()
|
||||
// constructor supports creation of wxFileConfig objects of any type
|
||||
wxFileConfig::wxFileConfig(const wxString& appName, const wxString& vendorName,
|
||||
const wxString& strLocal, const wxString& strGlobal,
|
||||
long style, wxMBConv& conv)
|
||||
long style,
|
||||
const wxMBConv& conv)
|
||||
: wxConfigBase(::GetAppName(appName), vendorName,
|
||||
strLocal, strGlobal,
|
||||
style),
|
||||
@ -474,7 +475,7 @@ wxFileConfig::wxFileConfig(const wxString& appName, const wxString& vendorName,
|
||||
|
||||
#if wxUSE_STREAMS
|
||||
|
||||
wxFileConfig::wxFileConfig(wxInputStream &inStream, wxMBConv& conv)
|
||||
wxFileConfig::wxFileConfig(wxInputStream &inStream, const wxMBConv& conv)
|
||||
: m_conv(conv)
|
||||
{
|
||||
// always local_file when this constructor is called (?)
|
||||
@ -1036,7 +1037,7 @@ bool wxFileConfig::Flush(bool /* bCurrentOnly */)
|
||||
|
||||
#if wxUSE_STREAMS
|
||||
|
||||
bool wxFileConfig::Save(wxOutputStream& os, wxMBConv& conv)
|
||||
bool wxFileConfig::Save(wxOutputStream& os, const wxMBConv& conv)
|
||||
{
|
||||
// save unconditionally, even if not dirty
|
||||
for ( wxFileConfigLineList *p = m_linesHead; p != NULL; p = p->Next() )
|
||||
|
@ -203,21 +203,16 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
|
||||
size_t lenChunk = MB2WC(NULL, src, 0);
|
||||
if ( lenChunk == 0 )
|
||||
{
|
||||
// nothing left in the input string, conversion succeeded
|
||||
// nothing left in the input string, conversion succeeded; but
|
||||
// still account for the trailing NULL
|
||||
dstWritten++;
|
||||
break;
|
||||
}
|
||||
|
||||
if ( lenChunk == wxCONV_FAILED )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
// if we already have a previous chunk, leave the NUL separating it
|
||||
// from this one
|
||||
if ( dstWritten )
|
||||
{
|
||||
dstWritten++;
|
||||
if ( dst )
|
||||
dst++;
|
||||
}
|
||||
lenChunk++; // for trailing NUL
|
||||
|
||||
dstWritten += lenChunk;
|
||||
|
||||
@ -226,8 +221,7 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
|
||||
if ( dstWritten > dstLen )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
|
||||
if ( lenChunk == wxCONV_FAILED )
|
||||
if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
|
||||
return wxCONV_FAILED;
|
||||
|
||||
dst += lenChunk;
|
||||
@ -390,11 +384,11 @@ wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
|
||||
const size_t dstLen = ToWChar(NULL, 0, in, inLen);
|
||||
if ( dstLen != wxCONV_FAILED )
|
||||
{
|
||||
wxWCharBuffer wbuf(dstLen);
|
||||
wxWCharBuffer wbuf(dstLen - 1);
|
||||
if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
|
||||
{
|
||||
if ( outLen )
|
||||
*outLen = dstLen;
|
||||
*outLen = dstLen - 1;
|
||||
return wbuf;
|
||||
}
|
||||
}
|
||||
@ -411,11 +405,11 @@ wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
|
||||
const size_t dstLen = FromWChar(NULL, 0, in, inLen);
|
||||
if ( dstLen != wxCONV_FAILED )
|
||||
{
|
||||
wxCharBuffer buf(dstLen);
|
||||
wxCharBuffer buf(dstLen - 1);
|
||||
if ( FromWChar(buf.data(), dstLen, in, inLen) )
|
||||
{
|
||||
if ( outLen )
|
||||
*outLen = dstLen;
|
||||
*outLen = dstLen - 1;
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
@ -1825,35 +1819,27 @@ public:
|
||||
// wouldn't work if reading an incomplete MB char didn't result in an
|
||||
// error
|
||||
//
|
||||
// note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
|
||||
// an error (tested under Windows Server 2003) and apparently it is
|
||||
// done on purpose, i.e. the function accepts any input in this case
|
||||
// and although I'd prefer to return error on ill-formed output, our
|
||||
// own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
|
||||
// explicitly ill-formed according to RFC 2152) neither so we don't
|
||||
// even have any fallback here...
|
||||
//
|
||||
// Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
|
||||
// Win XP or newer and if it is specified on older versions, conversion
|
||||
// from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
|
||||
// fails. So we can only use the flag on newer Windows versions.
|
||||
// Additionally, the flag is not supported by UTF7, symbol and CJK
|
||||
// encodings. See here:
|
||||
// Win XP or newer and it is not supported for UTF-[78] so we always
|
||||
// use our own conversions in this case. See
|
||||
// http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
|
||||
// http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
|
||||
if ( m_CodePage == CP_UTF8 )
|
||||
{
|
||||
return wxConvUTF8.MB2WC(buf, psz, n);
|
||||
}
|
||||
|
||||
if ( m_CodePage == CP_UTF7 )
|
||||
{
|
||||
return wxConvUTF7.MB2WC(buf, psz, n);
|
||||
}
|
||||
|
||||
int flags = 0;
|
||||
if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
|
||||
m_CodePage < 50000 &&
|
||||
IsAtLeastWin2kSP4() )
|
||||
if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
|
||||
IsAtLeastWin2kSP4() )
|
||||
{
|
||||
flags = MB_ERR_INVALID_CHARS;
|
||||
}
|
||||
else if ( m_CodePage == CP_UTF8 )
|
||||
{
|
||||
// Avoid round-trip in the special case of UTF-8 by using our
|
||||
// own UTF-8 conversion code:
|
||||
return wxMBConvUTF8().MB2WC(buf, psz, n);
|
||||
}
|
||||
|
||||
const size_t len = ::MultiByteToWideChar
|
||||
(
|
||||
|
@ -1006,7 +1006,7 @@ int STRINGCLASS::compare(size_t nStart, size_t nLen,
|
||||
#if wxUSE_UNICODE
|
||||
|
||||
// from multibyte string
|
||||
wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength)
|
||||
wxString::wxString(const char *psz, const wxMBConv& conv, size_t nLength)
|
||||
{
|
||||
// anything to do?
|
||||
if ( psz && nLength != 0 )
|
||||
@ -1031,7 +1031,7 @@ wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength)
|
||||
}
|
||||
|
||||
//Convert wxString in Unicode mode to a multi-byte string
|
||||
const wxCharBuffer wxString::mb_str(wxMBConv& conv) const
|
||||
const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
|
||||
{
|
||||
return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
|
||||
}
|
||||
@ -1041,7 +1041,7 @@ const wxCharBuffer wxString::mb_str(wxMBConv& conv) const
|
||||
#if wxUSE_WCHAR_T
|
||||
|
||||
// from wide string
|
||||
wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength)
|
||||
wxString::wxString(const wchar_t *pwz, const wxMBConv& conv, size_t nLength)
|
||||
{
|
||||
// anything to do?
|
||||
if ( pwz && nLength != 0 )
|
||||
@ -1067,7 +1067,7 @@ wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength)
|
||||
|
||||
//Converts this string to a wide character string if unicode
|
||||
//mode is not enabled and wxUSE_WCHAR_T is enabled
|
||||
const wxWCharBuffer wxString::wc_str(wxMBConv& conv) const
|
||||
const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
|
||||
{
|
||||
return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
|
||||
}
|
||||
|
@ -181,14 +181,14 @@ bool wxTextBuffer::Create()
|
||||
return true;
|
||||
}
|
||||
|
||||
bool wxTextBuffer::Open(const wxString& strBufferName, wxMBConv& conv)
|
||||
bool wxTextBuffer::Open(const wxString& strBufferName, const wxMBConv& conv)
|
||||
{
|
||||
m_strBufferName = strBufferName;
|
||||
|
||||
return Open(conv);
|
||||
}
|
||||
|
||||
bool wxTextBuffer::Open(wxMBConv& conv)
|
||||
bool wxTextBuffer::Open(const wxMBConv& conv)
|
||||
{
|
||||
// buffer name must be either given in ctor or in Open(const wxString&)
|
||||
wxASSERT( !m_strBufferName.empty() );
|
||||
@ -276,7 +276,7 @@ bool wxTextBuffer::Close()
|
||||
return true;
|
||||
}
|
||||
|
||||
bool wxTextBuffer::Write(wxTextFileType typeNew, wxMBConv& conv)
|
||||
bool wxTextBuffer::Write(wxTextFileType typeNew, const wxMBConv& conv)
|
||||
{
|
||||
return OnWrite(typeNew, conv);
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ bool wxTextFile::OnClose()
|
||||
}
|
||||
|
||||
|
||||
bool wxTextFile::OnRead(wxMBConv& conv)
|
||||
bool wxTextFile::OnRead(const wxMBConv& conv)
|
||||
{
|
||||
// file should be opened and we must be at its beginning
|
||||
wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
|
||||
@ -114,18 +114,8 @@ bool wxTextFile::OnRead(wxMBConv& conv)
|
||||
return false;
|
||||
}
|
||||
|
||||
eof = nRead == 0;
|
||||
if ( eof )
|
||||
{
|
||||
// append 4 trailing NUL bytes: this is needed to ensure that the
|
||||
// string is going to be NUL-terminated, whatever is the encoding
|
||||
// used (even UTF-32)
|
||||
block[0] =
|
||||
block[1] =
|
||||
block[2] =
|
||||
block[3] = '\0';
|
||||
nRead = 4;
|
||||
}
|
||||
if ( nRead == 0 )
|
||||
break;
|
||||
|
||||
// this shouldn't happen but don't overwrite the buffer if it does
|
||||
wxCHECK_MSG( bufPos + nRead <= bufSize, false,
|
||||
@ -136,7 +126,7 @@ bool wxTextFile::OnRead(wxMBConv& conv)
|
||||
bufPos += nRead;
|
||||
}
|
||||
|
||||
const wxString str(buf, conv);
|
||||
const wxString str(buf, conv, bufPos);
|
||||
|
||||
// this doesn't risk to happen in ANSI build
|
||||
#if wxUSE_UNICODE
|
||||
@ -211,7 +201,7 @@ bool wxTextFile::OnRead(wxMBConv& conv)
|
||||
}
|
||||
|
||||
|
||||
bool wxTextFile::OnWrite(wxTextFileType typeNew, wxMBConv& conv)
|
||||
bool wxTextFile::OnWrite(wxTextFileType typeNew, const wxMBConv& conv)
|
||||
{
|
||||
wxFileName fn = m_strBufferName;
|
||||
|
||||
|
@ -35,7 +35,9 @@
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv)
|
||||
wxTextInputStream::wxTextInputStream(wxInputStream &s,
|
||||
const wxString &sep,
|
||||
const wxMBConv& conv)
|
||||
: m_input(s), m_separators(sep), m_conv(conv)
|
||||
{
|
||||
memset((void*)m_lastBytes, 0, 10);
|
||||
@ -298,7 +300,9 @@ wxTextInputStream& wxTextInputStream::operator>>(float& f)
|
||||
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode, wxMBConv& conv)
|
||||
wxTextOutputStream::wxTextOutputStream(wxOutputStream& s,
|
||||
wxEOL mode,
|
||||
const wxMBConv& conv)
|
||||
: m_output(s), m_conv(conv)
|
||||
#else
|
||||
wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode)
|
||||
|
Loading…
Reference in New Issue
Block a user