added conversions to/from UTF 16/32 LE/BE (patch 809685)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@23792 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin 2003-09-22 00:12:10 +00:00
parent 9fc852b7ff
commit c91830cb4b
4 changed files with 616 additions and 24 deletions

View File

@ -68,6 +68,12 @@ enum wxFontEncoding
wxFONTENCODING_UTF7, // UTF-7 Unicode encoding
wxFONTENCODING_UTF8, // UTF-8 Unicode encoding
wxFONTENCODING_UTF16, // UTF-16 Unicode encoding
wxFONTENCODING_UTF16BE, // UTF-16 Big Endian Unicode encoding
wxFONTENCODING_UTF16LE, // UTF-16 Little Endian Unicode encoding
wxFONTENCODING_UTF32, // UTF-32 Unicode encoding
wxFONTENCODING_UTF32BE, // UTF-32 Big Endian Unicode encoding
wxFONTENCODING_UTF32LE, // UTF-32 Little Endian Unicode encoding
// Far Eastern encodings
// Chinese

View File

@ -91,6 +91,50 @@ public:
WXDLLIMPEXP_DATA_BASE(extern wxMBConvUTF8) wxConvUTF8;
// ----------------------------------------------------------------------------
// wxMBConvUTF16LE (for conversion using UTF16 Little Endian encoding)
// ----------------------------------------------------------------------------
class WXDLLIMPEXP_BASE wxMBConvUTF16LE : public wxMBConv
{
public:
virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
};
// ----------------------------------------------------------------------------
// wxMBConvUTF16BE (for conversion using UTF16 Big Endian encoding)
// ----------------------------------------------------------------------------
class WXDLLIMPEXP_BASE wxMBConvUTF16BE : public wxMBConv
{
public:
virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
};
// ----------------------------------------------------------------------------
// wxMBConvUCS4LE (for conversion using UTF32 Little Endian encoding)
// ----------------------------------------------------------------------------
class WXDLLIMPEXP_BASE wxMBConvUTF32LE : public wxMBConv
{
public:
virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
};
// ----------------------------------------------------------------------------
// wxMBConvUCS4BE (for conversion using UTF32 Big Endian encoding)
// ----------------------------------------------------------------------------
class WXDLLIMPEXP_BASE wxMBConvUTF32BE : public wxMBConv
{
public:
virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
};
// ----------------------------------------------------------------------------
// wxCSConv (for conversion based on loadable char sets)
// ----------------------------------------------------------------------------

View File

@ -87,6 +87,12 @@ static wxFontEncoding gs_encodings[] =
wxFONTENCODING_CP437,
wxFONTENCODING_UTF7,
wxFONTENCODING_UTF8,
wxFONTENCODING_UTF16,
wxFONTENCODING_UTF16BE,
wxFONTENCODING_UTF16LE,
wxFONTENCODING_UTF32,
wxFONTENCODING_UTF32BE,
wxFONTENCODING_UTF32LE,
wxFONTENCODING_EUC_JP,
};
@ -124,6 +130,12 @@ static const wxChar* gs_encodingDescs[] =
wxTRANSLATE( "Windows/DOS OEM (CP 437)" ),
wxTRANSLATE( "Unicode 7 bit (UTF-7)" ),
wxTRANSLATE( "Unicode 8 bit (UTF-8)" ),
wxTRANSLATE( "Unicode 16 bit (UTF-16)" ),
wxTRANSLATE( "Unicode 16 bit Big Endian (UTF-16BE)" ),
wxTRANSLATE( "Unicode 16 bit Little Endian (UTF-16LE)" ),
wxTRANSLATE( "Unicode 32 bit (UTF-32)" ),
wxTRANSLATE( "Unicode 32 bit Big Endian (UTF-32BE)" ),
wxTRANSLATE( "Unicode 32 bit Little Endian (UTF-32LE)" ),
wxTRANSLATE( "Extended Unix Codepage for Japanese (EUC-JP)" ),
};
@ -161,6 +173,12 @@ static const wxChar* gs_encodingNames[] =
wxT( "windows-437" ),
wxT( "utf-7" ),
wxT( "utf-8" ),
wxT( "utf-16" ),
wxT( "utf-16be" ),
wxT( "utf-16le" ),
wxT( "utf-32" ),
wxT( "utf-32be" ),
wxT( "utf-32le" ),
wxT( "euc-jp" ),
};
@ -455,6 +473,30 @@ wxFontMapperBase::NonInteractiveCharsetToEncoding(const wxString& charset)
{
encoding = wxFONTENCODING_UTF8;
}
else if ( cs == wxT("UTF-16") )
{
encoding = wxFONTENCODING_UTF16;
}
else if ( cs == wxT("UTF-16BE") )
{
encoding = wxFONTENCODING_UTF16BE;
}
else if ( cs == wxT("UTF-16LE") )
{
encoding = wxFONTENCODING_UTF16LE;
}
else if ( cs == wxT("UTF-32") || cs == wxT("UCS-4") )
{
encoding = wxFONTENCODING_UTF32;
}
else if ( cs == wxT("UTF-32BE") || cs == wxT("UCS-4BE") )
{
encoding = wxFONTENCODING_UTF32BE;
}
else if ( cs == wxT("UTF-32LE") || cs == wxT("UCS-4LE") )
{
encoding = wxFONTENCODING_UTF32LE;
}
else if ( cs == wxT("GB2312") )
{
encoding = wxFONTENCODING_GB2312;

View File

@ -143,16 +143,15 @@ IMPLEMENT_DYNAMIC_CLASS(wxStrConvModule, wxModule)
// ============================================================================
// ----------------------------------------------------------------------------
// UTF-16 en/decoding
// UTF-16 en/decoding to/from UCS-4
// ----------------------------------------------------------------------------
#ifdef WC_UTF16
static size_t encode_utf16(wxUint32 input, wchar_t *output)
static size_t encode_utf16(wxUint32 input, wxUint16 *output)
{
if (input<=0xffff)
{
if (output) *output++ = (wchar_t) input;
if (output) *output++ = (wxUint16) input;
return 1;
}
else if (input>=0x110000)
@ -163,14 +162,14 @@ static size_t encode_utf16(wxUint32 input, wchar_t *output)
{
if (output)
{
*output++ = (wchar_t) ((input >> 10)+0xd7c0);
*output++ = (wchar_t) ((input&0x3ff)+0xdc00);
*output++ = (wxUint16) ((input >> 10)+0xd7c0);
*output++ = (wxUint16) ((input&0x3ff)+0xdc00);
}
return 2;
}
}
static size_t decode_utf16(const wchar_t* input, wxUint32& output)
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
{
if ((*input<0xd800) || (*input>0xdfff))
{
@ -189,7 +188,6 @@ static size_t decode_utf16(const wchar_t* input, wxUint32& output)
}
}
#endif // WC_UTF16
// ----------------------------------------------------------------------------
// wxMBConv
@ -266,8 +264,8 @@ const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
size_t nLen = WC2MB(NULL, pwz, 0);
if ( nLen != (size_t)-1 )
{
wxCharBuffer buf(nLen);
WC2MB(buf.data(), pwz, nLen + 1);
wxCharBuffer buf(nLen+3); // space for a wxUint32 trailing zero
WC2MB(buf.data(), pwz, nLen + 4);
return buf;
}
@ -422,6 +420,463 @@ size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
return len;
}
// ----------------------------------------------------------------------------
// UTF-16
// ----------------------------------------------------------------------------
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF16straight wxMBConvUTF16BE
#define wxMBConvUTF16swap wxMBConvUTF16LE
#else
#define wxMBConvUTF16swap wxMBConvUTF16BE
#define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF16LE) wxConvUTF16LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF16BE) wxConvUTF16BE;
#ifdef WC_UTF16
// copy 16bit MB to 16bit String
size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint16*)psz && (!buf || len < n))
{
if (buf)
*buf++ = *(wxUint16*)psz;
len++;
psz += sizeof(wxUint16);
}
if (buf && len<n) *buf=0;
return len;
}
// copy 16bit String to 16bit MB
size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
if (buf)
{
*(wxUint16*)buf = *psz;
buf += sizeof(wxUint16);
}
len += sizeof(wxUint16);
psz++;
}
if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
return len;
}
// swap 16bit MB to 16bit String
size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint16*)psz && (!buf || len < n))
{
if (buf)
{
((char *)buf)[0] = psz[1];
((char *)buf)[1] = psz[0];
buf++;
}
len++;
psz += sizeof(wxUint16);
}
if (buf && len<n) *buf=0;
return len;
}
// swap 16bit MB to 16bit String
size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
if (buf)
{
*buf++ = ((char*)psz)[1];
*buf++ = ((char*)psz)[0];
}
len += sizeof(wxUint16);
psz++;
}
if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
return len;
}
#else // WC_UTF16
// copy 16bit MB to 32bit String
size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint16*)psz && (!buf || len < n))
{
wxUint32 cc;
size_t pa=decode_utf16((wxUint16*)psz, cc);
if (pa == (size_t)-1)
return pa;
if (buf)
*buf++ = cc;
len++;
psz += pa * sizeof(wxUint16);
}
if (buf && len<n) *buf=0;
return len;
}
// copy 32bit String to 16bit MB
size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
wxUint16 cc[2];
size_t pa=encode_utf16(*psz, cc);
if (pa == (size_t)-1)
return pa;
if (buf)
{
*((wxUint16*)buf)++ = cc[0];
if (pa > 1)
*((wxUint16*)buf)++ = cc[1];
}
len += pa*sizeof(wxUint16);
psz++;
}
if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
return len;
}
// swap 16bit MB to 32bit String
size_t wxMBConvUTF16swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint16*)psz && (!buf || len < n))
{
wxUint32 cc;
char tmp[4];
tmp[0]=psz[1]; tmp[1]=psz[0];
tmp[2]=psz[3]; tmp[3]=psz[2];
size_t pa=decode_utf16((wxUint16*)tmp, cc);
if (pa == (size_t)-1)
return pa;
if (buf)
*buf++ = cc;
len++;
psz += pa * sizeof(wxUint16);
}
if (buf && len<n) *buf=0;
return len;
}
// swap 32bit String to 16bit MB
size_t wxMBConvUTF16swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
wxUint16 cc[2];
size_t pa=encode_utf16(*psz, cc);
if (pa == (size_t)-1)
return pa;
if (buf)
{
*buf++ = ((char*)cc)[1];
*buf++ = ((char*)cc)[0];
if (pa > 1)
{
*buf++ = ((char*)cc)[3];
*buf++ = ((char*)cc)[2];
}
}
len += pa*sizeof(wxUint16);
psz++;
}
if (buf && len<=n-sizeof(wxUint16)) *(wxUint16*)buf=0;
return len;
}
#endif // WC_UTF16
// ----------------------------------------------------------------------------
// UTF-32
// ----------------------------------------------------------------------------
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF32straight wxMBConvUTF32BE
#define wxMBConvUTF32swap wxMBConvUTF32LE
#else
#define wxMBConvUTF32swap wxMBConvUTF32BE
#define wxMBConvUTF32straight wxMBConvUTF32LE
#endif
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
#ifdef WC_UTF16
// copy 32bit MB to 16bit String
size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint32*)psz && (!buf || len < n))
{
wxUint16 cc[2];
size_t pa=encode_utf16(*(wxUint32*)psz, cc);
if (pa == (size_t)-1)
return pa;
if (buf)
{
*buf++ = cc[0];
if (pa > 1)
*buf++ = cc[1];
}
len += pa;
psz += sizeof(wxUint32);
}
if (buf && len<n) *buf=0;
return len;
}
// copy 16bit String to 32bit MB
size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
wxUint32 cc;
size_t pa=decode_utf16(psz, cc);
if (pa == (size_t)-1)
return pa;
if (buf)
{
*(wxUint32*)buf = cc;
buf += sizeof(wxUint32);
}
len += sizeof(wxUint32);
psz += pa;
}
if (buf && len<=n-sizeof(wxUint32)) *(wxUint32*)buf=0;
return len;
}
// swap 32bit MB to 16bit String
size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint32*)psz && (!buf || len < n))
{
char tmp[4];
tmp[0] = psz[3]; tmp[1] = psz[2];
tmp[2] = psz[1]; tmp[3] = psz[0];
wxUint16 cc[2];
size_t pa=encode_utf16(*(wxUint32*)tmp, cc);
if (pa == (size_t)-1)
return pa;
if (buf)
{
*buf++ = cc[0];
if (pa > 1)
*buf++ = cc[1];
}
len += pa;
psz += sizeof(wxUint32);
}
if (buf && len<n) *buf=0;
return len;
}
// swap 16bit String to 32bit MB
size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
char cc[4];
size_t pa=decode_utf16(psz, *(wxUint32*)cc);
if (pa == (size_t)-1)
return pa;
if (buf)
{
*buf++ = cc[3];
*buf++ = cc[2];
*buf++ = cc[1];
*buf++ = cc[0];
}
len += sizeof(wxUint32);
psz += pa;
}
if (buf && len<=n-sizeof(wxUint32)) *(wxUint32*)buf=0;
return len;
}
#else // WC_UTF16
// copy 32bit MB to 32bit String
size_t wxMBConvUTF32straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint32*)psz && (!buf || len < n))
{
if (buf)
*buf++ = *(wxUint32*)psz;
len++;
psz += sizeof(wxUint32);
}
if (buf && len<n) *buf=0;
return len;
}
// copy 32bit String to 32bit MB
size_t wxMBConvUTF32straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
if (buf)
{
*(wxUint32*)buf = *psz;
buf += sizeof(wxUint32);
}
len += sizeof(wxUint32);
psz++;
}
if (buf && len<=n-sizeof(wxUint32)) *(wxUint32*)buf=0;
return len;
}
// swap 32bit MB to 32bit String
size_t wxMBConvUTF32swap::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint32*)psz && (!buf || len < n))
{
if (buf)
{
((char *)buf)[0] = psz[3];
((char *)buf)[1] = psz[2];
((char *)buf)[2] = psz[1];
((char *)buf)[3] = psz[0];
buf++;
}
len++;
psz += sizeof(wxUint32);
}
if (buf && len<n) *buf=0;
return len;
}
// swap 32bit String to 32bit MB
size_t wxMBConvUTF32swap::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
if (buf)
{
*buf++ = ((char *)psz)[3];
*buf++ = ((char *)psz)[2];
*buf++ = ((char *)psz)[1];
*buf++ = ((char *)psz)[0];
}
len += sizeof(wxUint32);
psz++;
}
if (buf && len<=n-sizeof(wxUint32)) *(wxUint32*)buf=0;
return len;
}
#endif // WC_UTF16
// ============================================================================
// wxCharacterSet and derived classes
// ============================================================================
@ -891,27 +1346,72 @@ wxGetCharacterSet(const wxChar *name, wxFontEncoding encoding)
return NULL;
}
wxCharacterSet *cset;
wxCharacterSet *cset = NULL;
if ( (name &&
(wxStricmp(name, wxT("UTF8")) == 0 ||
wxStricmp(name, wxT("UTF-8")) == 0)) ||
encoding == wxFONTENCODING_UTF8 )
{
cset = new ID_CharSet(&wxConvUTF8);
}
else // !UTF-8
if (name)
{
if((wxStricmp(name, wxT("UTF8")) == 0) ||
(wxStricmp(name, wxT("UTF-8")) == 0) ||
encoding == wxFONTENCODING_UTF8 )
{
cset = new ID_CharSet(&wxConvUTF8);
}
else if((wxStricmp(name, wxT("UTF16")) == 0) ||
(wxStricmp(name, wxT("UTF-16")) == 0) ||
encoding == wxFONTENCODING_UTF16 )
{
#ifdef WORDS_BIGENDIAN
cset = new ID_CharSet(&wxConvUTF16BE);
#else
cset = new ID_CharSet(&wxConvUTF16LE);
#endif
}
else if((wxStricmp(name, wxT("UTF16BE")) == 0) ||
(wxStricmp(name, wxT("UTF-16BE")) == 0) ||
encoding == wxFONTENCODING_UTF16BE )
{
cset = new ID_CharSet(&wxConvUTF16BE);
}
else if((wxStricmp(name, wxT("UTF16LE")) == 0) ||
(wxStricmp(name, wxT("UTF-16LE")) == 0) ||
encoding == wxFONTENCODING_UTF16LE )
{
cset = new ID_CharSet(&wxConvUTF16LE);
}
else if((wxStricmp(name, wxT("UTF32")) == 0) ||
(wxStricmp(name, wxT("UTF-32")) == 0) ||
(wxStricmp(name, wxT("UCS4")) == 0) ||
(wxStricmp(name, wxT("UCS-4")) == 0) ||
encoding == wxFONTENCODING_UTF32 )
{
#ifdef WORDS_BIGENDIAN
cset = new ID_CharSet(&wxConvUTF32BE);
#else
cset = new ID_CharSet(&wxConvUTF32LE);
#endif
}
else if((wxStricmp(name, wxT("UTF32BE")) == 0) ||
(wxStricmp(name, wxT("UTF-32BE")) == 0) ||
(wxStricmp(name, wxT("UCS4BE")) == 0) ||
(wxStricmp(name, wxT("UCS-4BE")) == 0) ||
encoding == wxFONTENCODING_UTF32BE )
{
cset = new ID_CharSet(&wxConvUTF32BE);
}
else if((wxStricmp(name, wxT("UTF32LE")) == 0) ||
(wxStricmp(name, wxT("UTF-32LE")) == 0) ||
(wxStricmp(name, wxT("UCS4LE")) == 0) ||
(wxStricmp(name, wxT("UCS-4LE")) == 0) ||
encoding == wxFONTENCODING_UTF32 )
{
cset = new ID_CharSet(&wxConvUTF32LE);
}
#ifdef HAVE_ICONV
if ( name )
else
{
cset = new IC_CharSet(name);
}
else
#endif // HAVE_ICONV
{
cset = NULL;
}
}
// it can only be NULL in this case