added GetAllEncodingNames(), use it to select the correct encoding name to pass to iconv_open()

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@35566 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2005-09-18 14:01:14 +00:00 · 2005-09-18 14:01:14 +00:00 · 8b3eb85d5e
commit 8b3eb85d5e
parent 0ad5d837cb
3 changed files with 152 additions and 129 deletions
--- a/include/wx/fontmap.h
+++ b/include/wx/fontmap.h
@ -90,10 +90,13 @@ public:
    // get the n-th supported encoding
    static wxFontEncoding GetEncoding(size_t n);

-    // return internal string identifier for the encoding (see also
-    // GetEncodingDescription())
+    // return canonical name of this encoding (this is a short string,
+    // GetEncodingDescription() returns a longer one)
    static wxString GetEncodingName(wxFontEncoding encoding);

+    // return a NULL-terminated array of all names of this encoding, canonical name first (see GetEncodingName)
+    static const wxChar** GetAllEncodingNames(wxFontEncoding encoding);
+
    // return user-readable string describing the given encoding
    //
    // NB: hard-coded now, but might change later (read it from config?)
--- a/src/common/fmapbase.cpp
+++ b/src/common/fmapbase.cpp
@ -96,6 +96,10 @@ static wxFontEncoding gs_encodings[] =
    wxFONTENCODING_UTF32BE,
    wxFONTENCODING_UTF32LE,
    wxFONTENCODING_EUC_JP,
+    wxFONTENCODING_DEFAULT,
+    wxFONTENCODING_BIG5,
+    wxFONTENCODING_SHIFT_JIS,
+    wxFONTENCODING_GB2312,
 };

 // the descriptions for them
@ -141,56 +145,74 @@ static const wxChar* gs_encodingDescs[] =
    wxTRANSLATE( "Unicode 32 bit Big Endian (UTF-32BE)" ),
    wxTRANSLATE( "Unicode 32 bit Little Endian (UTF-32LE)" ),
    wxTRANSLATE( "Extended Unix Codepage for Japanese (EUC-JP)" ),
+    wxTRANSLATE( "US-ASCII" ),
+    wxTRANSLATE( "BIG5" ),
+    wxTRANSLATE( "SHIFT-JIS" ),
+    wxTRANSLATE( "GB-2312" ),
 };

 // and the internal names (these are not translated on purpose!)
-static const wxChar* gs_encodingNames[] =
+static const wxChar* gs_encodingNames[WXSIZEOF(gs_encodingDescs)][7] = // one row per encoding: canonical name first, then aliases, NULL-terminated; 7 fits the longest row (us-ascii)
 {
-    wxT( "iso-8859-1" ),
-    wxT( "iso-8859-2" ),
-    wxT( "iso-8859-3" ),
-    wxT( "iso-8859-4" ),
-    wxT( "iso-8859-5" ),
-    wxT( "iso-8859-6" ),
-    wxT( "iso-8859-7" ),
-    wxT( "iso-8859-8" ),
-    wxT( "iso-8859-9" ),
-    wxT( "iso-8859-10" ),
-    wxT( "iso-8859-11" ),
-    wxT( "iso-8859-12" ),
-    wxT( "iso-8859-13" ),
-    wxT( "iso-8859-14" ),
-    wxT( "iso-8859-15" ),
-    wxT( "koi8-r" ),
-    wxT( "koi8-u" ),
-    wxT( "windows-874" ),
-    wxT( "windows-932" ),
-    wxT( "windows-936" ),
-    wxT( "windows-949" ),
-    wxT( "windows-950" ),
-    wxT( "windows-1250" ),
-    wxT( "windows-1251" ),
-    wxT( "windows-1252" ),
-    wxT( "windows-1253" ),
-    wxT( "windows-1254" ),
-    wxT( "windows-1255" ),
-    wxT( "windows-1256" ),
-    wxT( "windows-1257" ),
-    wxT( "windows-437" ),
-    wxT( "utf-7" ),
-    wxT( "utf-8" ),
-    wxT( "utf-16" ),
-    wxT( "utf-16be" ),
-    wxT( "utf-16le" ),
-    wxT( "utf-32" ),
-    wxT( "utf-32be" ),
-    wxT( "utf-32le" ),
-    wxT( "euc-jp" ),
+    { wxT( "iso-8859-1" ), NULL },
+    { wxT( "iso-8859-2" ), NULL },
+    { wxT( "iso-8859-3" ), NULL },
+    { wxT( "iso-8859-4" ), NULL },
+    { wxT( "iso-8859-5" ), NULL },
+    { wxT( "iso-8859-6" ), NULL },
+    { wxT( "iso-8859-7" ), NULL },
+    { wxT( "iso-8859-8" ), NULL },
+    { wxT( "iso-8859-9" ), NULL },
+    { wxT( "iso-8859-10" ), NULL },
+    { wxT( "iso-8859-11" ), NULL },
+    { wxT( "iso-8859-12" ), NULL },
+    { wxT( "iso-8859-13" ), NULL },
+    { wxT( "iso-8859-14" ), NULL },
+    { wxT( "iso-8859-15" ), NULL },
+    // although koi8-ru is not strictly speaking the same as koi8-r,
+    // they are similar enough to make mapping it to koi8 better than
+    // not recognizing it at all
+    { wxT( "koi8-r" ), wxT( "koi8-ru" ), NULL },
+    { wxT( "koi8-u" ), NULL },
+    { wxT( "windows-874" ), NULL },
+    { wxT( "windows-932" ), NULL },
+    { wxT( "windows-936" ), NULL },
+    { wxT( "windows-949" ), wxT( "euc-kr" ),
+      wxT( "euckr" ), wxT( "euc_kr" ), NULL },
+    { wxT( "windows-950" ), NULL },
+    { wxT( "windows-1250" ), NULL },
+    { wxT( "windows-1251" ), NULL },
+    { wxT( "windows-1252" ), NULL },
+    { wxT( "windows-1253" ), NULL },
+    { wxT( "windows-1254" ), NULL },
+    { wxT( "windows-1255" ), NULL },
+    { wxT( "windows-1256" ), NULL },
+    { wxT( "windows-1257" ), NULL },
+    { wxT( "windows-437" ), NULL },
+    { wxT( "UTF-7" ), NULL },
+    { wxT( "UTF-8" ), NULL },
+    { wxT( "UTF-16" ), NULL },
+    { wxT( "UTF-16be" ), NULL },
+    { wxT( "UTF-16le" ), NULL },
+    { wxT( "UTF-32" ), wxT( "UCS-4" ), NULL },
+    { wxT( "UTF-32be" ), wxT( "UCS-4be" ), NULL },
+    { wxT( "UTF-32le" ), wxT( "UCS-4le" ), NULL },
+    { wxT( "euc-jp" ), wxT( "eucJP" ), wxT( "euc_jp" ), wxT( "IBM-eucJP" ), NULL },
+    { wxT( "us-ascii" ), wxT( "ascii" ), wxT("ANSI_X3.4-1968"),
+#ifdef __SOLARIS__
+      wxT("646"),
+#endif
+#ifdef __HPUX__
+      wxT("roman8"),
+#endif
+      wxT( "" ), NULL }, // empty name: lets an empty charset string match this (default encoding) entry
+    { wxT( "big5" ), NULL },
+    { wxT( "shift-jis" ), wxT( "shift_jis" ), wxT( "sjis" ), NULL },
+    { wxT( "gb2312" ), NULL },
 };

-wxCOMPILE_TIME_ASSERT( WXSIZEOF(gs_encodingDescs) == WXSIZEOF(gs_encodings) &&
-                       WXSIZEOF(gs_encodingNames) == WXSIZEOF(gs_encodings),
-                       EncodingsArraysNotInSync );
+wxCOMPILE_TIME_ASSERT( WXSIZEOF(gs_encodingDescs) == WXSIZEOF(gs_encodings), EncodingsArraysNotInSync );
+wxCOMPILE_TIME_ASSERT( WXSIZEOF(gs_encodingNames) == WXSIZEOF(gs_encodings), EncodingsArraysNotInSync ); // gs_encodingNames is declared with WXSIZEOF(gs_encodingDescs) rows, so this follows from the check above

 // ----------------------------------------------------------------------------
 // private classes
@ -468,82 +490,18 @@ wxFontMapperBase::NonInteractiveCharsetToEncoding(const wxString& charset)
            }
        }

+        for ( size_t i = 0; i < WXSIZEOF(gs_encodingNames); ++i )
+        {
+            for ( const wxChar** encName = gs_encodingNames[i]; *encName; ++encName )
+            {
+                if ( cs.CmpNoCase(*encName) == 0 )
+                    return gs_encodings[i];
+            }
+        }
+
        cs.MakeUpper();

-        if ( cs.empty() || cs == _T("US-ASCII") )
-        {
-            encoding = wxFONTENCODING_DEFAULT;
-        }
-        else if ( cs == wxT("UTF-7") )
-        {
-            encoding = wxFONTENCODING_UTF7;
-        }
-        else if ( cs == wxT("UTF-8") )
-        {
-            encoding = wxFONTENCODING_UTF8;
-        }
-        else if ( cs == wxT("UTF-16") )
-        {
-            encoding = wxFONTENCODING_UTF16;
-        }
-        else if ( cs == wxT("UTF-16BE") )
-        {
-            encoding = wxFONTENCODING_UTF16BE;
-        }
-        else if ( cs == wxT("UTF-16LE") )
-        {
-            encoding = wxFONTENCODING_UTF16LE;
-        }
-        else if ( cs == wxT("UTF-32") || cs == wxT("UCS-4") )
-        {
-            encoding = wxFONTENCODING_UTF32;
-        }
-        else if ( cs == wxT("UTF-32BE") || cs == wxT("UCS-4BE") )
-        {
-            encoding = wxFONTENCODING_UTF32BE;
-        }
-        else if ( cs == wxT("UTF-32LE") || cs == wxT("UCS-4LE") )
-        {
-            encoding = wxFONTENCODING_UTF32LE;
-        }
-        else if ( cs == wxT("GB2312") )
-        {
-            encoding = wxFONTENCODING_GB2312;
-        }
-        else if ( cs == wxT("BIG5") )
-        {
-            encoding = wxFONTENCODING_BIG5;
-        }
-        else if ( cs == wxT("SJIS") ||
-                  cs == wxT("SHIFT_JIS") ||
-                  cs == wxT("SHIFT-JIS") )
-        {
-            encoding = wxFONTENCODING_SHIFT_JIS;
-        }
-        else if ( cs == wxT("EUC-JP") ||
-                  cs == wxT("EUC_JP") ||
-                  cs == wxT("EUCJP") )
-        {
-            encoding = wxFONTENCODING_EUC_JP;
-        }
-        else if ( cs == wxT("EUC-KR") ||
-                  cs == wxT("EUC_KR") )
-        {
-            encoding = wxFONTENCODING_CP949;
-        }
-        else if ( cs == wxT("KOI8-R") ||
-                  cs == wxT("KOI8-RU") )
-        {
-            // although koi8-ru is not strictly speaking the same as koi8-r,
-            // they are similar enough to make mapping it to koi8 better than
-            // not recognizing it at all
-            encoding = wxFONTENCODING_KOI8;
-        }
-        else if ( cs == wxT("KOI8-U") )
-        {
-            encoding = wxFONTENCODING_KOI8_U;
-        }
-        else if ( cs.Left(3) == wxT("ISO") )
+        if ( cs.Left(3) == wxT("ISO") )
        {
            // the dash is optional (or, to be exact, it is not, but
            // several broken mailers "forget" it)
@ -712,7 +670,7 @@ wxString wxFontMapperBase::GetEncodingName(wxFontEncoding encoding)
    {
        if ( gs_encodings[i] == encoding )
        {
-            return gs_encodingNames[i];
+            return gs_encodingNames[i][0];
        }
    }

@ -722,6 +680,22 @@ wxString wxFontMapperBase::GetEncodingName(wxFontEncoding encoding)
    return str;
 }

+/* static */ // returns the NULL-terminated list of names of the given encoding
+const wxChar** wxFontMapperBase::GetAllEncodingNames(wxFontEncoding encoding)
+{
+    static const wxChar* dummy[] = { NULL }; // empty list returned when the encoding is not found
+
+    for ( size_t i = 0; i < WXSIZEOF(gs_encodingNames); i++ )
+    {
+        if ( gs_encodings[i] == encoding ) // row i of gs_encodingNames holds the names of gs_encodings[i]
+        {
+            return gs_encodingNames[i];
+        }
+    }
+
+    return dummy;
+}
+
 /* static */
 wxFontEncoding wxFontMapperBase::GetEncodingFromName(const wxString& name)
 {
@ -729,9 +703,10 @@ wxFontEncoding wxFontMapperBase::GetEncodingFromName(const wxString& name)

    for ( size_t i = 0; i < count; i++ )
    {
-        if ( gs_encodingNames[i] == name )
+        for ( const wxChar** encName = gs_encodingNames[i]; *encName; ++encName )
        {
-            return gs_encodings[i];
+            if ( name == *encName )
+                return gs_encodings[i];
        }
    }

--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@ -2509,6 +2509,15 @@ void wxCSConv::SetName(const wxChar *charset)
    }
 }

+#if wxUSE_FONTMAP
+#include "wx/hashmap.h"
+
+WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
+                     wxEncodingNameCache );
+
+static wxEncodingNameCache gs_nameCache; // encoding -> name for which wxMBConv_iconv was ok; an empty string records that no name worked
+#endif
+
 wxMBConv *wxCSConv::DoCreate() const
 {
    // check for the special case of ASCII or ISO8859-1 charset: as we have
@ -2535,17 +2544,53 @@ wxMBConv *wxCSConv::DoCreate() const
 #endif // !wxUSE_FONTMAP
    {
        wxString name(m_name);
+        wxFontEncoding encoding(m_encoding);
+
+        if ( !name.empty() )
+        {
+            wxMBConv_iconv *conv = new wxMBConv_iconv(name);
+            if ( conv->IsOk() )
+                return conv;
+
+            delete conv;

 #if wxUSE_FONTMAP
-        if ( name.empty() )
-            name = wxFontMapperBase::GetEncodingName(m_encoding);
+            encoding =
+                wxFontMapperBase::Get()->CharsetToEncoding(name, false);
 #endif // wxUSE_FONTMAP
+        }
+#if wxUSE_FONTMAP
+        {
+            const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
+            if ( it != gs_nameCache.end() )
+            {
+                if ( it->second.empty() )
+                    return NULL;

-        wxMBConv_iconv *conv = new wxMBConv_iconv(name);
-        if ( conv->IsOk() )
-            return conv;
+                wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
+                if ( conv->IsOk() )
+                    return conv;

-        delete conv;
+                delete conv;
+            }
+
+            const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
+
+            for ( ; *names; ++names )
+            {
+                wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
+                if ( conv->IsOk() )
+                {
+                    gs_nameCache[encoding] = *names;
+                    return conv;
+                }
+
+                delete conv;
+            }
+
+            gs_nameCache[encoding] = ""; // cache the failure
+        }
+#endif // wxUSE_FONTMAP
    }
 #endif // HAVE_ICONV