Add more wxConvAuto test cases

2020-09-28 21:02:46 +03:00 · 2020-09-28 21:02:46 +03:00 · 857950c626
commit 857950c626
parent 3676635231
1 changed file with 79 additions and 14 deletions
--- a/tests/mbconv/convautotest.cpp
+++ b/tests/mbconv/convautotest.cpp
@@ -34,7 +34,9 @@ public:

 private:
    CPPUNIT_TEST_SUITE( ConvAutoTestCase );
+        CPPUNIT_TEST( Init );
        CPPUNIT_TEST( Empty );
+        CPPUNIT_TEST( Encode );
        CPPUNIT_TEST( Short );
        CPPUNIT_TEST( None );
        CPPUNIT_TEST( UTF32LE );
@@ -42,12 +44,17 @@ private:
        CPPUNIT_TEST( UTF16LE );
        CPPUNIT_TEST( UTF16BE );
        CPPUNIT_TEST( UTF8 );
+        CPPUNIT_TEST( UTF8NoBom );
+        CPPUNIT_TEST( Fallback );
+        CPPUNIT_TEST( FallbackMultibyte );
        CPPUNIT_TEST( StreamUTF8NoBOM );
        CPPUNIT_TEST( StreamUTF8 );
        CPPUNIT_TEST( StreamUTF16LE );
        CPPUNIT_TEST( StreamUTF16BE );
        CPPUNIT_TEST( StreamUTF32LE );
        CPPUNIT_TEST( StreamUTF32BE );
+        CPPUNIT_TEST( StreamFallback );
+        CPPUNIT_TEST( StreamFallbackMultibyte );
    CPPUNIT_TEST_SUITE_END();

    // expected converter state, UTF-8 without BOM by default
@@ -76,9 +83,13 @@ private:
    //
    // the length of the string may need to be passed explicitly if it has
    // embedded NULs, otherwise it's not necessary
-    void TestFirstChar(const char *src, wchar_t wch, size_t len = wxNO_LEN, ConvState st = ConvState());
+    void TestFirstChar(const char *src, wchar_t wch, size_t len = wxNO_LEN,
+                       ConvState st = ConvState(),
+                       wxFontEncoding fe = wxFONTENCODING_DEFAULT);

+    void Init();
    void Empty();
+    void Encode();
    void Short();
    void None();
    void UTF32LE();
@@ -86,12 +97,16 @@ private:
    void UTF16LE();
    void UTF16BE();
    void UTF8();
+    void UTF8NoBom();
+    void Fallback();
+    void FallbackMultibyte();

    // test whether two lines of text are converted properly from a stream
    void TestTextStream(const char *src,
                        size_t srclength,
                        const wxString& line1,
-                        const wxString& line2);
+                        const wxString& line2,
+                        wxFontEncoding fe = wxFONTENCODING_DEFAULT);

    void StreamUTF8NoBOM();
    void StreamUTF8();
@@ -99,6 +114,8 @@ private:
    void StreamUTF16BE();
    void StreamUTF32LE();
    void StreamUTF32BE();
+    void StreamFallback();
+    void StreamFallbackMultibyte();
 };

 // register in the unnamed registry so that these tests are run by default
@@ -111,15 +128,21 @@ CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(ConvAutoTestCase, "ConvAutoTestCase");
 // tests
 // ----------------------------------------------------------------------------

-void ConvAutoTestCase::TestFirstChar(const char *src, wchar_t wch, size_t len, ConvState st)
+void ConvAutoTestCase::TestFirstChar(const char *src, wchar_t wch, size_t len,
+                                     ConvState st, wxFontEncoding fe)
 {
-    wxConvAuto conv;
+    wxConvAuto conv(fe);
    wxWCharBuffer wbuf = conv.cMB2WC(src, len, NULL);
    CPPUNIT_ASSERT( wbuf );
    CPPUNIT_ASSERT_EQUAL( wch, *wbuf );
    st.Check(conv);
 }

+void ConvAutoTestCase::Init()
+{
+    ConvState(wxBOM_Unknown, wxFONTENCODING_MAX).Check(wxConvAuto());
+}
+
 void ConvAutoTestCase::Empty()
 {
    wxConvAuto conv;
@@ -127,6 +150,16 @@ void ConvAutoTestCase::Empty()
    ConvState(wxBOM_Unknown, wxFONTENCODING_MAX).Check(conv);
 }

+void ConvAutoTestCase::Encode()
+{
+    wxConvAuto conv;
+    wxString str = wxString::FromUTF8("\xd0\x9f\xe3\x81\x82");
+    wxCharBuffer buf = conv.cWC2MB(str.wc_str());
+    CPPUNIT_ASSERT( buf );
+    CPPUNIT_ASSERT_EQUAL( str, wxString::FromUTF8(buf) );
+    ConvState(wxBOM_Unknown, wxFONTENCODING_UTF8).Check(conv);
+}
+
 void ConvAutoTestCase::Short()
 {
    TestFirstChar("1", wxT('1'));
@@ -164,13 +197,39 @@ void ConvAutoTestCase::UTF8()
 #endif
 }

+void ConvAutoTestCase::UTF8NoBom()
+{
+#ifdef wxHAVE_U_ESCAPE
+    TestFirstChar("\xd0\x9f\xe3\x81\x82", L'\u041f', wxNO_LEN, ConvState(wxBOM_None, wxFONTENCODING_UTF8));
+#endif
+}
+
+void ConvAutoTestCase::Fallback()
+{
+#ifdef wxHAVE_U_ESCAPE
+    TestFirstChar("\xbf", L'\u041f', wxNO_LEN,
+                  ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true),
+                  wxFONTENCODING_ISO8859_5);
+#endif
+}
+
+void ConvAutoTestCase::FallbackMultibyte()
+{
+#ifdef wxHAVE_U_ESCAPE
+    TestFirstChar("\x84\x50", L'\u041f', wxNO_LEN,
+                  ConvState(wxBOM_None, wxFONTENCODING_CP932, true),
+                  wxFONTENCODING_CP932);
+#endif
+}
+
 void ConvAutoTestCase::TestTextStream(const char *src,
                                      size_t srclength,
                                      const wxString& line1,
-                                      const wxString& line2)
+                                      const wxString& line2,
+                                      wxFontEncoding fe)
 {
    wxMemoryInputStream instream(src, srclength);
-    wxTextInputStream text(instream);
+    wxTextInputStream text(instream, wxT(" \t"), wxConvAuto(fe));

    CPPUNIT_ASSERT_EQUAL( line1, text.ReadLine() );
    CPPUNIT_ASSERT_EQUAL( line2, text.ReadLine() );
@@ -191,16 +250,8 @@ const wxString line2 = wxString::FromUTF8("\xce\xb2");

 void ConvAutoTestCase::StreamUTF8NoBOM()
 {
-    // currently this test doesn't work because without the BOM wxConvAuto
-    // decides that the string is in Latin-1 after finding the first (but not
-    // the two subsequent ones which are part of the same UTF-8 sequence!)
-    // 8-bit character
-    //
-    // FIXME: we need to fix this at wxTextInputStream level, see #11570
-#if 0
    TestTextStream("\x61\xE3\x81\x82\x0A\xCE\xB2",
                   7, line1, line2);
-#endif
 }

 void ConvAutoTestCase::StreamUTF8()
@@ -235,4 +286,18 @@ void ConvAutoTestCase::StreamUTF32BE()
                   20, line1, line2);
 }

+void ConvAutoTestCase::StreamFallback()
+{
+    // this only works if there are at least 3 bytes after the first non-ASCII character
+    TestTextStream("\x61\xbf\x0A\xe0\x7a",
+                   5, wxString::FromUTF8("a\xd0\x9f"), wxString::FromUTF8("\xd1\x80z"),
+                   wxFONTENCODING_ISO8859_5);
+}
+
+void ConvAutoTestCase::StreamFallbackMultibyte()
+{
+    TestTextStream("\x61\x82\xa0\x0A\x83\xc0",
+                   6, line1, line2, wxFONTENCODING_CP932);
+}
+
 #endif // wxUSE_UNICODE