Add a test checking that conversions involving surrogates work

After the fixes in the previous commits, conversions between wchar_t containing surrogates and UTF-{8,16,32} work correctly, so add a test ensuring that this is the case. Notice that other conversions are still broken in the presence of surrogates. See #17070.
2015-11-13 19:19:28 +01:00 · 2015-11-13 19:19:28 +01:00 · 0c02d70fa5
commit 0c02d70fa5
parent e570e8b6ac
1 changed files with 82 additions and 0 deletions
--- a/tests/mbconv/mbconvtest.cpp
+++ b/tests/mbconv/mbconvtest.cpp
@ -81,6 +81,7 @@ private:
        CPPUNIT_TEST( FontmapTests );
        CPPUNIT_TEST( BufSize );
        CPPUNIT_TEST( FromWCharTests );
+        CPPUNIT_TEST( NonBMPCharTests );
 #ifdef HAVE_WCHAR_H
        CPPUNIT_TEST( UTF8_41 );
        CPPUNIT_TEST( UTF8_7f );
@ -116,6 +117,7 @@ private:
    void FontmapTests();
    void BufSize();
    void FromWCharTests();
+    void NonBMPCharTests();
    void IconvTests();
    void Latin1Tests();

@ -940,6 +942,86 @@ void MBConvTestCase::FromWCharTests()
    CPPUNIT_ASSERT_EQUAL( '!', mbuf[6]);
 }

+void MBConvTestCase::NonBMPCharTests()
+{ // Runs TestDecoder/TestEncoder on text containing non-BMP characters for the UTF-8, UTF-16LE/BE and UTF-32LE/BE converters.
+    // Test data, mixing BMP and non-BMP characters: U+1F363 (UTF-16: D83C DF63, UTF-8: F0 9F 8D A3) sushi (emoji)
+    // U+732B (UTF-8: E7 8C AB) cat (kanji)
+    // U+1F408 (UTF-16: D83D DC08, UTF-8: F0 9F 90 88) cat (emoji)
+    // U+845B U+E0101 (UTF-16: 845B DB40 DD01, UTF-8: E8 91 9B F3 A0 84 81) (a kanji + an IVS)
+    const char u8[] = // the test text as UTF-8 bytes
+        "\xF0\x9F\x8D\xA3" /* U+1F363 */
+        "\xE7\x8C\xAB\xF0\x9F\x90\x88" /* U+732B U+1F408 */
+        "\xE8\x91\x9B\xF3\xA0\x84\x81"; /* U+845B U+E0101 */
+    const wxChar16 u16[] = { // the same text as UTF-16 code units, 0-terminated
+        0xD83C, 0xDF63,
+        0x732B, 0xD83D, 0xDC08,
+        0x845B, 0xDB40, 0xDD01,
+        0};
+    const wxChar32 u32[] = { // the same text as UTF-32 code points, 0-terminated
+        0x1F363,
+        0x732B, 0x1F408,
+        0x845B, 0xE0101,
+        0};
+#if SIZEOF_WCHAR_T == 2
+    const wchar_t *const w = u16; // with 2-byte wchar_t the wide data is the UTF-16 array
+    const size_t wchars = sizeof(u16)/sizeof(wxChar16) - 1; // element count without the trailing 0
+#else
+    const wchar_t *const w = u32; // otherwise the wide data is the UTF-32 array
+    const size_t wchars = sizeof(u32)/sizeof(wxChar32) - 1; // element count without the trailing 0
+#endif
+    { // UTF-8: sizeof(u8)-1 leaves out the string literal's terminating NUL
+        // Notice that these tests can only be done with the strict UTF-8
+        // converter, the use of any MAP_INVALID_UTF8_XXX options currently
+        // completely breaks wxTextInputStream use.
+        TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1); // last argument presumably the NUL terminator size in bytes -- TODO confirm
+        TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
+    }
+    { // UTF-16 little endian
+        char u16le[sizeof(u16)]; // byte image of u16, including its trailing 0 element
+        for (size_t i = 0; i < sizeof(u16)/2; ++i) { // sizeof(u16)/2 is the element count, assuming 2-byte wxChar16
+            u16le[2*i]   = (char)(unsigned char)(u16[i] & 0xFF); // low byte first
+            u16le[2*i+1] = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
+        }
+        wxMBConvUTF16LE conv;
+        TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2); // -2 drops the bytes of the trailing 0 element
+        TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
+    }
+    { // UTF-16 big endian
+        char u16be[sizeof(u16)]; // byte image of u16, including its trailing 0 element
+        for (size_t i = 0; i < sizeof(u16)/2; ++i) { // same element count as in the little endian case
+            u16be[2*i]   = (char)(unsigned char)((u16[i] >> 8) & 0xFF); // high byte first
+            u16be[2*i+1] = (char)(unsigned char)(u16[i] & 0xFF);
+        }
+        wxMBConvUTF16BE conv;
+        TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2); // -2 drops the bytes of the trailing 0 element
+        TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
+    }
+    { // UTF-32 little endian
+        char u32le[sizeof(u32)]; // byte image of u32, including its trailing 0 element
+        for (size_t i = 0; i < sizeof(u32)/4; ++i) { // sizeof(u32)/4 is the element count, assuming 4-byte wxChar32
+            u32le[4*i]   = (char)(unsigned char)(u32[i] & 0xFF); // least significant byte first
+            u32le[4*i+1] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
+            u32le[4*i+2] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
+            u32le[4*i+3] = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
+        }
+        wxMBConvUTF32LE conv;
+        TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4); // -4 drops the bytes of the trailing 0 element
+        TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
+    }
+    { // UTF-32 big endian
+        char u32be[sizeof(u32)]; // byte image of u32, including its trailing 0 element
+        for (size_t i = 0; i < sizeof(u32)/4; ++i) { // same element count as in the little endian case
+            u32be[4*i]   = (char)(unsigned char)((u32[i] >> 24) & 0xFF); // most significant byte first
+            u32be[4*i+1] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
+            u32be[4*i+2] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
+            u32be[4*i+3] = (char)(unsigned char)(u32[i] & 0xFF);
+        }
+        wxMBConvUTF32BE conv;
+        TestDecoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4); // -4 drops the bytes of the trailing 0 element
+        TestEncoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
+    }
+}
+
 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name );

 void MBConvTestCase::IconvTests()