Add a test checking that conversions involving surrogates work

After the fixes in the previous commits, conversions between wchar_t containing surrogates and UTF-{8,16,32} work correctly, so add a test ensuring that this is the case. Note that other conversions are still broken in the presence of surrogates. See #17070.
2015-11-13 19:19:28 +01:00 · 2015-11-13 19:19:28 +01:00 · 0c02d70fa5
commit 0c02d70fa5
parent e570e8b6ac
1 changed files with 82 additions and 0 deletions
--- a/tests/mbconv/mbconvtest.cpp
+++ b/tests/mbconv/mbconvtest.cpp
@ -81,6 +81,7 @@ private:
        CPPUNIT_TEST( FontmapTests );
        CPPUNIT_TEST( BufSize );
        CPPUNIT_TEST( FromWCharTests );
        CPPUNIT_TEST( NonBMPCharTests );
 #ifdef HAVE_WCHAR_H
        CPPUNIT_TEST( UTF8_41 );
        CPPUNIT_TEST( UTF8_7f );
@ -116,6 +117,7 @@ private:
    void FontmapTests();
    void BufSize();
    void FromWCharTests();
    void NonBMPCharTests();
    void IconvTests();
    void Latin1Tests();
@ -940,6 +942,86 @@ void MBConvTestCase::FromWCharTests()
    CPPUNIT_ASSERT_EQUAL( '!', mbuf[6]);
 }
 void MBConvTestCase::NonBMPCharTests()
 {
    // U+1F363 (UTF-16: D83C DF63, UTF-8: F0 9F 8D A3) sushi (emoji)
    // U+732B (UTF-8: E7 8C AB) cat (kanji)
    // U+1F408 (UTF-16: D83D DC08, UTF-8: F0 9F 90 88) cat (emoji)
    // U+845B U+E0101 (UTF-16: 845B DB40 DD01, UTF-8: E8 91 9B F3 A0 84 81) (a kanji + an IVS)
    const char u8[] =
        "\xF0\x9F\x8D\xA3" /* U+1F363 */
        "\xE7\x8C\xAB\xF0\x9F\x90\x88" /* U+732B U+1F408 */
        "\xE8\x91\x9B\xF3\xA0\x84\x81"; /* U+845B U+E0101 */
    const wxChar16 u16[] = {
        0xD83C, 0xDF63,
        0x732B, 0xD83D, 0xDC08,
        0x845B, 0xDB40, 0xDD01,
        0};
    const wxChar32 u32[] = {
        0x1F363,
        0x732B, 0x1F408,
        0x845B, 0xE0101,
        0};
 #if SIZEOF_WCHAR_T == 2
    const wchar_t *const w = u16;
    const size_t wchars = sizeof(u16)/sizeof(wxChar16) - 1;
 #else
    const wchar_t *const w = u32;
    const size_t wchars = sizeof(u32)/sizeof(wxChar32) - 1;
 #endif
    {
        // Notice that these tests can only be done with strict UTF-8
        // converter, the use of any MAP_INVALID_UTF8_XXX options currently
        // completely breaks wxTextInputStream use.
        TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
        TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
    }
    {
        char u16le[sizeof(u16)];
        for (size_t i = 0; i < sizeof(u16)/2; ++i) {
            u16le[2*i]   = (char)(unsigned char)(u16[i] & 0xFF);
            u16le[2*i+1] = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
        }
        wxMBConvUTF16LE conv;
        TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
        TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
    }
    {
        char u16be[sizeof(u16)];
        for (size_t i = 0; i < sizeof(u16)/2; ++i) {
            u16be[2*i]   = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
            u16be[2*i+1] = (char)(unsigned char)(u16[i] & 0xFF);
        }
        wxMBConvUTF16BE conv;
        TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
        TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
    }
    {
        char u32le[sizeof(u32)];
        for (size_t i = 0; i < sizeof(u32)/4; ++i) {
            u32le[4*i]   = (char)(unsigned char)(u32[i] & 0xFF);
            u32le[4*i+1] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
            u32le[4*i+2] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
            u32le[4*i+3] = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
        }
        wxMBConvUTF32LE conv;
        TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
        TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
    }
    {
        char u32be[sizeof(u32)];
        for (size_t i = 0; i < sizeof(u32)/4; ++i) {
            u32be[4*i]   = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
            u32be[4*i+1] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
            u32be[4*i+2] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
            u32be[4*i+3] = (char)(unsigned char)(u32[i] & 0xFF);
        }
        wxMBConvUTF32BE conv;
        TestDecoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
        TestEncoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
    }
 }
 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name );
 void MBConvTestCase::IconvTests()