Add a test checking that conversions involving surrogates work

After the fixes in the previous commits, conversions between wchar_t strings
containing surrogates and UTF-{8,16,32} work correctly, so add a test ensuring
that this is the case.

Notice that other conversions are still broken in the presence of surrogates.

See #17070.
This commit is contained in:
ARATA Mizuki 2015-11-13 19:19:28 +01:00 committed by Vadim Zeitlin
parent e570e8b6ac
commit 0c02d70fa5

View File

@ -81,6 +81,7 @@ private:
CPPUNIT_TEST( FontmapTests );
CPPUNIT_TEST( BufSize );
CPPUNIT_TEST( FromWCharTests );
CPPUNIT_TEST( NonBMPCharTests );
#ifdef HAVE_WCHAR_H
CPPUNIT_TEST( UTF8_41 );
CPPUNIT_TEST( UTF8_7f );
@ -116,6 +117,7 @@ private:
void FontmapTests();
void BufSize();
void FromWCharTests();
void NonBMPCharTests();
void IconvTests();
void Latin1Tests();
@ -940,6 +942,86 @@ void MBConvTestCase::FromWCharTests()
CPPUNIT_ASSERT_EQUAL( '!', mbuf[6]);
}
void MBConvTestCase::NonBMPCharTests()
{
// U+1F363 (UTF-16: D83C DF63, UTF-8: F0 9F 8D A3) sushi (emoji)
// U+732B (UTF-8: E7 8C AB) cat (kanji)
// U+1F408 (UTF-16: D83D DC08, UTF-8: F0 9F 90 88) cat (emoji)
// U+845B U+E0101 (UTF-16: 845B DB40 DD01, UTF-8: E8 91 9B F3 A0 84 81) (a kanji + an IVS)
const char u8[] =
"\xF0\x9F\x8D\xA3" /* U+1F363 */
"\xE7\x8C\xAB\xF0\x9F\x90\x88" /* U+732B U+1F408 */
"\xE8\x91\x9B\xF3\xA0\x84\x81"; /* U+845B U+E0101 */
const wxChar16 u16[] = {
0xD83C, 0xDF63,
0x732B, 0xD83D, 0xDC08,
0x845B, 0xDB40, 0xDD01,
0};
const wxChar32 u32[] = {
0x1F363,
0x732B, 0x1F408,
0x845B, 0xE0101,
0};
#if SIZEOF_WCHAR_T == 2
const wchar_t *const w = u16;
const size_t wchars = sizeof(u16)/sizeof(wxChar16) - 1;
#else
const wchar_t *const w = u32;
const size_t wchars = sizeof(u32)/sizeof(wxChar32) - 1;
#endif
{
// Notice that these tests can only be done with strict UTF-8
// converter, the use of any MAP_INVALID_UTF8_XXX options currently
// completely breaks wxTextInputStream use.
TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
}
{
char u16le[sizeof(u16)];
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
u16le[2*i] = (char)(unsigned char)(u16[i] & 0xFF);
u16le[2*i+1] = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
}
wxMBConvUTF16LE conv;
TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
}
{
char u16be[sizeof(u16)];
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
u16be[2*i] = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
u16be[2*i+1] = (char)(unsigned char)(u16[i] & 0xFF);
}
wxMBConvUTF16BE conv;
TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
}
{
char u32le[sizeof(u32)];
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
u32le[4*i] = (char)(unsigned char)(u32[i] & 0xFF);
u32le[4*i+1] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
u32le[4*i+2] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
u32le[4*i+3] = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
}
wxMBConvUTF32LE conv;
TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
}
{
char u32be[sizeof(u32)];
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
u32be[4*i] = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
u32be[4*i+1] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
u32be[4*i+2] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
u32be[4*i+3] = (char)(unsigned char)(u32[i] & 0xFF);
}
wxMBConvUTF32BE conv;
TestDecoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
TestEncoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
}
}
// Forward declaration of a function taking a name and returning a wxMBConv
// pointer; it is declared here directly rather than through a header.
// NOTE(review): presumably creates an iconv-based converter for the named
// charset and is defined in the base library -- confirm against its definition.
WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name );
void MBConvTestCase::IconvTests()