Add a test checking that conversions involving surrogates work
After the fixes in the previous commits, conversions between wchar_t containing surrogates and UTF-{8,16,32} work correctly, so add a test ensuring that this is the case. Notice that other conversions are still broken in the presence of surrogates. See #17070.
This commit is contained in:
parent
e570e8b6ac
commit
0c02d70fa5
@ -81,6 +81,7 @@ private:
|
|||||||
CPPUNIT_TEST( FontmapTests );
|
CPPUNIT_TEST( FontmapTests );
|
||||||
CPPUNIT_TEST( BufSize );
|
CPPUNIT_TEST( BufSize );
|
||||||
CPPUNIT_TEST( FromWCharTests );
|
CPPUNIT_TEST( FromWCharTests );
|
||||||
|
CPPUNIT_TEST( NonBMPCharTests );
|
||||||
#ifdef HAVE_WCHAR_H
|
#ifdef HAVE_WCHAR_H
|
||||||
CPPUNIT_TEST( UTF8_41 );
|
CPPUNIT_TEST( UTF8_41 );
|
||||||
CPPUNIT_TEST( UTF8_7f );
|
CPPUNIT_TEST( UTF8_7f );
|
||||||
@ -116,6 +117,7 @@ private:
|
|||||||
void FontmapTests();
|
void FontmapTests();
|
||||||
void BufSize();
|
void BufSize();
|
||||||
void FromWCharTests();
|
void FromWCharTests();
|
||||||
|
void NonBMPCharTests();
|
||||||
void IconvTests();
|
void IconvTests();
|
||||||
void Latin1Tests();
|
void Latin1Tests();
|
||||||
|
|
||||||
@ -940,6 +942,86 @@ void MBConvTestCase::FromWCharTests()
|
|||||||
CPPUNIT_ASSERT_EQUAL( '!', mbuf[6]);
|
CPPUNIT_ASSERT_EQUAL( '!', mbuf[6]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MBConvTestCase::NonBMPCharTests()
|
||||||
|
{
|
||||||
|
// U+1F363 (UTF-16: D83C DF63, UTF-8: F0 9F 8D A3) sushi (emoji)
|
||||||
|
// U+732B (UTF-8: E7 8C AB) cat (kanji)
|
||||||
|
// U+1F408 (UTF-16: D83D DC08, UTF-8: F0 9F 90 88) cat (emoji)
|
||||||
|
// U+845B U+E0101 (UTF-16: 845B DB40 DD01, UTF-8: E8 91 9B F3 A0 84 81) (a kanji + an IVS)
|
||||||
|
const char u8[] =
|
||||||
|
"\xF0\x9F\x8D\xA3" /* U+1F363 */
|
||||||
|
"\xE7\x8C\xAB\xF0\x9F\x90\x88" /* U+732B U+1F408 */
|
||||||
|
"\xE8\x91\x9B\xF3\xA0\x84\x81"; /* U+845B U+E0101 */
|
||||||
|
const wxChar16 u16[] = {
|
||||||
|
0xD83C, 0xDF63,
|
||||||
|
0x732B, 0xD83D, 0xDC08,
|
||||||
|
0x845B, 0xDB40, 0xDD01,
|
||||||
|
0};
|
||||||
|
const wxChar32 u32[] = {
|
||||||
|
0x1F363,
|
||||||
|
0x732B, 0x1F408,
|
||||||
|
0x845B, 0xE0101,
|
||||||
|
0};
|
||||||
|
#if SIZEOF_WCHAR_T == 2
|
||||||
|
const wchar_t *const w = u16;
|
||||||
|
const size_t wchars = sizeof(u16)/sizeof(wxChar16) - 1;
|
||||||
|
#else
|
||||||
|
const wchar_t *const w = u32;
|
||||||
|
const size_t wchars = sizeof(u32)/sizeof(wxChar32) - 1;
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
// Notice that these tests can only be done with strict UTF-8
|
||||||
|
// converter, the use of any MAP_INVALID_UTF8_XXX options currently
|
||||||
|
// completely breaks wxTextInputStream use.
|
||||||
|
TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
|
||||||
|
TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char u16le[sizeof(u16)];
|
||||||
|
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
|
||||||
|
u16le[2*i] = (char)(unsigned char)(u16[i] & 0xFF);
|
||||||
|
u16le[2*i+1] = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
|
||||||
|
}
|
||||||
|
wxMBConvUTF16LE conv;
|
||||||
|
TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
|
||||||
|
TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char u16be[sizeof(u16)];
|
||||||
|
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
|
||||||
|
u16be[2*i] = (char)(unsigned char)((u16[i] >> 8) & 0xFF);
|
||||||
|
u16be[2*i+1] = (char)(unsigned char)(u16[i] & 0xFF);
|
||||||
|
}
|
||||||
|
wxMBConvUTF16BE conv;
|
||||||
|
TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
|
||||||
|
TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char u32le[sizeof(u32)];
|
||||||
|
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
|
||||||
|
u32le[4*i] = (char)(unsigned char)(u32[i] & 0xFF);
|
||||||
|
u32le[4*i+1] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
|
||||||
|
u32le[4*i+2] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
|
||||||
|
u32le[4*i+3] = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
|
||||||
|
}
|
||||||
|
wxMBConvUTF32LE conv;
|
||||||
|
TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
|
||||||
|
TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char u32be[sizeof(u32)];
|
||||||
|
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
|
||||||
|
u32be[4*i] = (char)(unsigned char)((u32[i] >> 24) & 0xFF);
|
||||||
|
u32be[4*i+1] = (char)(unsigned char)((u32[i] >> 16) & 0xFF);
|
||||||
|
u32be[4*i+2] = (char)(unsigned char)((u32[i] >> 8) & 0xFF);
|
||||||
|
u32be[4*i+3] = (char)(unsigned char)(u32[i] & 0xFF);
|
||||||
|
}
|
||||||
|
wxMBConvUTF32BE conv;
|
||||||
|
TestDecoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
|
||||||
|
TestEncoder(w, wchars, u32be, sizeof(u32be)-4, conv, 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name );
|
WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const char* name );
|
||||||
|
|
||||||
void MBConvTestCase::IconvTests()
|
void MBConvTestCase::IconvTests()
|
||||||
|
Loading…
Reference in New Issue
Block a user