From c7fd48db6b9f7b1116417ee4836e9c73483613d3 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Thu, 29 May 2014 23:48:46 +0000 Subject: [PATCH] Correct handling of the characters outside of the BMP in wxSTC. The code mapping positions for the units of the UTF-16/32 string used by wxWidgets to positions for the units of the UTF-8 string used by Scintilla didn't work correctly for the characters outside of the BMP, i.e. Unicode code points >= 0x10000. See #15621. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@76623 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- docs/changes.txt | 1 + src/stc/PlatWX.cpp | 40 ++++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 788052af2c..1fce2b1419 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -56,6 +56,7 @@ All (GUI): - Add support for loading old V1 BMP files to wxImage (Artur Wieczorek). - Improve auto sizing of wrapped cells in wxGrid (iwbnwif). - Fix handling of rectangular selections in wxStyledTextCtrl (roberto). +- Fix characters outside of the BMP in wxStyledTextCtrl (Thomas Goyne). wxGTK: diff --git a/src/stc/PlatWX.cpp b/src/stc/PlatWX.cpp index e6b34535e7..953047059f 100644 --- a/src/stc/PlatWX.cpp +++ b/src/stc/PlatWX.cpp @@ -522,23 +522,31 @@ void SurfaceImpl::MeasureWidths(Font &font, const char *s, int len, XYPOSITION * hdc->GetPartialTextExtents(str, tpos); #if wxUSE_UNICODE - // Map the widths for UCS-2 characters back to the UTF-8 input string - // NOTE: I don't think this is right for when sizeof(wxChar) > 2, ie wxGTK2 - // so figure it out and fix it! 
- size_t i = 0; - size_t ui = 0; - while ((int)i < len) { - unsigned char uch = (unsigned char)s[i]; - positions[i++] = tpos[ui]; - if (uch >= 0x80) { - if (uch < (0x80 + 0x40 + 0x20)) { - positions[i++] = tpos[ui]; - } else { - positions[i++] = tpos[ui]; - positions[i++] = tpos[ui]; - } + // Map the widths back to the UTF-8 input string + size_t utf8i = 0; + for (size_t wxi = 0; wxi < str.size(); ++wxi) { + wxUniChar c = str[wxi]; + +#if SIZEOF_WCHAR_T == 2 + // For surrogate pairs, the position for the lead surrogate is garbage + // and we need to use the position of the trail surrogate for all four bytes + if (c >= 0xD800 && c < 0xE000 && wxi + 1 < str.size()) { + ++wxi; + positions[utf8i++] = tpos[wxi]; + positions[utf8i++] = tpos[wxi]; + positions[utf8i++] = tpos[wxi]; + positions[utf8i++] = tpos[wxi]; + continue; } - ui++; +#endif + + positions[utf8i++] = tpos[wxi]; + if (c >= 0x80) + positions[utf8i++] = tpos[wxi]; + if (c >= 0x800) + positions[utf8i++] = tpos[wxi]; + if (c >= 0x10000) + positions[utf8i++] = tpos[wxi]; } #else // !wxUSE_UNICODE // If not unicode then just use the widths we have