From 144e10eafbba82d8ac554bbd25afab112482f509 Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Mon, 10 Jul 2023 16:27:13 +0200 Subject: [PATCH] Treat simple spaces as equivalent in date-time format separators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user might not be aware of, or able to see, the difference between Unicode's assorted horizontal spacing characters, leading them to expect their input to be accepted for a format despite differences in spacing. So treat the various horizontal spacing (other than tab) characters as equivalent when matching the separators in a date-time format. Add a test-case that failed before this fix. Fixes: QTBUG-114909 Pick-to: 6.6 6.5 Change-Id: I3e798d3e5b89adb8e86168ebd3954904b258d630 Reviewed-by: Qt CI Bot Reviewed-by: Mårten Nordheim Reviewed-by: Ievgenii Meshcheriakov --- src/corelib/time/qdatetimeparser.cpp | 26 +++++++++++++++++-- .../auto/corelib/text/qlocale/tst_qlocale.cpp | 4 +++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/corelib/time/qdatetimeparser.cpp b/src/corelib/time/qdatetimeparser.cpp index f8709f6b71..93ee63207c 100644 --- a/src/corelib/time/qdatetimeparser.cpp +++ b/src/corelib/time/qdatetimeparser.cpp @@ -728,11 +728,33 @@ int QDateTimeParser::sectionMaxSize(int index) const } // Separator matching +// +// QTBUG-114909: user may be oblivious to difference between visibly +// indistinguishable spacing characters. For now we only treat horizontal +// spacing characters, excluding tab, as equivalent. + static int matchesSeparator(QStringView text, QStringView separator) { + const auto isSimpleSpace = [](char32_t ch) { + // Distinguish tab, CR and the vertical spaces from the rest: + return ch == u' ' || (ch > 127 && QChar::isSpace(ch)); + }; // -1 if not a match, else length of prefix of text that does match. - // For now, just check for exact match: - return text.startsWith(separator) ? 
separator.size() : -1; + // First check for exact match + if (!text.startsWith(separator)) { + // Failing that, check for space-identifying match: + QStringIterator given(text), sep(separator); + while (sep.hasNext()) { + if (!given.hasNext()) + return -1; + char32_t s = sep.next(), g = given.next(); + if (s != g && !(isSimpleSpace(s) && isSimpleSpace(g))) + return -1; + } + // One side may have used a surrogate pair space where the other didn't: + return given.index(); + } + return separator.size(); } /*! diff --git a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp index 5de653138b..97eb99d3a1 100644 --- a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp +++ b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp @@ -2431,6 +2431,10 @@ void tst_QLocale::toTime_data() << usa << QTime(16, 43, 32) << u"h:mm:ss AP "_s << u"4:43:32 PM "_s << true; QTest::newRow("shortFormat-PM") << usa << QTime(16, 43) << u"h:mm AP "_s << u"4:43 PM "_s << true; + // Some locales use a narrow non-breaking space as separator, but + // the user can't see the difference from a space (QTBUG-114909): + QTest::newRow("shortFormat-AM-mixspace") + << usa << QTime(4, 43) << u"h:mm\u202F" "AP "_s << u"4:43 AM "_s << true; // Parsing am/pm indicators case-insensitively: const QLocale czech{QLocale::Czech, QLocale::Czechia};