From ee635571122e1dd9b77276afb0f642e7ac9a015a Mon Sep 17 00:00:00 2001
From: Marc Mutz
Date: Wed, 1 Apr 2020 15:28:29 +0200
Subject: [PATCH] QString/View: add tokenize() member functions

[ChangeLog][QtCore][QString, QStringView, QLatin1String] Added tokenize().

Change-Id: I5fbeab0ac1809ff2974e565129b61a6bdfb398bc
Reviewed-by: Qt CI Bot
Reviewed-by: Volker Hilsheimer
Reviewed-by: Lars Knoll
---
 src/corelib/text/qstring.h                    |  25 +++
 src/corelib/text/qstringview.cpp              |  42 +++++
 src/corelib/text/qstringview.h                |   8 +-
 .../tst_qstringapisymmetry.cpp                |  23 +++
 .../corelib/text/qstringview/qstringview.pro  |   1 +
 .../text/qstringview/tst_qstringview.cpp      | 167 +++++++++++++++++-
 6 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h
index b06afccc74..54aacfb298 100644
--- a/src/corelib/text/qstring.h
+++ b/src/corelib/text/qstring.h
@@ -3,6 +3,7 @@
 ** Copyright (C) 2016 The Qt Company Ltd.
 ** Copyright (C) 2019 Intel Corporation.
 ** Copyright (C) 2019 Mail.ru Group.
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the QtCore module of the Qt Toolkit.
@@ -196,6 +197,12 @@ public:

     Q_REQUIRED_RESULT QLatin1String trimmed() const noexcept { return QtPrivate::trimmed(*this); }

+    template <typename Needle, typename...Flags> // QLatin1String: hands *this and the forwarded needle to qTokenize()
+    Q_REQUIRED_RESULT inline constexpr auto tokenize(Needle &&needle, Flags...flags) const
+        noexcept(noexcept(qTokenize(std::declval<const QLatin1String &>(), std::forward<Needle>(needle), flags...)))
+            -> decltype(qTokenize(*this, std::forward<Needle>(needle), flags...))
+    { return qTokenize(*this, std::forward<Needle>(needle), flags...); }
+
     inline bool operator==(const QString &s) const noexcept;
     inline bool operator!=(const QString &s) const noexcept;
     inline bool operator>(const QString &s) const noexcept;
@@ -634,6 +641,24 @@ public:
                       Qt::SplitBehavior behavior = Qt::KeepEmptyParts) const;
 #endif

+    template <typename Needle, typename...Flags> // lvalue QString: qTokenize() receives qToStringViewIgnoringNull(*this), not the QString itself
+    Q_REQUIRED_RESULT inline auto tokenize(Needle &&needle, Flags...flags) const &
+        noexcept(noexcept(qTokenize(std::declval<const QString &>(), std::forward<Needle>(needle), flags...)))
+            -> decltype(qTokenize(*this, std::forward<Needle>(needle), flags...))
+    { return qTokenize(qToStringViewIgnoringNull(*this), std::forward<Needle>(needle), flags...); }
+
+    template <typename Needle, typename...Flags> // const rvalue QString: *this is passed on through std::move()
+    Q_REQUIRED_RESULT inline auto tokenize(Needle &&needle, Flags...flags) const &&
+        noexcept(noexcept(qTokenize(std::declval<const QString>(), std::forward<Needle>(needle), flags...)))
+            -> decltype(qTokenize(std::move(*this), std::forward<Needle>(needle), flags...))
+    { return qTokenize(std::move(*this), std::forward<Needle>(needle), flags...); }
+
+    template <typename Needle, typename...Flags> // rvalue QString: *this is passed on through std::move()
+    Q_REQUIRED_RESULT inline auto tokenize(Needle &&needle, Flags...flags) &&
+        noexcept(noexcept(qTokenize(std::declval<QString>(), std::forward<Needle>(needle), flags...)))
+            -> decltype(qTokenize(std::move(*this), std::forward<Needle>(needle), flags...))
+    { return qTokenize(std::move(*this), std::forward<Needle>(needle), flags...); }
+

     enum NormalizationForm {
         NormalizationForm_D,
diff --git a/src/corelib/text/qstringview.cpp b/src/corelib/text/qstringview.cpp
index bf8b80ee5d..667d9306af 100644
--- a/src/corelib/text/qstringview.cpp
+++ b/src/corelib/text/qstringview.cpp
@@ -1172,4 +1172,46 @@ QT_BEGIN_NAMESPACE

     \since 6.0
 */
+
+/*!
+    \fn QStringView::tokenize(Needle &&sep, Flags...flags) const
+    \fn QLatin1String::tokenize(Needle &&sep, Flags...flags) const
+    \fn QString::tokenize(Needle &&sep, Flags...flags) const &
+    \fn QString::tokenize(Needle &&sep, Flags...flags) const &&
+    \fn QString::tokenize(Needle &&sep, Flags...flags) &&
+
+    Splits the string into substring views wherever \a sep occurs, and
+    returns a lazy sequence of those strings.
+
+    Equivalent to
+
+    \code
+    return QStringTokenizer{*this, std::forward<Needle>(sep), flags...};
+    \endcode
+
+    except it works without C++17 Class Template Argument Deduction (CTAD)
+    enabled in the compiler.
+
+    See QStringTokenizer for how \a sep and \a flags interact to form
+    the result.
+
+    \note While this function returns QStringTokenizer, you should never,
+    ever, name its template arguments explicitly. If you can use C++17 Class
+    Template Argument Deduction (CTAD), you may write
+    \code
+    QStringTokenizer result = sv.tokenize(sep);
+    \endcode
+    (without template arguments). If you can't use C++17 CTAD, you must store
+    the return value only in \c{auto} variables:
+    \code
+    auto result = sv.tokenize(sep);
+    \endcode
+    This is because the template arguments of QStringTokenizer have a very
+    subtle dependency on the specific tokenize() overload from which they are
+    returned, and they don't usually correspond to the type used for the separator.
+
+    \since 6.0
+    \sa QStringTokenizer, qTokenize()
+*/
+
 QT_END_NAMESPACE
diff --git a/src/corelib/text/qstringview.h b/src/corelib/text/qstringview.h
index 4e780628cc..028bf3a544 100644
--- a/src/corelib/text/qstringview.h
+++ b/src/corelib/text/qstringview.h
@@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2017 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz
 ** Copyright (C) 2019 Mail.ru Group.
 ** Contact: http://www.qt.io/licensing/
 **
@@ -275,6 +275,12 @@ public:

     Q_REQUIRED_RESULT QStringView trimmed() const noexcept { return QtPrivate::trimmed(*this); }

+    template <typename Needle, typename...Flags> // QStringView: hands *this and the forwarded needle to qTokenize()
+    Q_REQUIRED_RESULT constexpr inline auto tokenize(Needle &&needle, Flags...flags) const
+        noexcept(noexcept(qTokenize(std::declval<const QStringView &>(), std::forward<Needle>(needle), flags...)))
+            -> decltype(qTokenize(*this, std::forward<Needle>(needle), flags...))
+    { return qTokenize(*this, std::forward<Needle>(needle), flags...); }
+
     Q_REQUIRED_RESULT int compare(QStringView other, Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept
     { return QtPrivate::compareStrings(*this, other, cs); }
     Q_REQUIRED_RESULT inline int compare(QLatin1String other, Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept;
diff --git a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
index f3a7e93be2..aeaf317d75 100644
--- a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
+++ b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
@@ -1429,6 +1429,14 @@ void tst_QStringApiSymmetry::tok_data(bool rhsHasVariableLength)
     split_data(rhsHasVariableLength);
 }

+template <typename T> struct has_tokenize_method : std::false_type {}; // opt-in trait: false unless specialised below
+template <> struct has_tokenize_method<QString> : std::true_type {};
+template <> struct has_tokenize_method<QStringView> : std::true_type {};
+template <> struct has_tokenize_method<QLatin1String> : std::true_type {};
+
+template <typename T> // decays T first, so cv- and reference-qualified haystack types still match
+constexpr inline bool has_tokenize_method_v = has_tokenize_method<std::decay_t<T>>::value;
+
 template <typename Haystack, typename Needle>
 void tst_QStringApiSymmetry::tok_impl() const
 {
@@ -1475,6 +1483,21 @@ void tst_QStringApiSymmetry::tok_impl() const
         QCOMPARE(toQStringList(tok), resultCS);
     }
 #endif // __cpp_deduction_guides
+
+    if constexpr (has_tokenize_method_v<Haystack>) {
+        QCOMPARE(toQStringList(haystack.tokenize(needle)), resultCS);
+        QCOMPARE(toQStringList(haystack.tokenize(needle, Qt::KeepEmptyParts, Qt::CaseSensitive)), resultCS);
+        QCOMPARE(toQStringList(haystack.tokenize(needle, Qt::CaseInsensitive, Qt::KeepEmptyParts)), resultCIS);
+        QCOMPARE(toQStringList(haystack.tokenize(needle, Qt::SkipEmptyParts, Qt::CaseSensitive)), skippedResultCS);
+        QCOMPARE(toQStringList(haystack.tokenize(needle, Qt::CaseInsensitive, Qt::SkipEmptyParts)), skippedResultCIS);
+
+        {
+            const auto tok = deepCopied(haystack).tokenize(deepCopied(needle));
+            // here, the temporaries returned from deepCopied() have already been destroyed,
+            // yet `tok` should have kept a copy alive as needed:
+            QCOMPARE(toQStringList(tok), resultCS);
+        }
+    }
 }

 void tst_QStringApiSymmetry::mid_data()
diff --git a/tests/auto/corelib/text/qstringview/qstringview.pro b/tests/auto/corelib/text/qstringview/qstringview.pro
index e0e9973c91..e6d610e980 100644
--- a/tests/auto/corelib/text/qstringview/qstringview.pro
+++ b/tests/auto/corelib/text/qstringview/qstringview.pro
@@ -3,4 +3,5 @@ TARGET = tst_qstringview
 QT = core testlib
 contains(QT_CONFIG, c++14):CONFIG *= c++14
 contains(QT_CONFIG, c++1z):CONFIG *= c++1z
+contains(QT_CONFIG, c++2a):CONFIG *= c++2a
 SOURCES += tst_qstringview.cpp
diff --git a/tests/auto/corelib/text/qstringview/tst_qstringview.cpp b/tests/auto/corelib/text/qstringview/tst_qstringview.cpp
index b2288f0785..69702a14b3 100644
--- a/tests/auto/corelib/text/qstringview/tst_qstringview.cpp
+++ b/tests/auto/corelib/text/qstringview/tst_qstringview.cpp
@@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the QtCore module of the Qt Toolkit.
@@ -27,6 +27,7 @@
 ****************************************************************************/

 #include <QStringView>
+#include <QStringTokenizer>
 #include <QString>
 #include <QChar>
 #include <QStringRef>
@@ -39,6 +40,8 @@
 #include <string_view>
 #include <array>
 #include <vector>
+#include <algorithm>
+#include <memory>

 // for negative testing (can't convert from)
 #include <deque>
@@ -264,6 +267,9 @@ private Q_SLOTS:

     void overloadResolution();

+    void tokenize_data() const;
+    void tokenize() const;
+
 private:
     template <typename String>
     void conversion_tests(String arg) const;
@@ -502,6 +508,165 @@ void tst_QStringView::fromQStringRef() const
     conversion_tests(QString("Hello World!").midRef(6));
 }

+void tst_QStringView::tokenize_data() const
+{
+    // copied from tst_QString
+    QTest::addColumn<QString>("str");
+    QTest::addColumn<QString>("sep");
+    QTest::addColumn<QStringList>("result");
+
+    QTest::newRow("1") << "a,b,c" << "," << (QStringList() << "a" << "b" << "c");
+    QTest::newRow("2") << QString("-rw-r--r--  1 0  0  519240 Jul  9  2002 bigfile") // doubled spaces yield the empty tokens below
+                       << " "
+                       << (QStringList() << "-rw-r--r--" << "" << "1" << "0" << "" << "0" << ""
+                                         << "519240" << "Jul" << "" << "9" << "" << "2002"
+                                         << "bigfile");
+    QTest::newRow("one-empty") << "" << " " << (QStringList() << "");
+    QTest::newRow("two-empty") << " " << " " << (QStringList() << "" << "");
+    QTest::newRow("three-empty") << "  " << " " << (QStringList() << "" << "" << "");
+
+    QTest::newRow("all-empty") << "" << "" << (QStringList() << "" << "");
+    QTest::newRow("sep-empty") << "abc" << "" << (QStringList() << "" << "a" << "b" << "c" << "");
+}
+
+void tst_QStringView::tokenize() const
+{
+    QFETCH(const QString, str);
+    QFETCH(const QString, sep);
+    QFETCH(const QStringList, result);
+
+    // lvalue QString
+#ifdef __cpp_deduction_guides
+    {
+        auto rit = result.cbegin();
+        for (auto sv : QStringTokenizer{str, sep})
+            QCOMPARE(sv, *rit++);
+    }
+#endif
+    {
+        auto rit = result.cbegin();
+        for (auto sv : QStringView{str}.tokenize(sep))
+            QCOMPARE(sv, *rit++);
+    }
+
+    // rvalue QString
+#ifdef __cpp_deduction_guides
+    {
+        auto rit = result.cbegin();
+        for (auto sv : QStringTokenizer{str, QString{sep}})
+            QCOMPARE(sv, *rit++);
+    }
+#endif
+    {
+        auto rit = result.cbegin();
+        for (auto sv : QStringView{str}.tokenize(QString{sep}))
+            QCOMPARE(sv, *rit++);
+    }
+
+    // (rvalue) QStringRef
+#ifdef __cpp_deduction_guides
+    {
+        auto rit = result.cbegin();
+        for (auto sv : QStringTokenizer{str, sep.midRef(0)})
+            QCOMPARE(sv, *rit++);
+    }
+#endif
+    {
+        auto rit = result.cbegin();
+        for (auto sv : QStringView{str}.tokenize(sep.midRef(0)))
+            QCOMPARE(sv, *rit++);
+    }
+
+    // (rvalue) QChar
+#ifdef __cpp_deduction_guides
+    if (sep.size() == 1) {
+        auto rit = result.cbegin();
+        for (auto sv : QStringTokenizer{str, sep.front()})
+            QCOMPARE(sv, *rit++);
+    }
+#endif
+    if (sep.size() == 1) {
+        auto rit = result.cbegin();
+        for (auto sv : QStringView{str}.tokenize(sep.front()))
+            QCOMPARE(sv, *rit++);
+    }
+
+    // (rvalue) char16_t
+#ifdef __cpp_deduction_guides
+    if (sep.size() == 1) {
+        auto rit = result.cbegin();
+        for (auto sv : QStringTokenizer{str, *qToStringViewIgnoringNull(sep).utf16()})
+            QCOMPARE(sv, *rit++);
+    }
+#endif
+    if (sep.size() == 1) {
+        auto rit = result.cbegin();
+        for (auto sv : QStringView{str}.tokenize(*qToStringViewIgnoringNull(sep).utf16()))
+            QCOMPARE(sv, *rit++);
+    }
+
+    // char16_t literal
+    const auto make_literal = [](const QString &sep) {
+        auto literal = std::make_unique<char16_t[]>(sep.size() + 1); // value-initialised: the extra slot is the NUL terminator
+        const auto to_char16_t = [](QChar c) { return char16_t{c.unicode()}; };
+        std::transform(sep.cbegin(), sep.cend(), literal.get(), to_char16_t);
+        return literal;
+    };
+    const std::unique_ptr<char16_t[]> literal = make_literal(sep);
+#ifdef __cpp_deduction_guides
+    {
+        auto rit = result.cbegin();
+        for (auto sv : QStringTokenizer{str, literal.get()})
+            QCOMPARE(sv, *rit++);
+    }
+#endif
+    {
+        auto rit = result.cbegin();
+        for (auto sv : QStringView{str}.tokenize(literal.get()))
+            QCOMPARE(sv, *rit++);
+    }
+
+#ifdef __cpp_deduction_guides
+#ifdef __cpp_lib_ranges
+    // lvalue QString
+    {
+        QStringList actual;
+        const QStringTokenizer tok{str, sep};
+        std::ranges::transform(tok, std::back_inserter(actual),
+                               [](auto sv) { return sv.toString(); });
+        QCOMPARE(result, actual);
+    }
+
+    // rvalue QString
+    {
+        QStringList actual;
+        const QStringTokenizer tok{str, QString{sep}};
+        std::ranges::transform(tok, std::back_inserter(actual),
+                               [](auto sv) { return sv.toString(); });
+        QCOMPARE(result, actual);
+    }
+
+    // (rvalue) QStringRef
+    {
+        QStringList actual;
+        const QStringTokenizer tok{str, sep.midRef(0)};
+        std::ranges::transform(tok, std::back_inserter(actual),
+                               [](auto sv) { return sv.toString(); });
+        QCOMPARE(result, actual);
+    }
+
+    // (rvalue) QChar
+    if (sep.size() == 1) {
+        QStringList actual;
+        const QStringTokenizer tok{str, sep.front()};
+        std::ranges::transform(tok, std::back_inserter(actual),
+                               [](auto sv) { return sv.toString(); });
+        QCOMPARE(result, actual);
+    }
+#endif // __cpp_lib_ranges
+#endif // __cpp_deduction_guides
+}
+
 template <typename Char>
 void tst_QStringView::fromLiteral(const Char *arg) const
 {