diff --git a/doc/src/snippets/qstring/main.cpp b/doc/src/snippets/qstring/main.cpp index d0a445731c..d7299e80d5 100644 --- a/doc/src/snippets/qstring/main.cpp +++ b/doc/src/snippets/qstring/main.cpp @@ -314,6 +314,11 @@ void Widget::countFunction() QString str = "banana and panama"; str.count(QRegExp("a[nm]a")); // returns 4 //! [18] + + //! [95] + QString str = "banana and panama"; + str.count(QRegularExpression("a[nm]a")); // returns 4 + //! [95] } void Widget::dataFunction() @@ -384,6 +389,11 @@ void Widget::firstIndexOfFunction() QString str = "the minimum"; str.indexOf(QRegExp("m[aeiou]"), 0); // returns 4 //! [25] + + //! [93] + QString str = "the minimum"; + str.indexOf(QRegularExpression("m[aeiou]"), 0); // returns 4 + //! [93] } void Widget::insertFunction() @@ -429,6 +439,11 @@ void Widget::lastIndexOfFunction() QString str = "the minimum"; str.lastIndexOf(QRegExp("m[aeiou]")); // returns 8 //! [30] + + //! [94] + QString str = "the minimum"; + str.lastIndexOf(QRegularExpression("m[aeiou]")); // returns 8 + //! [94] } void Widget::leftFunction() @@ -499,6 +514,12 @@ void Widget::removeFunction() r.remove(QRegExp("[aeiou].")); // r == "The" //! [39] + + //! [96] + QString r = "Telephone"; + r.remove(QRegularExpression("[aeiou].")); + // r == "The" + //! [96] } void Widget::replaceFunction() @@ -533,6 +554,18 @@ void Widget::replaceFunction() equis.replace("xx", "x"); // equis == "xxx" //! [86] + + //! [87] + QString s = "Banana"; + s.replace(QRegularExpression("a[mn]"), "ox"); + // s == "Boxoxa" + //! [87] + + //! [88] + QString t = "A bon mot."; + t.replace(QRegularExpression("([^<]*)"), "\\emph{\\1}"); + // t == "A \\emph{bon mot}." + //! [88] } void Widget::reserveFunction() @@ -627,9 +660,16 @@ void Widget::sectionFunction() //! [55] QString line = "forename\tmiddlename surname \t \t phone"; QRegExp sep("\\s+"); - str = line.section(sep, 2, 2); // s == "surname" - str = line.section(sep, -3, -2); // s == "middlename surname" + str = line.section(sep, 2, 2); // str == "surname" + str = line.section(sep, -3, -2); // str == "middlename surname" //! [55] + + //! [89] + QString line = "forename\tmiddlename surname \t \t phone"; + QRegularExpression sep("\\s+"); + str = line.section(sep, 2, 2); // str == "surname" + str = line.section(sep, -3, -2); // str == "middlename surname" + //! [89] } void Widget::setNumFunction() @@ -682,6 +722,27 @@ void Widget::splitFunction() list = str.split(QRegExp("\\b")); // list: [ "", "Now", ": ", "this", " ", "sentence", " ", "fragment", "." ] //! [61] + + //! [90] + QString str; + QStringList list; + + str = "Some text\n\twith strange whitespace."; + list = str.split(QRegularExpression("\\s+")); + // list: [ "Some", "text", "with", "strange", "whitespace." ] + //! [90] + + //! [91] + str = "This time, a normal English sentence."; + list = str.split(QRegularExpression("\\W+"), QString::SkipEmptyParts); + // list: [ "This", "time", "a", "normal", "English", "sentence" ] + //! [91] + + //! [92] + str = "Now: this sentence fragment."; + list = str.split(QRegularExpression("\\b")); + // list: [ "", "Now", ": ", "this", " ", "sentence", " ", "fragment", "." ] + //! [92] } void Widget::splitCaseSensitiveFunction() diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 584502f713..878403fe75 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -41,6 +41,7 @@ #include "qstringlist.h" #include "qregexp.h" +#include "qregularexpression.h" #include "qunicodetables_p.h" #ifndef QT_NO_TEXTCODEC #include @@ -1740,6 +1741,18 @@ QString &QString::remove(QChar ch, Qt::CaseSensitivity cs) \sa indexOf(), lastIndexOf(), replace() */ +/*! + \fn QString &QString::remove(const QRegularExpression &re) + \since 5.0 + + Removes every occurrence of the regular expression \a re in the + string, and returns a reference to the string. For example: + + \snippet doc/src/snippets/qstring/main.cpp 96 + + \sa indexOf(), lastIndexOf(), replace() +*/ + /*! \fn QString &QString::replace(int position, int n, const QString &after) @@ -2923,6 +2936,138 @@ QString& QString::replace(const QRegExp &rx, const QString &after) } #endif +#ifndef QT_NO_REGEXP +#ifndef QT_BOOTSTRAPPED +/*! + \overload replace() + \since 5.0 + + Replaces every occurrence of the regular expression \a re in the + string with \a after. Returns a reference to the string. For + example: + + \snippet doc/src/snippets/qstring/main.cpp 87 + + For regular expressions containing capturing groups, + occurrences of \bold{\\1}, \bold{\\2}, ..., in \a after are replaced + with the string captured by the corresponding capturing group. + + \snippet doc/src/snippets/qstring/main.cpp 88 + + \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch +*/ +QString &QString::replace(const QRegularExpression &re, const QString &after) +{ + if (!re.isValid()) { + qWarning("QString::replace: invalid QRegularExpresssion object"); + return *this; + } + + const QString copy(*this); + QRegularExpressionMatchIterator iterator = re.globalMatch(copy); + if (!iterator.hasNext()) // no matches at all + return *this; + + realloc(); + + int numCaptures = re.captureCount(); + + // 1. build the backreferences vector, holding where the backreferences + // are in the replacement string + QVector backReferences; + const int al = after.length(); + const QChar *ac = after.unicode(); + + for (int i = 0; i < al - 1; i++) { + if (ac[i] == QLatin1Char('\\')) { + int no = ac[i + 1].digitValue(); + if (no > 0 && no <= numCaptures) { + QStringCapture backReference; + backReference.pos = i; + backReference.len = 2; + + if (i < al - 2) { + int secondDigit = ac[i + 2].digitValue(); + if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) { + no = (no * 10) + secondDigit; + ++backReference.len; + } + } + + backReference.no = no; + backReferences.append(backReference); + } + } + } + + // 2. iterate on the matches. For every match, copy in chunks + // - the part before the match + // - the after string, with the proper replacements for the backreferences + + int newLength = 0; // length of the new string, with all the replacements + int lastEnd = 0; + QVector chunks; + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + int len; + // add the part before the match + len = match.capturedStart() - lastEnd; + if (len > 0) { + chunks << copy.midRef(lastEnd, len); + newLength += len; + } + + lastEnd = 0; + // add the after string, with replacements for the backreferences + foreach (const QStringCapture &backReference, backReferences) { + // part of "after" before the backreference + len = backReference.pos - lastEnd; + if (len > 0) { + chunks << after.midRef(lastEnd, len); + newLength += len; + } + + // backreference itself + len = match.capturedLength(backReference.no); + if (len > 0) { + chunks << copy.midRef(match.capturedStart(backReference.no), len); + newLength += len; + } + + lastEnd = backReference.pos + backReference.len; + } + + // add the last part of the after string + len = after.length() - lastEnd; + if (len > 0) { + chunks << after.midRef(lastEnd, len); + newLength += len; + } + + lastEnd = match.capturedEnd(); + } + + // 3. trailing string after the last match + if (copy.length() > lastEnd) { + chunks << copy.midRef(lastEnd); + newLength += copy.length() - lastEnd; + } + + // 4. assemble the chunks together + resize(newLength); + int i = 0; + QChar *uc = data(); + foreach (const QStringRef &chunk, chunks) { + int len = chunk.length(); + memcpy(uc + i, chunk.unicode(), len * sizeof(QChar)); + i += len; + } + + return *this; +} +#endif // QT_BOOTSTRAPPED +#endif // QT_NO_REGEXP + /*! Returns the number of (potentially overlapping) occurrences of the string \a str in this string. @@ -3122,6 +3267,118 @@ int QString::count(const QRegExp& rx) const } #endif // QT_NO_REGEXP +#ifndef QT_NO_REGEXP +#ifndef QT_BOOTSTRAPPED +/*! + \overload indexOf() + \since 5.0 + + Returns the index position of the first match of the regular + expression \a re in the string, searching forward from index + position \a from. Returns -1 if \a re didn't match anywhere. + + Example: + + \snippet doc/src/snippets/qstring/main.cpp 93 +*/ +int QString::indexOf(const QRegularExpression& re, int from) const +{ + if (!re.isValid()) { + qWarning("QString::indexOf: invalid QRegularExpresssion object"); + return -1; + } + + QRegularExpressionMatch match = re.match(*this, from); + if (match.hasMatch()) + return match.capturedStart(); + + return -1; +} + +/*! + \overload lastIndexOf() + \since 5.0 + + Returns the index position of the last match of the regular + expression \a re in the string, which starts before the index + position \a from. Returns -1 if \a re didn't match anywhere. + + Example: + + \snippet doc/src/snippets/qstring/main.cpp 94 +*/ +int QString::lastIndexOf(const QRegularExpression &re, int from) const +{ + if (!re.isValid()) { + qWarning("QString::lastIndexOf: invalid QRegularExpresssion object"); + return -1; + } + + int endpos = (from < 0) ? (size() + from + 1) : (from + 1); + + QRegularExpressionMatchIterator iterator = re.globalMatch(*this); + int lastIndex = -1; + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + int start = match.capturedStart(); + if (start < endpos) + lastIndex = start; + else + break; + } + + return lastIndex; +} + +/*! \overload contains() + \since 5.0 + + Returns true if the regular expression \a re matches somewhere in + this string; otherwise returns false. +*/ +bool QString::contains(const QRegularExpression &re) const +{ + if (!re.isValid()) { + qWarning("QString::contains: invalid QRegularExpresssion object"); + return false; + } + QRegularExpressionMatch match = re.match(*this); + return match.hasMatch(); +} + +/*! + \overload count() + \since 5.0 + + Returns the number of times the regular expression \a re matches + in the string. + + This function counts overlapping matches, so in the example + below, there are four instances of "ana" or "ama": + + \snippet doc/src/snippets/qstring/main.cpp 95 +*/ +int QString::count(const QRegularExpression &re) const +{ + if (!re.isValid()) { + qWarning("QString::count: invalid QRegularExpresssion object"); + return 0; + } + int count = 0; + int index = -1; + int len = length(); + while (index < len - 1) { + QRegularExpressionMatch match = re.match(*this, index + 1); + if (!match.hasMatch()) + break; + index = match.capturedStart(); + count++; + } + return count; +} +#endif // QT_BOOTSTRAPPED +#endif // QT_NO_REGEXP + /*! \fn int QString::count() const \overload count() @@ -3249,6 +3506,49 @@ public: QString string; }; +static QString extractSections(const QList §ions, + int start, + int end, + QString::SectionFlags flags) +{ + if (start < 0) + start += sections.count(); + if (end < 0) + end += sections.count(); + + QString ret; + int x = 0; + int first_i = start, last_i = end; + for (int i = 0; x <= end && i < sections.size(); ++i) { + const qt_section_chunk §ion = sections.at(i); + const bool empty = (section.length == section.string.length()); + if (x >= start) { + if (x == start) + first_i = i; + if (x == end) + last_i = i; + if (x != start) + ret += section.string; + else + ret += section.string.mid(section.length); + } + if (!empty || !(flags & QString::SectionSkipEmpty)) + x++; + } + + if ((flags & QString::SectionIncludeLeadingSep) && first_i < sections.size()) { + const qt_section_chunk §ion = sections.at(first_i); + ret.prepend(section.string.left(section.length)); + } + + if ((flags & QString::SectionIncludeTrailingSep) && last_i+1 <= sections.size()-1) { + const qt_section_chunk §ion = sections.at(last_i+1); + ret += section.string.left(section.length); + } + + return ret; +} + /*! \overload section() @@ -3282,42 +3582,58 @@ QString QString::section(const QRegExp ®, int start, int end, SectionFlags fl } sections.append(qt_section_chunk(last_len, QString(uc + last_m, n - last_m))); - if(start < 0) - start += sections.count(); - if(end < 0) - end += sections.count(); - - QString ret; - int x = 0; - int first_i = start, last_i = end; - for (int i = 0; x <= end && i < sections.size(); ++i) { - const qt_section_chunk §ion = sections.at(i); - const bool empty = (section.length == section.string.length()); - if (x >= start) { - if(x == start) - first_i = i; - if(x == end) - last_i = i; - if(x != start) - ret += section.string; - else - ret += section.string.mid(section.length); - } - if (!empty || !(flags & SectionSkipEmpty)) - x++; - } - if((flags & SectionIncludeLeadingSep) && first_i < sections.size()) { - const qt_section_chunk §ion = sections.at(first_i); - ret.prepend(section.string.left(section.length)); - } - if((flags & SectionIncludeTrailingSep) && last_i+1 <= sections.size()-1) { - const qt_section_chunk §ion = sections.at(last_i+1); - ret += section.string.left(section.length); - } - return ret; + return extractSections(sections, start, end, flags); } #endif +#ifndef QT_NO_REGEXP +#ifndef QT_BOOTSTRAPPED +/*! + \overload section() + \since 5.0 + + This string is treated as a sequence of fields separated by the + regular expression, \a re. + + \snippet doc/src/snippets/qstring/main.cpp 89 + + \warning Using this QRegularExpression version is much more expensive than + the overloaded string and character versions. + + \sa split() simplified() +*/ +QString QString::section(const QRegularExpression &re, int start, int end, SectionFlags flags) const +{ + if (!re.isValid()) { + qWarning("QString::section: invalid QRegularExpression object"); + return QString(); + } + + const QChar *uc = unicode(); + if (!uc) + return QString(); + + QRegularExpression sep(re); + if (flags & SectionCaseInsensitiveSeps) + sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption); + + QList sections; + int n = length(), m = 0, last_m = 0, last_len = 0; + QRegularExpressionMatchIterator iterator = sep.globalMatch(*this); + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + m = match.capturedStart(); + sections.append(qt_section_chunk(last_len, QString(uc + last_m, m - last_m))); + last_m = m; + last_len = match.capturedLength(); + } + sections.append(qt_section_chunk(last_len, QString(uc + last_m, n - last_m))); + + return extractSections(sections, start, end, flags); +} +#endif // QT_BOOTSTRAPPED +#endif // QT_NO_REGEXP + /*! Returns a substring that contains the \a n leftmost characters of the string. @@ -6077,6 +6393,62 @@ QStringList QString::split(const QRegExp &rx, SplitBehavior behavior) const } #endif +#ifndef QT_NO_REGEXP +#ifndef QT_BOOTSTRAPPED +/*! + \overload + \since 5.0 + + Splits the string into substrings wherever the regular expression + \a re matches, and returns the list of those strings. If \a re + does not match anywhere in the string, split() returns a + single-element list containing this string. + + Here's an example where we extract the words in a sentence + using one or more whitespace characters as the separator: + + \snippet doc/src/snippets/qstring/main.cpp 90 + + Here's a similar example, but this time we use any sequence of + non-word characters as the separator: + + \snippet doc/src/snippets/qstring/main.cpp 91 + + Here's a third example where we use a zero-length assertion, + \bold{\\b} (word boundary), to split the string into an + alternating sequence of non-word and word tokens: + + \snippet doc/src/snippets/qstring/main.cpp 92 + + \sa QStringList::join(), section() +*/ +QStringList QString::split(const QRegularExpression &re, SplitBehavior behavior) const +{ + QStringList list; + if (!re.isValid()) { + qWarning("QString::split: invalid QRegularExpression object"); + return list; + } + + int start = 0; + int end = 0; + QRegularExpressionMatchIterator iterator = re.globalMatch(*this); + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + end = match.capturedStart(); + if (start != end || behavior == KeepEmptyParts) + list.append(mid(start, end - start)); + start = match.capturedEnd(); + } + + if (start != size() || behavior == KeepEmptyParts) + list.append(mid(start)); + + return list; +} +#endif // QT_BOOTSTRAPPED +#endif // QT_NO_REGEXP + /*! \enum QString::NormalizationForm diff --git a/src/corelib/tools/qstring.h b/src/corelib/tools/qstring.h index b71484f4cb..de5cd2cfa9 100644 --- a/src/corelib/tools/qstring.h +++ b/src/corelib/tools/qstring.h @@ -64,6 +64,7 @@ QT_BEGIN_NAMESPACE class QCharRef; class QRegExp; +class QRegularExpression; class QStringList; class QTextCodec; class QLatin1String; @@ -283,6 +284,13 @@ public: inline bool contains(QRegExp &rx) const { return indexOf(rx) != -1; } #endif +#ifndef QT_NO_REGEXP + int indexOf(const QRegularExpression &re, int from = 0) const; + int lastIndexOf(const QRegularExpression &re, int from = -1) const; + bool contains(const QRegularExpression &re) const; + int count(const QRegularExpression &re) const; +#endif + enum SectionFlag { SectionDefault = 0x00, SectionSkipEmpty = 0x01, @@ -297,7 +305,9 @@ public: #ifndef QT_NO_REGEXP QString section(const QRegExp ®, int start, int end = -1, SectionFlags flags = SectionDefault) const; #endif - +#ifndef QT_NO_REGEXP + QString section(const QRegularExpression &re, int start, int end = -1, SectionFlags flags = SectionDefault) const; +#endif QString left(int n) const Q_REQUIRED_RESULT; QString right(int n) const Q_REQUIRED_RESULT; QString mid(int position, int n = -1) const Q_REQUIRED_RESULT; @@ -370,6 +380,11 @@ public: inline QString &remove(const QRegExp &rx) { return replace(rx, QString()); } #endif +#ifndef QT_NO_REGEXP + QString &replace(const QRegularExpression &re, const QString &after); + inline QString &remove(const QRegularExpression &re) + { return replace(re, QString()); } +#endif enum SplitBehavior { KeepEmptyParts, SkipEmptyParts }; @@ -380,7 +395,9 @@ public: #ifndef QT_NO_REGEXP QStringList split(const QRegExp &sep, SplitBehavior behavior = KeepEmptyParts) const Q_REQUIRED_RESULT; #endif - +#ifndef QT_NO_REGEXP + QStringList split(const QRegularExpression &sep, SplitBehavior behavior = KeepEmptyParts) const Q_REQUIRED_RESULT; +#endif enum NormalizationForm { NormalizationForm_D, NormalizationForm_C, diff --git a/tests/auto/corelib/tools/qstring/tst_qstring.cpp b/tests/auto/corelib/tools/qstring/tst_qstring.cpp index 355e4d7d00..7d4a9b5aba 100644 --- a/tests/auto/corelib/tools/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/tools/qstring/tst_qstring.cpp @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -194,6 +195,7 @@ private slots: void localeAwareCompare(); void split_data(); void split(); + void split_regexp_data(); void split_regexp(); void fromUtf16_data(); void fromUtf16(); @@ -1080,6 +1082,17 @@ void tst_QString::indexOf() QCOMPARE( rx2.matchedLength(), -1 ); } + { + QRegularExpression::PatternOptions options = QRegularExpression::NoPatternOption; + if (!bcs) + options |= QRegularExpression::CaseInsensitiveOption; + + QRegularExpression re(QRegularExpression::escape(needle), options); + QEXPECT_FAIL("data58", "QRegularExpression does not support case folding", Continue); + QEXPECT_FAIL("data59", "QRegularExpression does not support case folding", Continue); + QCOMPARE( haystack.indexOf(re, startpos), resultpos ); + } + if (cs == Qt::CaseSensitive) { QCOMPARE( haystack.indexOf(needle, startpos), resultpos ); QCOMPARE( haystack.indexOf(ref, startpos), resultpos ); @@ -1267,6 +1280,15 @@ void tst_QString::lastIndexOf() QCOMPARE(rx1.matchedLength(), -1); QCOMPARE(rx2.matchedLength(), -1); } + + { + QRegularExpression::PatternOptions options = QRegularExpression::NoPatternOption; + if (!caseSensitive) + options |= QRegularExpression::CaseInsensitiveOption; + + QRegularExpression re(QRegularExpression::escape(needle), options); + QCOMPARE(haystack.lastIndexOf(re, from), expected); + } } if (cs == Qt::CaseSensitive) { @@ -1302,6 +1324,9 @@ void tst_QString::count() QCOMPARE(a.count( "", Qt::CaseInsensitive), 16); QCOMPARE(a.count(QRegExp("[FG][HI]")),1); QCOMPARE(a.count(QRegExp("[G][HE]")),2); + QCOMPARE(a.count(QRegularExpression("[FG][HI]")), 1); + QCOMPARE(a.count(QRegularExpression("[G][HE]")), 2); + CREATE_REF(QLatin1String("FG")); QCOMPARE(a.count(ref),2); @@ -1327,6 +1352,8 @@ void tst_QString::contains() QVERIFY(a.contains( "", Qt::CaseInsensitive)); QVERIFY(a.contains(QRegExp("[FG][HI]"))); QVERIFY(a.contains(QRegExp("[G][HE]"))); + QVERIFY(a.contains(QRegularExpression("[FG][HI]"))); + QVERIFY(a.contains(QRegularExpression("[G][HE]"))); CREATE_REF(QLatin1String("FG")); QVERIFY(a.contains(ref)); @@ -2172,6 +2199,9 @@ void tst_QString::replace_regexp() QString s2 = string; s2.replace( QRegExp(regexp), after ); QTEST( s2, "result" ); + s2 = string; + s2.replace( QRegularExpression(regexp), after ); + QTEST( s2, "result" ); } void tst_QString::remove_uint_uint() @@ -2236,8 +2266,13 @@ void tst_QString::remove_regexp() QFETCH( QString, after ); if ( after.length() == 0 ) { - string.remove( QRegExp(regexp) ); - QTEST( string, "result" ); + QString s2 = string; + s2.remove( QRegExp(regexp) ); + QTEST( s2, "result" ); + + s2 = string; + s2.remove( QRegularExpression(regexp) ); + QTEST( s2, "result" ); } else { QCOMPARE( 0, 0 ); // shut QtTest } @@ -3978,8 +4013,12 @@ void tst_QString::section() QFETCH( bool, regexp ); if (regexp) { QCOMPARE( wholeString.section( QRegExp(sep), start, end, QString::SectionFlag(flags) ), sectionString ); + QCOMPARE( wholeString.section( QRegularExpression(sep), start, end, QString::SectionFlag(flags) ), sectionString ); } else { QCOMPARE( wholeString.section( sep, start, end, QString::SectionFlag(flags) ), sectionString ); + QCOMPARE( wholeString.section( QRegExp(QRegExp::escape(sep)), start, end, QString::SectionFlag(flags) ), sectionString ); + QCOMPARE( wholeString.section( QRegularExpression(QRegularExpression::escape(sep)), start, end, QString::SectionFlag(flags) ), sectionString ); + } } @@ -4500,6 +4539,7 @@ void tst_QString::split() QFETCH(QStringList, result); QRegExp rx = QRegExp(QRegExp::escape(sep)); + QRegularExpression re(QRegularExpression::escape(sep)); QStringList list; @@ -4507,6 +4547,8 @@ void tst_QString::split() QVERIFY(list == result); list = str.split(rx); QVERIFY(list == result); + list = str.split(re); + QVERIFY(list == result); if (sep.size() == 1) { list = str.split(sep.at(0)); QVERIFY(list == result); @@ -4516,6 +4558,8 @@ void tst_QString::split() QVERIFY(list == result); list = str.split(rx, QString::KeepEmptyParts); QVERIFY(list == result); + list = str.split(re, QString::KeepEmptyParts); + QVERIFY(list == result); if (sep.size() == 1) { list = str.split(sep.at(0), QString::KeepEmptyParts); QVERIFY(list == result); @@ -4526,39 +4570,51 @@ void tst_QString::split() QVERIFY(list == result); list = str.split(rx, QString::SkipEmptyParts); QVERIFY(list == result); + list = str.split(re, QString::SkipEmptyParts); + QVERIFY(list == result); if (sep.size() == 1) { list = str.split(sep.at(0), QString::SkipEmptyParts); QVERIFY(list == result); } } +void tst_QString::split_regexp_data() +{ + QTest::addColumn("string"); + QTest::addColumn("pattern"); + QTest::addColumn("result"); + + QTest::newRow("data01") << "Some text\n\twith strange whitespace." + << "\\s+" + << (QStringList() << "Some" << "text" << "with" << "strange" << "whitespace." ); + + QTest::newRow("data02") << "This time, a normal English sentence." + << "\\W+" + << (QStringList() << "This" << "time" << "a" << "normal" << "English" << "sentence" << ""); + + QTest::newRow("data03") << "Now: this sentence fragment." + << "\\b" + << (QStringList() << "" << "Now" << ": " << "this" << " " << "sentence" << " " << "fragment" << "."); +} + void tst_QString::split_regexp() { - QString str1 = "Some text\n\twith strange whitespace."; - QStringList list1 = str1.split(QRegExp("\\s+")); - QStringList result1; - result1 << "Some" << "text" << "with" << "strange" << "whitespace."; - QVERIFY(list1 == result1); - list1 = str1.split(QRegExp("\\s"), QString::SkipEmptyParts); - QVERIFY(list1 == result1); + QFETCH(QString, string); + QFETCH(QString, pattern); + QFETCH(QStringList, result); - QString str2 = "This time, a normal English sentence."; - QStringList list2 = str2.split(QRegExp("\\W+")); - QStringList result2; - result2 << "This" << "time" << "a" << "normal" << "English" << "sentence" << ""; - QVERIFY(list2 == result2); - list2 = str2.split(QRegExp("\\W"), QString::SkipEmptyParts); - result2.removeAll(QString()); - QVERIFY(list2 == result2); + QStringList list; + list = string.split(QRegExp(pattern)); + QCOMPARE(list, result); + list = string.split(QRegularExpression(pattern)); + QCOMPARE(list, result); - QString str3 = "Now: this sentence fragment."; - QStringList list3 = str3.split(QRegExp("\\b")); - QStringList result3; - result3 << "" << "Now" << ": " << "this" << " " << "sentence" << " " << "fragment" << "."; - QVERIFY(list3 == result3); - list3 = str3.split(QRegExp("\\b"), QString::SkipEmptyParts); - result3.removeAll(QString()); - QVERIFY(list3 == result3); + result.removeAll(QString()); + + list = string.split(QRegExp(pattern), QString::SkipEmptyParts); + QCOMPARE(list, result); + list = string.split(QRegularExpression(pattern), QString::SkipEmptyParts); + QCOMPARE(list, result); } void tst_QString::fromUtf16_data()