From 361dc074f2301b4b68435c05ccaa7279c0170776 Mon Sep 17 00:00:00 2001 From: Sona Kurazyan Date: Mon, 29 Jun 2020 16:54:15 +0200 Subject: [PATCH] Move QRegExp and its remaining mentions out of QtCore Task-number: QTBUG-85235 Change-Id: Ibd6c98d952c1bb9916b64715c6430fb0d3fe3843 Reviewed-by: Lars Knoll --- qmake/.prev_CMakeLists.txt | 1 - qmake/CMakeLists.txt | 2 - qmake/Makefile.unix | 6 +- qmake/Makefile.win32 | 1 - qmake/doc/src/qmake-manual.qdoc | 3 +- qmake/generators/makefile.cpp | 1 - qmake/main.cpp | 2 +- qmake/qmake.pro | 2 - qmake/qmake_pch.h | 1 - src/corelib/.prev_CMakeLists.txt | 1 - src/corelib/CMakeLists.txt | 1 - .../code/src_corelib_text_qregexp.cpp | 243 - src/corelib/text/qregexp.cpp | 5039 ----------------- src/corelib/text/qregexp.h | 151 - src/corelib/text/text.pri | 2 - src/tools/uic/qclass_lib_map.h | 1 - tests/auto/corelib/text/.prev_CMakeLists.txt | 1 - tests/auto/corelib/text/CMakeLists.txt | 1 - tests/auto/corelib/text/qregexp/.gitignore | 1 - .../auto/corelib/text/qregexp/CMakeLists.txt | 24 - .../text/qregexp/data/qdatastream_4.9.bin | Bin 30 -> 0 bytes .../text/qregexp/data/qdatastream_5.0.bin | Bin 30 -> 0 bytes tests/auto/corelib/text/qregexp/qregexp.pro | 5 - tests/auto/corelib/text/qregexp/qregexp.qrc | 6 - .../auto/corelib/text/qregexp/tst_qregexp.cpp | 1726 ------ .../auto/corelib/text/qstring/tst_qstring.cpp | 2 +- tests/auto/corelib/text/text.pro | 1 - .../corelib/text/qregexp/CMakeLists.txt | 46 - .../benchmarks/corelib/text/qregexp/main.cpp | 615 -- .../corelib/text/qregexp/qregexp.pro | 20 - .../corelib/text/qregexp/qregexp.qrc | 6 - 31 files changed, 5 insertions(+), 7906 deletions(-) delete mode 100644 src/corelib/doc/snippets/code/src_corelib_text_qregexp.cpp delete mode 100644 src/corelib/text/qregexp.cpp delete mode 100644 src/corelib/text/qregexp.h delete mode 100644 tests/auto/corelib/text/qregexp/.gitignore delete mode 100644 tests/auto/corelib/text/qregexp/CMakeLists.txt delete mode 100644 
tests/auto/corelib/text/qregexp/data/qdatastream_4.9.bin delete mode 100644 tests/auto/corelib/text/qregexp/data/qdatastream_5.0.bin delete mode 100644 tests/auto/corelib/text/qregexp/qregexp.pro delete mode 100644 tests/auto/corelib/text/qregexp/qregexp.qrc delete mode 100644 tests/auto/corelib/text/qregexp/tst_qregexp.cpp delete mode 100644 tests/benchmarks/corelib/text/qregexp/CMakeLists.txt delete mode 100644 tests/benchmarks/corelib/text/qregexp/main.cpp delete mode 100644 tests/benchmarks/corelib/text/qregexp/qregexp.pro delete mode 100644 tests/benchmarks/corelib/text/qregexp/qregexp.qrc diff --git a/qmake/.prev_CMakeLists.txt b/qmake/.prev_CMakeLists.txt index 2bfc56b37f..127da3d71f 100644 --- a/qmake/.prev_CMakeLists.txt +++ b/qmake/.prev_CMakeLists.txt @@ -77,7 +77,6 @@ qt_add_tool(${target_name} ../src/corelib/text/qchar.h ../src/corelib/text/qlocale.cpp ../src/corelib/text/qlocale.h ../src/corelib/text/qlocale_tools.cpp ../src/corelib/text/qlocale_tools_p.h - ../src/corelib/text/qregexp.cpp ../src/corelib/text/qregexp.h ../src/corelib/text/qregularexpression.cpp ../src/corelib/text/qregularexpression.h ../src/corelib/text/qstring.cpp ../src/corelib/text/qstring.h ../src/corelib/text/qstringbuilder.cpp ../src/corelib/text/qstringbuilder.h diff --git a/qmake/CMakeLists.txt b/qmake/CMakeLists.txt index 9931a2e14a..2a64d49c54 100644 --- a/qmake/CMakeLists.txt +++ b/qmake/CMakeLists.txt @@ -85,7 +85,6 @@ qt_add_tool(${target_name} ../src/corelib/text/qchar.h ../src/corelib/text/qlocale.cpp ../src/corelib/text/qlocale.h ../src/corelib/text/qlocale_tools.cpp ../src/corelib/text/qlocale_tools_p.h - ../src/corelib/text/qregexp.cpp ../src/corelib/text/qregexp.h ../src/corelib/text/qregularexpression.cpp ../src/corelib/text/qregularexpression.h ../src/corelib/text/qstring.cpp ../src/corelib/text/qstring.h ../src/corelib/text/qstringbuilder.cpp ../src/corelib/text/qstringbuilder.h @@ -114,7 +113,6 @@ qt_add_tool(${target_name} ../src/corelib/text/qlocale.cpp 
../src/corelib/text/qlocale.h ../src/corelib/text/qlocale_tools.cpp ../src/corelib/text/qlocale_tools_p.h ../src/corelib/tools/qmap.cpp ../src/corelib/tools/qmap.h - ../src/corelib/text/qregexp.cpp ../src/corelib/text/qregexp.h ../src/corelib/text/qregularexpression.cpp ../src/corelib/text/qregularexpression.h ../src/corelib/tools/qringbuffer.cpp # special case ../src/corelib/text/qstring.cpp ../src/corelib/text/qstring.h diff --git a/qmake/Makefile.unix b/qmake/Makefile.unix index 660a21daa0..75d3fe3ca7 100644 --- a/qmake/Makefile.unix +++ b/qmake/Makefile.unix @@ -30,7 +30,7 @@ QOBJS = \ qarraydata.o qbitarray.o qbytearray.o qbytearraylist.o qbytearraymatcher.o \ qcalendar.o qgregoriancalendar.o qromancalendar.o \ qcryptographichash.o qdatetime.o qhash.o \ - qlocale.o qlocale_tools.o qmap.o qregularexpression.o qregexp.o qringbuffer.o \ + qlocale.o qlocale_tools.o qmap.o qregularexpression.o qringbuffer.o \ qstringbuilder.o qstring.o qstringconverter.o qstringlist.o qversionnumber.o \ qvsnprintf.o \ pcre2_auto_possess.o pcre2_chartables.o pcre2_compile.o pcre2_config.o \ @@ -116,7 +116,6 @@ DEPEND_SRC = \ $(SOURCE_PATH)/src/corelib/text/qlocale.cpp \ $(SOURCE_PATH)/src/corelib/text/qlocale_tools.cpp \ $(SOURCE_PATH)/src/corelib/text/qregularexpression.cpp \ - $(SOURCE_PATH)/src/corelib/text/qregexp.cpp \ $(SOURCE_PATH)/src/corelib/text/qstringbuilder.cpp \ $(SOURCE_PATH)/src/corelib/text/qstringconverter.cpp \ $(SOURCE_PATH)/src/corelib/text/qstring.cpp \ @@ -449,9 +448,6 @@ qtemporaryfile.o: $(SOURCE_PATH)/src/corelib/io/qtemporaryfile.cpp qregularexpression.o: $(SOURCE_PATH)/src/corelib/text/qregularexpression.cpp $(CXX) -c -o $@ $(CXXFLAGS) $< -qregexp.o: $(SOURCE_PATH)/src/corelib/text/qregexp.cpp - $(CXX) -c -o $@ $(CXXFLAGS) $< - qbitarray.o: $(SOURCE_PATH)/src/corelib/tools/qbitarray.cpp $(CXX) -c -o $@ $(CXXFLAGS) $< diff --git a/qmake/Makefile.win32 b/qmake/Makefile.win32 index 6478c43edd..05ea1fb338 100644 --- a/qmake/Makefile.win32 +++ 
b/qmake/Makefile.win32 @@ -102,7 +102,6 @@ QTOBJS= \ qmap.obj \ qoperatingsystemversion.obj \ qoperatingsystemversion_win.obj \ - qregexp.obj \ qromancalendar.obj \ qstring.obj \ qstringconverter.obj \ diff --git a/qmake/doc/src/qmake-manual.qdoc b/qmake/doc/src/qmake-manual.qdoc index 9b37756b79..085cd5c461 100644 --- a/qmake/doc/src/qmake-manual.qdoc +++ b/qmake/doc/src/qmake-manual.qdoc @@ -3535,7 +3535,8 @@ \section2 re_escape(string) Returns the \c string with every special regular expression character - escaped with a backslash. This function is a wrapper around QRegExp::escape. + escaped with a backslash. This function is a wrapper around + QRegularExpression::escape. \section2 read_registry(tree, key[, flag]) diff --git a/qmake/generators/makefile.cpp b/qmake/generators/makefile.cpp index dfe9a1c0b9..d38b8ad9f4 100644 --- a/qmake/generators/makefile.cpp +++ b/qmake/generators/makefile.cpp @@ -41,7 +41,6 @@ #include #include #include -#include #if defined(Q_OS_UNIX) #include diff --git a/qmake/main.cpp b/qmake/main.cpp index 3cd79145e5..36ba631127 100644 --- a/qmake/main.cpp +++ b/qmake/main.cpp @@ -110,7 +110,7 @@ static int doSed(int argc, char **argv) && (c == QLatin1Char('+') || c == QLatin1Char('?') || c == QLatin1Char('|') || c == QLatin1Char('{') || c == QLatin1Char('}') || c == QLatin1Char('(') || c == QLatin1Char(')'))) { - // translate sed rx to QRegExp + // translate sed rx to QRegularExpression escaped ^= 1; } if (escaped) { diff --git a/qmake/qmake.pro b/qmake/qmake.pro index ee190a820a..98eb3d1f6f 100644 --- a/qmake/qmake.pro +++ b/qmake/qmake.pro @@ -152,7 +152,6 @@ SOURCES += \ qmap.cpp \ qmetatype.cpp \ qnumeric.cpp \ - qregexp.cpp \ qregularexpression.cpp \ qromancalendar.cpp \ qsettings.cpp \ @@ -209,7 +208,6 @@ HEADERS += \ qmap.h \ qmetatype.h \ qnumeric.h \ - qregexp.h \ qregularexpression.h \ qromancalendar_p.h \ qstring.h \ diff --git a/qmake/qmake_pch.h b/qmake/qmake_pch.h index c97c872311..fd8c78d7b6 100644 --- a/qmake/qmake_pch.h 
+++ b/qmake/qmake_pch.h @@ -52,7 +52,6 @@ #include #include #include -#include //#include //#include "option.h" diff --git a/src/corelib/.prev_CMakeLists.txt b/src/corelib/.prev_CMakeLists.txt index f93c934c85..23b44f739d 100644 --- a/src/corelib/.prev_CMakeLists.txt +++ b/src/corelib/.prev_CMakeLists.txt @@ -145,7 +145,6 @@ qt_add_module(Core text/qlocale.cpp text/qlocale.h text/qlocale_p.h text/qlocale_data_p.h text/qlocale_tools.cpp text/qlocale_tools_p.h - text/qregexp.cpp text/qregexp.h text/qstring.cpp text/qstring.h text/qstring_compat.cpp text/qstringalgorithms.h text/qstringalgorithms_p.h diff --git a/src/corelib/CMakeLists.txt b/src/corelib/CMakeLists.txt index 9deda274b7..84d1a79b9b 100644 --- a/src/corelib/CMakeLists.txt +++ b/src/corelib/CMakeLists.txt @@ -164,7 +164,6 @@ qt_add_module(Core text/qlocale.cpp text/qlocale.h text/qlocale_p.h text/qlocale_data_p.h text/qlocale_tools.cpp text/qlocale_tools_p.h - text/qregexp.cpp text/qregexp.h text/qstring.cpp text/qstring.h text/qstring_compat.cpp text/qstringalgorithms.h text/qstringalgorithms_p.h diff --git a/src/corelib/doc/snippets/code/src_corelib_text_qregexp.cpp b/src/corelib/doc/snippets/code/src_corelib_text_qregexp.cpp deleted file mode 100644 index 8339ea413e..0000000000 --- a/src/corelib/doc/snippets/code/src_corelib_text_qregexp.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the documentation of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:BSD$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. 
For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** BSD License Usage -** Alternatively, you may use this file under the terms of the BSD license -** as follows: -** -** "Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions are -** met: -** * Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** * Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in -** the documentation and/or other materials provided with the -** distribution. -** * Neither the name of The Qt Company Ltd nor the names of its -** contributors may be used to endorse or promote products derived -** from this software without specific prior written permission. -** -** -** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -** LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -** A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -** OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -//! 
[0] -QRegExp rx("(\\d+)"); -QString str = "Offsets: 12 14 99 231 7"; -QStringList list; -int pos = 0; - -while ((pos = rx.indexIn(str, pos)) != -1) { - list << rx.cap(1); - pos += rx.matchedLength(); -} -// list: ["12", "14", "99", "231", "7"] -//! [0] - - -//! [1] -QRegExp rx("*.txt"); -rx.setPatternSyntax(QRegExp::Wildcard); -rx.exactMatch("README.txt"); // returns true -rx.exactMatch("welcome.txt.bak"); // returns false -//! [1] - - -//! [2] -QRegExp rx("ro+m"); -rx.setMinimal(true); -//! [2] - - -//! [3] -QRegExp mark("\\b" // word boundary - "[Mm]ark" // the word we want to match - ); -//! [3] - - -//! [4] -QRegExp rx("^\\d\\d?$"); // match integers 0 to 99 -rx.indexIn("123"); // returns -1 (no match) -rx.indexIn("-6"); // returns -1 (no match) -rx.indexIn("6"); // returns 0 (matched at position 0) -//! [4] - - -//! [5] -QRegExp rx("^\\S+$"); // match strings without whitespace -rx.indexIn("Hello world"); // returns -1 (no match) -rx.indexIn("This_is-OK"); // returns 0 (matched at position 0) -//! [5] - - -//! [6] -QRegExp rx("\\b(mail|letter|correspondence)\\b"); -rx.indexIn("I sent you an email"); // returns -1 (no match) -rx.indexIn("Please write the letter"); // returns 17 -//! [6] - - -//! [7] -QString captured = rx.cap(1); // captured == "letter" -//! [7] - - -//! [8] -QRegExp rx("&(?!amp;)"); // match ampersands but not &amp; -QString line1 = "This & that"; -line1.replace(rx, "&amp;"); -// line1 == "This &amp; that" -QString line2 = "His &amp; hers & theirs"; -line2.replace(rx, "&amp;"); -// line2 == "His &amp; hers &amp; theirs" -//! [8] - - -//! [9] -QString str = "One Eric another Eirik, and an Ericsson. " - "How many Eiriks, Eric?"; -QRegExp rx("\\b(Eric|Eirik)\\b"); // match Eric or Eirik -int pos = 0; // where we are in the string -int count = 0; // how many Eric and Eirik's we've counted -while (pos >= 0) { - pos = rx.indexIn(str, pos); - if (pos >= 0) { - ++pos; // move along in str - ++count; // count our Eric or Eirik - } -} -//! [9] - - -//!
[10] -str = "The Qt Company Ltd\tqt.io\tFinland"; -QString company, web, country; -rx.setPattern("^([^\t]+)\t([^\t]+)\t([^\t]+)$"); -if (rx.indexIn(str) != -1) { - company = rx.cap(1); - web = rx.cap(2); - country = rx.cap(3); -} -//! [10] - - -//! [11] -QStringList field = str.split("\t"); -//! [11] - - -//! [12] -QRegExp rx("*.html"); -rx.setPatternSyntax(QRegExp::Wildcard); -rx.exactMatch("index.html"); // returns true -rx.exactMatch("default.htm"); // returns false -rx.exactMatch("readme.txt"); // returns false -//! [12] - - -//! [13] -QString str = "offsets: 1.23 .50 71.00 6.00"; -QRegExp rx("\\d*\\.\\d+"); // primitive floating point matching -int count = 0; -int pos = 0; -while ((pos = rx.indexIn(str, pos)) != -1) { - ++count; - pos += rx.matchedLength(); -} -// pos will be 9, 14, 18 and finally 24; count will end up as 4 -//! [13] - - -//! [14] -QRegExp rx("(\\d+)(\\s*)(cm|inch(es)?)"); -int pos = rx.indexIn("Length: 36 inches"); -QStringList list = rx.capturedTexts(); -// list is now ("36 inches", "36", " ", "inches", "es") -//! [14] - - -//! [15] -QRegExp rx("(\\d+)(?:\\s*)(cm|inch(?:es)?)"); -int pos = rx.indexIn("Length: 36 inches"); -QStringList list = rx.capturedTexts(); -// list is now ("36 inches", "36", "inches") -//! [15] - - -//! [16] -QStringList list = rx.capturedTexts(); -QStringList::iterator it = list.begin(); -while (it != list.end()) { - myProcessing(*it); - ++it; -} -//! [16] - - -//! [17] -QRegExp rxlen("(\\d+)(?:\\s*)(cm|inch)"); -int pos = rxlen.indexIn("Length: 189cm"); -if (pos > -1) { - QString value = rxlen.cap(1); // "189" - QString unit = rxlen.cap(2); // "cm" - // ... -} -//! [17] - - -//! [18] -QRegExp rx("/([a-z]+)/([a-z]+)"); -rx.indexIn("Output /dev/null"); // returns 7 (position of /dev/null) -rx.pos(0); // returns 7 (position of /dev/null) -rx.pos(1); // returns 8 (position of dev) -rx.pos(2); // returns 12 (position of null) -//! [18] - - -//! 
[19] -s1 = QRegExp::escape("bingo"); // s1 == "bingo" -s2 = QRegExp::escape("f(x)"); // s2 == "f\\(x\\)" -//! [19] - - -//! [20] -QRegExp rx("(" + QRegExp::escape(name) + - "|" + QRegExp::escape(alias) + ")"); -//! [20] - -{ -//! [21] -QString p("a .*|pattern"); - -// re matches exactly the pattern string p -QRegularExpression re(QRegularExpression::anchoredPattern(p)); -//! [21] -} diff --git a/src/corelib/text/qregexp.cpp b/src/corelib/text/qregexp.cpp deleted file mode 100644 index d7a2434b52..0000000000 --- a/src/corelib/text/qregexp.cpp +++ /dev/null @@ -1,5039 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. 
-** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include "qregexp.h" - -#include "qalgorithms.h" -#include "qbitarray.h" -#include "qcache.h" -#include "qdatastream.h" -#include "qdebug.h" -#include "qhashfunctions.h" -#include "qlist.h" -#include "qmap.h" -#include "qmutex.h" -#include "qstring.h" -#include "qstringlist.h" -#include "qstringmatcher.h" -#include "private/qlocking_p.h" - -#include -#include - -QT_BEGIN_NAMESPACE - -// error strings for the regexp parser -#define RXERR_OK QT_TRANSLATE_NOOP("QRegExp", "no error occurred") -#define RXERR_DISABLED QT_TRANSLATE_NOOP("QRegExp", "disabled feature used") -#define RXERR_CHARCLASS QT_TRANSLATE_NOOP("QRegExp", "bad char class syntax") -#define RXERR_LOOKAHEAD QT_TRANSLATE_NOOP("QRegExp", "bad lookahead syntax") -#define RXERR_LOOKBEHIND QT_TRANSLATE_NOOP("QRegExp", "lookbehinds not supported, see QTBUG-2371") -#define RXERR_REPETITION QT_TRANSLATE_NOOP("QRegExp", "bad repetition syntax") -#define RXERR_OCTAL QT_TRANSLATE_NOOP("QRegExp", "invalid octal value") -#define RXERR_LEFTDELIM QT_TRANSLATE_NOOP("QRegExp", "missing left delim") -#define RXERR_END QT_TRANSLATE_NOOP("QRegExp", "unexpected end") -#define RXERR_LIMIT QT_TRANSLATE_NOOP("QRegExp", "met internal limit") -#define RXERR_INTERVAL 
QT_TRANSLATE_NOOP("QRegExp", "invalid interval") -#define RXERR_CATEGORY QT_TRANSLATE_NOOP("QRegExp", "invalid category") - -/*! - \class QRegExp - \inmodule QtCore - \obsolete Use QRegularExpression instead - \reentrant - \brief The QRegExp class provides pattern matching using regular expressions. - - \ingroup tools - \ingroup shared - - \keyword regular expression - - This class is deprecated in Qt 6. Please use QRegularExpression instead - for all new code. For guidelines on porting old code from QRegExp to - QRegularExpression, see {Porting to QRegularExpression} - - A regular expression, or "regexp", is a pattern for matching - substrings in a text. This is useful in many contexts, e.g., - - \table - \row \li Validation - \li A regexp can test whether a substring meets some criteria, - e.g. is an integer or contains no whitespace. - \row \li Searching - \li A regexp provides more powerful pattern matching than - simple substring matching, e.g., match one of the words - \e{mail}, \e{letter} or \e{correspondence}, but none of the - words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc. - \row \li Search and Replace - \li A regexp can replace all occurrences of a substring with a - different substring, e.g., replace all occurrences of \e{&} - with \e{\&amp;} except where the \e{&} is already followed by - an \e{amp;}. - \row \li String Splitting - \li A regexp can be used to identify where a string should be - split apart, e.g. splitting tab-delimited strings. - \endtable - - A brief introduction to regexps is presented, a description of - Qt's regexp language, some examples, and the function - documentation itself. QRegExp is modeled on Perl's regexp - language. It fully supports Unicode. QRegExp can also be used in a - simpler, \e{wildcard mode} that is similar to the functionality - found in command shells. The syntax rules used by QRegExp can be - changed with setPatternSyntax().
In particular, the pattern syntax - can be set to QRegExp::FixedString, which means the pattern to be - matched is interpreted as a plain string, i.e., special characters - (e.g., backslash) are not escaped. - - A good text on regexps is \e {Mastering Regular Expressions} - (Third Edition) by Jeffrey E. F. Friedl, ISBN 0-596-52812-4. - - \note In Qt 5, the new QRegularExpression class provides a Perl - compatible implementation of regular expressions and is recommended - in place of QRegExp. - - \tableofcontents - - \section1 Introduction - - Regexps are built up from expressions, quantifiers, and - assertions. The simplest expression is a character, e.g. \b{x} - or \b{5}. An expression can also be a set of characters - enclosed in square brackets. \b{[ABCD]} will match an \b{A} - or a \b{B} or a \b{C} or a \b{D}. We can write this same - expression as \b{[A-D]}, and an expression to match any - capital letter in the English alphabet is written as - \b{[A-Z]}. - - A quantifier specifies the number of occurrences of an expression - that must be matched. \b{x{1,1}} means match one and only one - \b{x}. \b{x{1,5}} means match a sequence of \b{x} - characters that contains at least one \b{x} but no more than - five. - - Note that in general regexps cannot be used to check for balanced - brackets or tags. For example, a regexp can be written to match an - opening html \c{<b>} and its closing \c{</b>}, if the \c{<b>} tags - are not nested, but if the \c{<b>} tags are nested, that same - regexp will match an opening \c{<b>} tag with the wrong closing - \c{</b>}. For the fragment \c{<b>bold <b>bolder</b></b>}, the - first \c{<b>} would be matched with the first \c{</b>}, which is - not correct. However, it is possible to write a regexp that will - match nested brackets or tags correctly, but only if the number of - nesting levels is fixed and known. If the number of nesting levels - is not fixed and known, it is impossible to write a regexp that - will not fail.
- - Suppose we want a regexp to match integers in the range 0 to 99. - At least one digit is required, so we start with the expression - \b{[0-9]{1,1}}, which matches a single digit exactly once. This - regexp matches integers in the range 0 to 9. To match integers up - to 99, increase the maximum number of occurrences to 2, so the - regexp becomes \b{[0-9]{1,2}}. This regexp satisfies the - original requirement to match integers from 0 to 99, but it will - also match integers that occur in the middle of strings. If we - want the matched integer to be the whole string, we must use the - anchor assertions, \b{^} (caret) and \b{$} (dollar). When - \b{^} is the first character in a regexp, it means the regexp - must match from the beginning of the string. When \b{$} is the - last character of the regexp, it means the regexp must match to - the end of the string. The regexp becomes \b{^[0-9]{1,2}$}. - Note that assertions, e.g. \b{^} and \b{$}, do not match - characters but locations in the string. - - If you have seen regexps described elsewhere, they may have looked - different from the ones shown here. This is because some sets of - characters and some quantifiers are so common that they have been - given special symbols to represent them. \b{[0-9]} can be - replaced with the symbol \b{\\d}. The quantifier to match - exactly one occurrence, \b{{1,1}}, can be replaced with the - expression itself, i.e. \b{x{1,1}} is the same as \b{x}. So - our 0 to 99 matcher could be written as \b{^\\d{1,2}$}. It can - also be written \b{^\\d\\d{0,1}$}, i.e. \e{From the start of - the string, match a digit, followed immediately by 0 or 1 digits}. - In practice, it would be written as \b{^\\d\\d?$}. The \b{?} - is shorthand for the quantifier \b{{0,1}}, i.e. 0 or 1 - occurrences. \b{?} makes an expression optional. 
The regexp - \b{^\\d\\d?$} means \e{From the beginning of the string, match - one digit, followed immediately by 0 or 1 more digit, followed - immediately by end of string}. - - To write a regexp that matches one of the words 'mail' \e or - 'letter' \e or 'correspondence' but does not match words that - contain these words, e.g., 'email', 'mailman', 'mailer', and - 'letterbox', start with a regexp that matches 'mail'. Expressed - fully, the regexp is \b{m{1,1}a{1,1}i{1,1}l{1,1}}, but because - a character expression is automatically quantified by - \b{{1,1}}, we can simplify the regexp to \b{mail}, i.e., an - 'm' followed by an 'a' followed by an 'i' followed by an 'l'. Now - we can use the vertical bar \b{|}, which means \b{or}, to - include the other two words, so our regexp for matching any of the - three words becomes \b{mail|letter|correspondence}. Match - 'mail' \b{or} 'letter' \b{or} 'correspondence'. While this - regexp will match one of the three words we want to match, it will - also match words we don't want to match, e.g., 'email'. To - prevent the regexp from matching unwanted words, we must tell it - to begin and end the match at word boundaries. First we enclose - our regexp in parentheses, \b{(mail|letter|correspondence)}. - Parentheses group expressions together, and they identify a part - of the regexp that we wish to \l{capturing text}{capture}. - Enclosing the expression in parentheses allows us to use it as a - component in more complex regexps. It also allows us to examine - which of the three words was actually matched. To force the match - to begin and end on word boundaries, we enclose the regexp in - \b{\\b} \e{word boundary} assertions: - \b{\\b(mail|letter|correspondence)\\b}. Now the regexp means: - \e{Match a word boundary, followed by the regexp in parentheses, - followed by a word boundary}. The \b{\\b} assertion matches a - \e position in the regexp, not a \e character. 
A word boundary is - any non-word character, e.g., a space, newline, or the beginning - or ending of a string. - - If we want to replace ampersand characters with the HTML entity - \b{\&amp;}, the regexp to match is simply \b{\&}. But this - regexp will also match ampersands that have already been converted - to HTML entities. We want to replace only ampersands that are not - already followed by \b{amp;}. For this, we need the negative - lookahead assertion, \b{(?!}__\b{)}. The regexp can then be - written as \b{\&(?!amp;)}, i.e. \e{Match an ampersand that is} - \b{not} \e{followed by} \b{amp;}. - - If we want to count all the occurrences of 'Eric' and 'Eirik' in a - string, two valid solutions are \b{\\b(Eric|Eirik)\\b} and - \b{\\bEi?ri[ck]\\b}. The word boundary assertion '\\b' is - required to avoid matching words that contain either name, - e.g. 'Ericsson'. Note that the second regexp matches more - spellings than we want: 'Eric', 'Erik', 'Eiric' and 'Eirik'. - - Some of the examples discussed above are implemented in the - \l{#code-examples}{code examples} section. - - \target characters-and-abbreviations-for-sets-of-characters - \section1 Characters and Abbreviations for Sets of Characters - - \table - \header \li Element \li Meaning - \row \li \b{c} - \li A character represents itself unless it has a special - regexp meaning. e.g. \b{c} matches the character \e c. - \row \li \b{\\c} - \li A character that follows a backslash matches the character - itself, except as specified below. e.g., To match a literal - caret at the beginning of a string, write \b{\\^}. - \row \li \b{\\a} - \li Matches the ASCII bell (BEL, 0x07). - \row \li \b{\\f} - \li Matches the ASCII form feed (FF, 0x0C). - \row \li \b{\\n} - \li Matches the ASCII line feed (LF, 0x0A, Unix newline). - \row \li \b{\\r} - \li Matches the ASCII carriage return (CR, 0x0D). - \row \li \b{\\t} - \li Matches the ASCII horizontal tab (HT, 0x09).
- \row \li \b{\\v} - \li Matches the ASCII vertical tab (VT, 0x0B). - \row \li \b{\\x\e{hhhh}} - \li Matches the Unicode character corresponding to the - hexadecimal number \e{hhhh} (between 0x0000 and 0xFFFF). - \row \li \b{\\0\e{ooo}} (i.e., \\zero \e{ooo}) - \li matches the ASCII/Latin1 character for the octal number - \e{ooo} (between 0 and 0377). - \row \li \b{. (dot)} - \li Matches any character (including newline). - \row \li \b{\\d} - \li Matches a digit (QChar::isDigit()). - \row \li \b{\\D} - \li Matches a non-digit. - \row \li \b{\\s} - \li Matches a whitespace character (QChar::isSpace()). - \row \li \b{\\S} - \li Matches a non-whitespace character. - \row \li \b{\\w} - \li Matches a word character (QChar::isLetterOrNumber(), QChar::isMark(), or '_'). - \row \li \b{\\W} - \li Matches a non-word character. - \row \li \b{\\\e{n}} - \li The \e{n}-th backreference, e.g. \\1, \\2, etc. - \endtable - - \b{Note:} The C++ compiler transforms backslashes in strings. - To include a \b{\\} in a regexp, enter it twice, i.e. \c{\\}. - To match the backslash character itself, enter it four times, i.e. - \c{\\\\}. - - \target sets-of-characters - \section1 Sets of Characters - - Square brackets mean match any character contained in the square - brackets. The character set abbreviations described above can - appear in a character set in square brackets. Except for the - character set abbreviations and the following two exceptions, - characters do not have special meanings in square brackets. - - \table - \row \li \b{^} - - \li The caret negates the character set if it occurs as the - first character (i.e. immediately after the opening square - bracket). \b{[abc]} matches 'a' or 'b' or 'c', but - \b{[^abc]} matches anything \e but 'a' or 'b' or 'c'. - - \row \li \b{-} - - \li The dash indicates a range of characters. \b{[W-Z]} - matches 'W' or 'X' or 'Y' or 'Z'. 
- - \endtable - - Using the predefined character set abbreviations is more portable - than using character ranges across platforms and languages. For - example, \b{[0-9]} matches a digit in Western alphabets but - \b{\\d} matches a digit in \e any alphabet. - - Note: In other regexp documentation, sets of characters are often - called "character classes". - - \target quantifiers - \section1 Quantifiers - - By default, an expression is automatically quantified by - \b{{1,1}}, i.e. it should occur exactly once. In the following - list, \b{\e {E}} stands for expression. An expression is a - character, or an abbreviation for a set of characters, or a set of - characters in square brackets, or an expression in parentheses. - - \table - \row \li \b{\e {E}?} - - \li Matches zero or one occurrences of \e E. This quantifier - means \e{The previous expression is optional}, because it - will match whether or not the expression is found. \b{\e - {E}?} is the same as \b{\e {E}{0,1}}. e.g., \b{dents?} - matches 'dent' or 'dents'. - - \row \li \b{\e {E}+} - - \li Matches one or more occurrences of \e E. \b{\e {E}+} is - the same as \b{\e {E}{1,}}. e.g., \b{0+} matches '0', - '00', '000', etc. - - \row \li \b{\e {E}*} - - \li Matches zero or more occurrences of \e E. It is the same - as \b{\e {E}{0,}}. The \b{*} quantifier is often used - in error where \b{+} should be used. For example, if - \b{\\s*$} is used in an expression to match strings that - end in whitespace, it will match every string because - \b{\\s*$} means \e{Match zero or more whitespaces followed - by end of string}. The correct regexp to match strings that - have at least one trailing whitespace character is - \b{\\s+$}. - - \row \li \b{\e {E}{n}} - - \li Matches exactly \e n occurrences of \e E. \b{\e {E}{n}} - is the same as repeating \e E \e n times. For example, - \b{x{5}} is the same as \b{xxxxx}. It is also the same - as \b{\e {E}{n,n}}, e.g. \b{x{5,5}}. 
- - \row \li \b{\e {E}{n,}} - \li Matches at least \e n occurrences of \e E. - - \row \li \b{\e {E}{,m}} - \li Matches at most \e m occurrences of \e E. \b{\e {E}{,m}} - is the same as \b{\e {E}{0,m}}. - - \row \li \b{\e {E}{n,m}} - \li Matches at least \e n and at most \e m occurrences of \e E. - \endtable - - To apply a quantifier to more than just the preceding character, - use parentheses to group characters together in an expression. For - example, \b{tag+} matches a 't' followed by an 'a' followed by - at least one 'g', whereas \b{(tag)+} matches at least one - occurrence of 'tag'. - - Note: Quantifiers are normally "greedy". They always match as much - text as they can. For example, \b{0+} matches the first zero it - finds and all the consecutive zeros after the first zero. Applied - to '20005', it matches '2\underline{000}5'. Quantifiers can be made - non-greedy, see setMinimal(). - - \target capturing parentheses - \target backreferences - \section1 Capturing Text - - Parentheses allow us to group elements together so that we can - quantify and capture them. For example if we have the expression - \b{mail|letter|correspondence} that matches a string we know - that \e one of the words matched but not which one. Using - parentheses allows us to "capture" whatever is matched within - their bounds, so if we used \b{(mail|letter|correspondence)} - and matched this regexp against the string "I sent you some email" - we can use the cap() or capturedTexts() functions to extract the - matched characters, in this case 'mail'. - - We can use captured text within the regexp itself. To refer to the - captured text we use \e backreferences which are indexed from 1, - the same as for cap(). 
For example we could search for duplicate - words in a string using \b{\\b(\\w+)\\W+\\1\\b} which means match a - word boundary followed by one or more word characters followed by - one or more non-word characters followed by the same text as the - first parenthesized expression followed by a word boundary. - - If we want to use parentheses purely for grouping and not for - capturing we can use the non-capturing syntax, e.g. - \b{(?:green|blue)}. Non-capturing parentheses begin '(?:' and - end ')'. In this example we match either 'green' or 'blue' but we - do not capture the match so we only know whether or not we matched - but not which color we actually found. Using non-capturing - parentheses is more efficient than using capturing parentheses - since the regexp engine has to do less book-keeping. - - Both capturing and non-capturing parentheses may be nested. - - \target greedy quantifiers - - For historical reasons, quantifiers (e.g. \b{*}) that apply to - capturing parentheses are more "greedy" than other quantifiers. - For example, \b{a*(a*)} will match "aaa" with cap(1) == "aaa". - This behavior is different from what other regexp engines do - (notably, Perl). To obtain a more intuitive capturing behavior, - specify QRegExp::RegExp2 to the QRegExp constructor or call - setPatternSyntax(QRegExp::RegExp2). - - \target cap_in_a_loop - - When the number of matches cannot be determined in advance, a - common idiom is to use cap() in a loop. For example: - - \snippet code/src_corelib_text_qregexp.cpp 0 - - \target assertions - \section1 Assertions - - Assertions make some statement about the text at the point where - they occur in the regexp but they do not match any characters. In - the following list \b{\e {E}} stands for any expression. - - \table - \row \li \b{^} - \li The caret signifies the beginning of the string. If you - wish to match a literal \c{^} you must escape it by - writing \c{\\^}. 
For example, \b{^#include} will only - match strings which \e begin with the characters '#include'. - (When the caret is the first character of a character set it - has a special meaning, see \l{#sets-of-characters}{Sets of Characters}.) - - \row \li \b{$} - \li The dollar signifies the end of the string. For example - \b{\\d\\s*$} will match strings which end with a digit - optionally followed by whitespace. If you wish to match a - literal \c{$} you must escape it by writing - \c{\\$}. - - \row \li \b{\\b} - \li A word boundary. For example the regexp - \b{\\bOK\\b} means match immediately after a word - boundary (e.g. start of string or whitespace) the letter 'O' - then the letter 'K' immediately before another word boundary - (e.g. end of string or whitespace). But note that the - assertion does not actually match any whitespace so if we - write \b{(\\bOK\\b)} and we have a match it will only - contain 'OK' even if the string is "It's \underline{OK} now". - - \row \li \b{\\B} - \li A non-word boundary. This assertion is true wherever - \b{\\b} is false. For example if we searched for - \b{\\Bon\\B} in "Left on" the match would fail (space - and end of string aren't non-word boundaries), but it would - match in "t\underline{on}ne". - - \row \li \b{(?=\e E)} - \li Positive lookahead. This assertion is true if the - expression matches at this point in the regexp. For example, - \b{const(?=\\s+char)} matches 'const' whenever it is - followed by 'char', as in 'static \underline{const} char *'. - (Compare with \b{const\\s+char}, which matches 'static - \underline{const char} *'.) - - \row \li \b{(?!\e E)} - \li Negative lookahead. This assertion is true if the - expression does not match at this point in the regexp. For - example, \b{const(?!\\s+char)} matches 'const' \e except - when it is followed by 'char'. 
- \endtable - - \target QRegExp wildcard matching - \section1 Wildcard Matching - - Most command shells such as \e bash or \e cmd.exe support "file - globbing", the ability to identify a group of files by using - wildcards. The setPatternSyntax() function is used to switch - between regexp and wildcard mode. Wildcard matching is much - simpler than full regexps and has only four features: - - \table - \row \li \b{c} - \li Any character represents itself apart from those mentioned - below. Thus \b{c} matches the character \e c. - \row \li \b{?} - \li Matches any single character. It is the same as - \b{.} in full regexps. - \row \li \b{*} - \li Matches zero or more of any characters. It is the - same as \b{.*} in full regexps. - \row \li \b{[...]} - \li Sets of characters can be represented in square brackets, - similar to full regexps. Within the character class, like - outside, backslash has no special meaning. - \endtable - - In the mode Wildcard, the wildcard characters cannot be - escaped. In the mode WildcardUnix, the character '\\' escapes the - wildcard. - - For example if we are in wildcard mode and have strings which - contain filenames we could identify HTML files with \b{*.html}. - This will match zero or more characters followed by a dot followed - by 'h', 't', 'm' and 'l'. - - To test a string against a wildcard expression, use exactMatch(). - For example: - - \snippet code/src_corelib_text_qregexp.cpp 1 - - \target perl-users - \section1 Notes for Perl Users - - Most of the character class abbreviations supported by Perl are - supported by QRegExp, see \l{#characters-and-abbreviations-for-sets-of-characters} - {characters and abbreviations for sets of characters}. - - In QRegExp, apart from within character classes, \c{^} always - signifies the start of the string, so carets must always be - escaped unless used for that purpose. In Perl the meaning of caret - varies automagically depending on where it occurs so escaping it - is rarely necessary. 
The same applies to \c{$} which in - QRegExp always signifies the end of the string. - - QRegExp's quantifiers are the same as Perl's greedy quantifiers - (but see the \l{greedy quantifiers}{note above}). Non-greedy - matching cannot be applied to individual quantifiers, but can be - applied to all the quantifiers in the pattern. For example, to - match the Perl regexp \b{ro+?m} requires: - - \snippet code/src_corelib_text_qregexp.cpp 2 - - The equivalent of Perl's \c{/i} option is - setCaseSensitivity(Qt::CaseInsensitive). - - Perl's \c{/g} option can be emulated using a \l{#cap_in_a_loop}{loop}. - - In QRegExp \b{.} matches any character, therefore all QRegExp - regexps have the equivalent of Perl's \c{/s} option. QRegExp - does not have an equivalent to Perl's \c{/m} option, but this - can be emulated in various ways for example by splitting the input - into lines or by looping with a regexp that searches for newlines. - - Because QRegExp is string oriented, there are no \\A, \\Z, or \\z - assertions. The \\G assertion is not supported but can be emulated - in a loop. - - Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp - equivalents for $`, $' or $+. Perl's capturing variables, $1, $2, - ... correspond to cap(1) or capturedTexts()[1], cap(2) or - capturedTexts()[2], etc. - - To substitute a pattern use QString::replace(). - - Perl's extended \c{/x} syntax is not supported, nor are - directives, e.g. (?i), or regexp comments, e.g. (?#comment). On - the other hand, C++'s rules for literal strings can be used to - achieve the same: - - \snippet code/src_corelib_text_qregexp.cpp 3 - - Both zero-width positive and zero-width negative lookahead - assertions (?=pattern) and (?!pattern) are supported with the same - syntax as Perl. Perl's lookbehind assertions, "independent" - subexpressions and conditional expressions are not supported. - - Non-capturing parentheses are also supported, with the same - (?:pattern) syntax. 
- - See QString::split() and QStringList::join() for equivalents - to Perl's split and join functions. - - Note: because C++ transforms \\'s they must be written \e twice in - code, e.g. \b{\\b} must be written \b{\\\\b}. - - \target code-examples - \section1 Code Examples - - \snippet code/src_corelib_text_qregexp.cpp 4 - - The third string matches '\underline{6}'. This is a simple validation - regexp for integers in the range 0 to 99. - - \snippet code/src_corelib_text_qregexp.cpp 5 - - The second string matches '\underline{This_is-OK}'. We've used the - character set abbreviation '\\S' (non-whitespace) and the anchors - to match strings which contain no whitespace. - - In the following example we match strings containing 'mail' or - 'letter' or 'correspondence' but only match whole words i.e. not - 'email' - - \snippet code/src_corelib_text_qregexp.cpp 6 - - The second string matches "Please write the \underline{letter}". The - word 'letter' is also captured (because of the parentheses). We - can see what text we've captured like this: - - \snippet code/src_corelib_text_qregexp.cpp 7 - - This will capture the text from the first set of capturing - parentheses (counting capturing left parentheses from left to - right). The parentheses are counted from 1 since cap(0) is the - whole matched regexp (equivalent to '&' in most regexp engines). - - \snippet code/src_corelib_text_qregexp.cpp 8 - - Here we've passed the QRegExp to QString's replace() function to - replace the matched text with new text. - - \snippet code/src_corelib_text_qregexp.cpp 9 - - We've used the indexIn() function to repeatedly match the regexp in - the string. Note that instead of moving forward by one character - at a time \c pos++ we could have written \c {pos += - rx.matchedLength()} to skip over the already matched string. The - count will equal 3, matching 'One \underline{Eric} another - \underline{Eirik}, and an Ericsson. 
How many Eiriks, \underline{Eric}?'; it - doesn't match 'Ericsson' or 'Eiriks' because they are not bounded - by non-word boundaries. - - One common use of regexps is to split lines of delimited data into - their component fields. - - \snippet code/src_corelib_text_qregexp.cpp 10 - - In this example our input lines have the format company name, web - address and country. Unfortunately the regexp is rather long and - not very versatile -- the code will break if we add any more - fields. A simpler and better solution is to look for the - separator, '\\t' in this case, and take the surrounding text. The - QString::split() function can take a separator string or regexp - as an argument and split a string accordingly. - - \snippet code/src_corelib_text_qregexp.cpp 11 - - Here field[0] is the company, field[1] the web address and so on. - - To imitate the matching of a shell we can use wildcard mode. - - \snippet code/src_corelib_text_qregexp.cpp 12 - - Wildcard matching can be convenient because of its simplicity, but - any wildcard regexp can be defined using full regexps, e.g. - \b{.*\\.html$}. Notice that we can't match both \c .html and \c - .htm files with a wildcard unless we use \b{*.htm*} which will - also match 'test.html.bak'. A full regexp gives us the precision - we need, \b{.*\\.html?$}. - - QRegExp can match case insensitively using setCaseSensitivity(), - and can use non-greedy matching, see setMinimal(). By - default QRegExp uses full regexps but this can be changed with - setPatternSyntax(). Searching can be done forward with indexIn() or backward - with lastIndexIn(). Captured text can be accessed using - capturedTexts() which returns a string list of all captured - strings, or using cap() which returns the captured string for the - given index. The pos() function takes a match index and returns - the position in the string where the match was made (or -1 if - there was no match). 
- - \sa QString, QStringList, QSortFilterProxyModel, - {tools/regexp}{Regular Expression Example} - - - \section1 Porting to QRegularExpression - - The QRegularExpression class introduced in Qt 5 is a big improvement upon - QRegExp, in terms of APIs offered, supported pattern syntax and speed of - execution. The biggest difference is that QRegularExpression simply holds a - regular expression, and it's \e{not} modified when a match is requested. - Instead, a QRegularExpressionMatch object is returned, in order to check - the result of a match and extract the captured substring. The same applies - with global matching and QRegularExpressionMatchIterator. - - Other differences are outlined below. - - \section2 Different pattern syntax - - Porting a regular expression from QRegExp to QRegularExpression may require - changes to the pattern itself. - - In certain scenarios, QRegExp was too lenient and accepted patterns that - are simply invalid when using QRegularExpression. These are somehow easy - to detect, because the QRegularExpression objects built with these patterns - are not valid (cf. QRegularExpression::isValid()). - - In other cases, a pattern ported from QRegExp to QRegularExpression may - silently change semantics. Therefore, it is necessary to review the - patterns used. The most notable cases of silent incompatibility are: - - \list - - \li Curly braces are needed in order to use a hexadecimal escape like - \c{\xHHHH} with more than 2 digits. A pattern like \c{\x2022} neeeds to - be ported to \c{\x{2022}}, or it will match a space (\c{0x20}) followed - by the string \c{"22"}. In general, it is highly recommended to always use - curly braces with the \c{\x} escape, no matter the amount of digits - specified. - - \li A 0-to-n quantification like \c{{,n}} needs to be ported to \c{{0,n}} to - preserve semantics. Otherwise, a pattern such as \c{\d{,3}} would - actually match a digit followed by the exact string \c{"{,3}"}. 
- - \li QRegExp by default does Unicode-aware matching, while - QRegularExpression requires a separate option; see below for more details. - - \li c{.} in QRegExp does by default match all characters, including the - newline character. QRegularExpression excludes the newline character by - default. To include the newline character, set the - QRegularExpression::DotMatchesEverythingOption pattern option. - - \endlist - - \section2 Porting from QRegExp::exactMatch() - - QRegExp::exactMatch() in Qt 4 served two purposes: it exactly matched - a regular expression against a subject string, and it implemented partial - matching. - - \section3 Porting from QRegExp's Exact Matching - - Exact matching indicates whether the regular expression matches the entire - subject string. For example, the classes yield on the subject string \c{"abc123"}: - - \table - \header \li \li QRegExp::exactMatch() \li QRegularExpressionMatch::hasMatch() - \row \li \c{"\\d+"} \li \b false \li \b true - \row \li \c{"[a-z]+\\d+"} \li \b true \li \b true - \endtable - - Exact matching is not reflected in QRegularExpression. If you want - to be sure that the subject string matches the regular expression - exactly, you can wrap the pattern using the QRegularExpression::anchoredPattern() - function: - - \snippet code/src_corelib_text_qregexp.cpp 21 - - \section3 Porting from QRegExp's Partial Matching - - When using QRegExp::exactMatch(), if an exact match was not found, one - could still find out how much of the subject string was matched by the - regular expression by calling QRegExp::matchedLength(). If the returned length - was equal to the subject string's length, then one could conclude that a partial - match was found. - - QRegularExpression supports partial matching explicitly by means of the - appropriate MatchType. - - \section2 Global matching - - Due to limitations of the QRegExp API it was impossible to implement global - matching correctly (that is, like Perl does). 
In particular, patterns that - can match 0 characters (like \c{"a*"}) are problematic. - - QRegularExpression::globalMatch() implements Perl global match correctly, and - the returned iterator can be used to examine each result. - - \section2 Unicode properties support - - When using QRegExp, character classes such as \c{\w}, \c{\d}, etc. match - characters with the corresponding Unicode property: for instance, \c{\d} - matches any character with the Unicode Nd (decimal digit) property. - - Those character classes only match ASCII characters by default when using - QRegularExpression: for instance, \c{\d} matches exactly a character in the - \c{0-9} ASCII range. It is possible to change this behavior by using the - UseUnicodePropertiesOption pattern option. - - \section2 Wildcard matching - - There is no direct way to do wildcard matching in QRegularExpression. - However, the wildcardToRegularExpression method is provided to translate - glob patterns into a Perl-compatible regular expression that can be used - for that purpose. - - \section2 Other pattern syntaxes - - QRegularExpression supports only Perl-compatible regular expressions. - - \section2 Minimal matching - - QRegExp::setMinimal() implemented minimal matching by simply reversing the - greediness of the quantifiers (QRegExp did not support lazy quantifiers, - like \c{*?}, \c{+?}, etc.). QRegularExpression instead does support greedy, - lazy and possessive quantifiers. The InvertedGreedinessOption - pattern option can be useful to emulate the effects of QRegExp::setMinimal(): - if enabled, it inverts the greediness of quantifiers (greedy ones become - lazy and vice versa). - - \section2 Caret modes - - The AnchorAtOffsetMatchOption match option can be used to emulate the - QRegExp::CaretAtOffset behavior. There is no equivalent for the other - QRegExp::CaretMode modes. 
-*/ - -#if defined(Q_OS_VXWORKS) && defined(EOS) -# undef EOS -#endif - -const int NumBadChars = 64; -#define BadChar(ch) ((ch).unicode() % NumBadChars) - -const int NoOccurrence = INT_MAX; -const int EmptyCapture = INT_MAX; -const int InftyLen = INT_MAX; -const int InftyRep = 1025; -const int EOS = -1; - -static bool isWord(QChar ch) -{ - return ch.isLetterOrNumber() || ch.isMark() || ch == QLatin1Char('_'); -} - -/* - Merges two vectors of ints and puts the result into the first - one. -*/ -static void mergeInto(QList *a, const QList &b) -{ - int asize = a->size(); - int bsize = b.size(); - if (asize == 0) { - *a = b; -#ifndef QT_NO_REGEXP_OPTIM - } else if (bsize == 1 && a->at(asize - 1) < b.at(0)) { - a->resize(asize + 1); - (*a)[asize] = b.at(0); -#endif - } else if (bsize >= 1) { - int csize = asize + bsize; - QList c(csize); - int i = 0, j = 0, k = 0; - while (i < asize) { - if (j < bsize) { - if (a->at(i) == b.at(j)) { - ++i; - --csize; - } else if (a->at(i) < b.at(j)) { - c[k++] = a->at(i++); - } else { - c[k++] = b.at(j++); - } - } else { - memcpy(c.data() + k, a->constData() + i, (asize - i) * sizeof(int)); - break; - } - } - c.resize(csize); - if (j < bsize) - memcpy(c.data() + k, b.constData() + j, (bsize - j) * sizeof(int)); - *a = c; - } -} - -#ifndef QT_NO_REGEXP_WILDCARD -/* - Translates a wildcard pattern to an equivalent regular expression - pattern (e.g., *.cpp to .*\.cpp). 
- - If enableEscaping is true, it is possible to escape the wildcard - characters with \ -*/ -static QString wc2rx(const QString &wc_str, const bool enableEscaping) -{ - const int wclen = wc_str.length(); - QString rx; - int i = 0; - bool isEscaping = false; // the previous character is '\' - const QChar *wc = wc_str.unicode(); - - while (i < wclen) { - const QChar c = wc[i++]; - switch (c.unicode()) { - case '\\': - if (enableEscaping) { - if (isEscaping) { - rx += QLatin1String("\\\\"); - } // we insert the \\ later if necessary - if (i == wclen) { // the end - rx += QLatin1String("\\\\"); - } - } else { - rx += QLatin1String("\\\\"); - } - isEscaping = true; - break; - case '*': - if (isEscaping) { - rx += QLatin1String("\\*"); - isEscaping = false; - } else { - rx += QLatin1String(".*"); - } - break; - case '?': - if (isEscaping) { - rx += QLatin1String("\\?"); - isEscaping = false; - } else { - rx += QLatin1Char('.'); - } - - break; - case '$': - case '(': - case ')': - case '+': - case '.': - case '^': - case '{': - case '|': - case '}': - if (isEscaping) { - isEscaping = false; - rx += QLatin1String("\\\\"); - } - rx += QLatin1Char('\\'); - rx += c; - break; - case '[': - if (isEscaping) { - isEscaping = false; - rx += QLatin1String("\\["); - } else { - rx += c; - if (wc[i] == QLatin1Char('^')) - rx += wc[i++]; - if (i < wclen) { - if (wc[i] == QLatin1Char(']')) - rx += wc[i++]; - while (i < wclen && wc[i] != QLatin1Char(']')) { - if (wc[i] == QLatin1Char('\\')) - rx += QLatin1Char('\\'); - rx += wc[i++]; - } - } - } - break; - - case ']': - if(isEscaping){ - isEscaping = false; - rx += QLatin1String("\\"); - } - rx += c; - break; - - default: - if(isEscaping){ - isEscaping = false; - rx += QLatin1String("\\\\"); - } - rx += c; - } - } - return rx; -} -#endif - -static int caretIndex(int offset, QRegExp::CaretMode caretMode) -{ - if (caretMode == QRegExp::CaretAtZero) { - return 0; - } else if (caretMode == QRegExp::CaretAtOffset) { - return offset; - } else 
{ // QRegExp::CaretWontMatch - return -1; - } -} - -/* - The QRegExpEngineKey struct uniquely identifies an engine. -*/ -struct QRegExpEngineKey -{ - QString pattern; - QRegExp::PatternSyntax patternSyntax; - Qt::CaseSensitivity cs; - - inline QRegExpEngineKey(const QString &pattern, QRegExp::PatternSyntax patternSyntax, - Qt::CaseSensitivity cs) - : pattern(pattern), patternSyntax(patternSyntax), cs(cs) {} - - inline void clear() { - pattern.clear(); - patternSyntax = QRegExp::RegExp; - cs = Qt::CaseSensitive; - } -}; - -static bool operator==(const QRegExpEngineKey &key1, const QRegExpEngineKey &key2) -{ - return key1.pattern == key2.pattern && key1.patternSyntax == key2.patternSyntax - && key1.cs == key2.cs; -} - -static size_t qHash(const QRegExpEngineKey &key, size_t seed = 0) noexcept -{ - return qHashMulti(seed, key.pattern, key.patternSyntax, key.cs); -} - -class QRegExpEngine; - -/* - This is the engine state during matching. -*/ -struct QRegExpMatchState -{ - const QChar *in; // a pointer to the input string data - int pos; // the current position in the string - int caretPos; - int len; // the length of the input string - bool minimal; // minimal matching? - int *bigArray; // big array holding the data for the next pointers - int *inNextStack; // is state is nextStack? 
- int *curStack; // stack of current states - int *nextStack; // stack of next states - int *curCapBegin; // start of current states' captures - int *nextCapBegin; // start of next states' captures - int *curCapEnd; // end of current states' captures - int *nextCapEnd; // end of next states' captures - int *tempCapBegin; // start of temporary captures - int *tempCapEnd; // end of temporary captures - int *capBegin; // start of captures for a next state - int *capEnd; // end of captures for a next state - int *slideTab; // bump-along slide table for bad-character heuristic - int *captured; // what match() returned last - int slideTabSize; // size of slide table - int capturedSize; -#ifndef QT_NO_REGEXP_BACKREF - QList> sleeping; // list of back-reference sleepers -#endif - int matchLen; // length of match - int oneTestMatchedLen; // length of partial match - - const QRegExpEngine *eng; - - inline QRegExpMatchState() : bigArray(nullptr), captured(nullptr) {} - inline ~QRegExpMatchState() { free(bigArray); } - - void drain() { free(bigArray); bigArray = nullptr; captured = nullptr; } // to save memory - void prepareForMatch(QRegExpEngine *eng); - void match(const QChar *str, int len, int pos, bool minimal, - bool oneTest, int caretIndex); - bool matchHere(); - bool testAnchor(int i, int a, const int *capBegin); -}; - -/* - The struct QRegExpAutomatonState represents one state in a modified NFA. The - input characters matched are stored in the state instead of on - the transitions, something possible for an automaton - constructed from a regular expression. -*/ -struct QRegExpAutomatonState -{ -#ifndef QT_NO_REGEXP_CAPTURE - int atom; // which atom does this state belong to? -#endif - int match; // what does it match? 
(see CharClassBit and BackRefBit) - QList outs; // out-transitions - QMap reenter; // atoms reentered when transiting out - QMap anchors; // anchors met when transiting out - - inline QRegExpAutomatonState() { } -#ifndef QT_NO_REGEXP_CAPTURE - inline QRegExpAutomatonState(int a, int m) - : atom(a), match(m) { } -#else - inline QRegExpAutomatonState(int m) - : match(m) { } -#endif -}; - -Q_DECLARE_TYPEINFO(QRegExpAutomatonState, Q_MOVABLE_TYPE); - -/* - The struct QRegExpCharClassRange represents a range of characters (e.g., - [0-9] denotes range 48 to 57). -*/ -struct QRegExpCharClassRange -{ - ushort from; // 48 - ushort len; // 10 -}; - -Q_DECLARE_TYPEINFO(QRegExpCharClassRange, Q_PRIMITIVE_TYPE); - -#ifndef QT_NO_REGEXP_CAPTURE -/* - The struct QRegExpAtom represents one node in the hierarchy of regular - expression atoms. -*/ -struct QRegExpAtom -{ - enum { NoCapture = -1, OfficialCapture = -2, UnofficialCapture = -3 }; - - int parent; // index of parent in array of atoms - int capture; // index of capture, from 1 to ncap - 1 -}; - -Q_DECLARE_TYPEINFO(QRegExpAtom, Q_PRIMITIVE_TYPE); -#endif - -struct QRegExpLookahead; - -#ifndef QT_NO_REGEXP_ANCHOR_ALT -/* - The struct QRegExpAnchorAlternation represents a pair of anchors with - OR semantics. -*/ -struct QRegExpAnchorAlternation -{ - int a; // this anchor... - int b; // ...or this one -}; - -Q_DECLARE_TYPEINFO(QRegExpAnchorAlternation, Q_PRIMITIVE_TYPE); -#endif - -#ifndef QT_NO_REGEXP_CCLASS - -#define FLAG(x) (1 << (x)) -/* - The class QRegExpCharClass represents a set of characters, such as can - be found in regular expressions (e.g., [a-z] denotes the set - {a, b, ..., z}). 
-*/ -class QRegExpCharClass -{ -public: - QRegExpCharClass(); - - void clear(); - bool negative() const { return n; } - void setNegative(bool negative); - void addCategories(uint cats); - void addRange(ushort from, ushort to); - void addSingleton(ushort ch) { addRange(ch, ch); } - - bool in(QChar ch) const; -#ifndef QT_NO_REGEXP_OPTIM - const QList &firstOccurrence() const { return occ1; } -#endif - -#if defined(QT_DEBUG) - void dump() const; -#endif - -private: - QList r; // character ranges -#ifndef QT_NO_REGEXP_OPTIM - QList occ1; // first-occurrence array -#endif - uint c; // character classes - bool n; // negative? -}; -#else -struct QRegExpCharClass -{ - int dummy; - -#ifndef QT_NO_REGEXP_OPTIM - QRegExpCharClass() { occ1.fill(0, NumBadChars); } - - const QList &firstOccurrence() const { return occ1; } - QList occ1; -#endif -}; -#endif - -Q_DECLARE_TYPEINFO(QRegExpCharClass, Q_MOVABLE_TYPE); - -/* - The QRegExpEngine class encapsulates a modified nondeterministic - finite automaton (NFA). 
-*/ -class QRegExpEngine -{ -public: - QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers) - : cs(cs), greedyQuantifiers(greedyQuantifiers) { setup(); } - - QRegExpEngine(const QRegExpEngineKey &key); - ~QRegExpEngine(); - - bool isValid() const { return valid; } - const QString &errorString() const { return yyError; } - int captureCount() const { return officialncap; } - - int createState(QChar ch); - int createState(const QRegExpCharClass &cc); -#ifndef QT_NO_REGEXP_BACKREF - int createState(int bref); -#endif - - void addCatTransitions(const QList &from, const QList &to); -#ifndef QT_NO_REGEXP_CAPTURE - void addPlusTransitions(const QList &from, const QList &to, int atom); -#endif - -#ifndef QT_NO_REGEXP_ANCHOR_ALT - int anchorAlternation(int a, int b); - int anchorConcatenation(int a, int b); -#else - int anchorAlternation(int a, int b) { return a & b; } - int anchorConcatenation(int a, int b) { return a | b; } -#endif - void addAnchors(int from, int to, int a); - -#ifndef QT_NO_REGEXP_OPTIM - void heuristicallyChooseHeuristic(); -#endif - -#if defined(QT_DEBUG) - void dump() const; -#endif - - QAtomicInt ref; - -private: - enum { CharClassBit = 0x10000, BackRefBit = 0x20000 }; - enum { InitialState = 0, FinalState = 1 }; - - void setup(); - int setupState(int match); - - /* - Let's hope that 13 lookaheads and 14 back-references are - enough. 
- */ - enum { MaxLookaheads = 13, MaxBackRefs = 14 }; - enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004, - Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010, - Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads, - Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1, - Anchor_Alternation = unsigned(Anchor_BackRef1Empty) << MaxBackRefs, - - Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^ - ((Anchor_FirstLookahead << MaxLookaheads) - 1) }; -#ifndef QT_NO_REGEXP_CAPTURE - int startAtom(bool officialCapture); - void finishAtom(int atom, bool needCapture); -#endif - -#ifndef QT_NO_REGEXP_LOOKAHEAD - int addLookahead(QRegExpEngine *eng, bool negative); -#endif - -#ifndef QT_NO_REGEXP_OPTIM - bool goodStringMatch(QRegExpMatchState &matchState) const; - bool badCharMatch(QRegExpMatchState &matchState) const; -#else - bool bruteMatch(QRegExpMatchState &matchState) const; -#endif - - QList s; // array of states -#ifndef QT_NO_REGEXP_CAPTURE - QList f; // atom hierarchy - int nf; // number of atoms - int cf; // current atom - QList captureForOfficialCapture; -#endif - int officialncap; // number of captures, seen from the outside - int ncap; // number of captures, seen from the inside -#ifndef QT_NO_REGEXP_CCLASS - QList cl; // array of character classes -#endif -#ifndef QT_NO_REGEXP_LOOKAHEAD - QList ahead; // array of lookaheads -#endif -#ifndef QT_NO_REGEXP_ANCHOR_ALT - QList aa; // array of (a, b) pairs of anchors -#endif -#ifndef QT_NO_REGEXP_OPTIM - bool caretAnchored; // does the regexp start with ^? - bool trivial; // is the good-string all that needs to match? -#endif - bool valid; // is the regular expression valid? - Qt::CaseSensitivity cs; // case sensitive? - bool greedyQuantifiers; // RegExp2? - bool xmlSchemaExtensions; -#ifndef QT_NO_REGEXP_BACKREF - int nbrefs; // number of back-references -#endif - -#ifndef QT_NO_REGEXP_OPTIM - bool useGoodStringHeuristic; // use goodStringMatch? 
otherwise badCharMatch - - int goodEarlyStart; // the index where goodStr can first occur in a match - int goodLateStart; // the index where goodStr can last occur in a match - QString goodStr; // the string that any match has to contain - - int minl; // the minimum length of a match - QList occ1; // first-occurrence array -#endif - - /* - The class Box is an abstraction for a regular expression - fragment. It can also be seen as one node in the syntax tree of - a regular expression with synthetized attributes. - - Its interface is ugly for performance reasons. - */ - class Box - { - public: - Box(QRegExpEngine *engine); - Box(const Box &b) { operator=(b); } - - Box &operator=(const Box &b); - - void clear() { operator=(Box(eng)); } - void set(QChar ch); - void set(const QRegExpCharClass &cc); -#ifndef QT_NO_REGEXP_BACKREF - void set(int bref); -#endif - - void cat(const Box &b); - void orx(const Box &b); - void plus(int atom); - void opt(); - void catAnchor(int a); -#ifndef QT_NO_REGEXP_OPTIM - void setupHeuristics(); -#endif - -#if defined(QT_DEBUG) - void dump() const; -#endif - - private: - void addAnchorsToEngine(const Box &to) const; - - QRegExpEngine *eng; // the automaton under construction - QList ls; // the left states (firstpos) - QList rs; // the right states (lastpos) - QMap lanchors; // the left anchors - QMap ranchors; // the right anchors - int skipanchors; // the anchors to match if the box is skipped - -#ifndef QT_NO_REGEXP_OPTIM - int earlyStart; // the index where str can first occur - int lateStart; // the index where str can last occur - QString str; // a string that has to occur in any match - QString leftStr; // a string occurring at the left of this box - QString rightStr; // a string occurring at the right of this box - int maxl; // the maximum length of this box (possibly InftyLen) -#endif - - int minl; // the minimum length of this box -#ifndef QT_NO_REGEXP_OPTIM - QList occ1; // first-occurrence array -#endif - }; - - friend class Box; 
- - /* - This is the lexical analyzer for regular expressions. - */ - enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead, - Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar, - Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 }; - int getChar(); - int getEscape(); -#ifndef QT_NO_REGEXP_INTERVAL - int getRep(int def); -#endif -#ifndef QT_NO_REGEXP_LOOKAHEAD - void skipChars(int n); -#endif - void error(const char *msg); - void startTokenizer(const QChar *rx, int len); - int getToken(); - - const QChar *yyIn; // a pointer to the input regular expression pattern - int yyPos0; // the position of yyTok in the input pattern - int yyPos; // the position of the next character to read - int yyLen; // the length of yyIn - int yyCh; // the last character read - QScopedPointer yyCharClass; // attribute for Tok_CharClass tokens - int yyMinRep; // attribute for Tok_Quantifier - int yyMaxRep; // ditto - QString yyError; // syntax error or overflow during parsing? - - /* - This is the syntactic analyzer for regular expressions. - */ - int parse(const QChar *rx, int len); - void parseAtom(Box *box); - void parseFactor(Box *box); - void parseTerm(Box *box); - void parseExpression(Box *box); - - int yyTok; // the last token read - bool yyMayCapture; // set this to false to disable capturing - - friend struct QRegExpMatchState; -}; - -#ifndef QT_NO_REGEXP_LOOKAHEAD -/* - The struct QRegExpLookahead represents a lookahead a la Perl (e.g., - (?=foo) and (?!bar)). -*/ -struct QRegExpLookahead -{ - QRegExpEngine *eng; // NFA representing the embedded regular expression - bool neg; // negative lookahead? - - inline QRegExpLookahead(QRegExpEngine *eng0, bool neg0) - : eng(eng0), neg(neg0) { } - inline ~QRegExpLookahead() { delete eng; } -}; -#endif - -/*! - \internal - convert the pattern string to the RegExp syntax. 
- - This is also used by QScriptEngine::newRegExp to convert to a pattern that JavaScriptCore can understan - */ -Q_CORE_EXPORT QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax) -{ - switch (patternSyntax) { -#ifndef QT_NO_REGEXP_WILDCARD - case QRegExp::Wildcard: - return wc2rx(pattern, false); - case QRegExp::WildcardUnix: - return wc2rx(pattern, true); -#endif - case QRegExp::FixedString: - return QRegExp::escape(pattern); - case QRegExp::W3CXmlSchema11: - default: - return pattern; - } -} - -QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key) - : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2), - xmlSchemaExtensions(key.patternSyntax == QRegExp::W3CXmlSchema11) -{ - setup(); - - QString rx = qt_regexp_toCanonical(key.pattern, key.patternSyntax); - - valid = (parse(rx.unicode(), rx.length()) == rx.length()); - if (!valid) { -#ifndef QT_NO_REGEXP_OPTIM - trivial = false; -#endif - error(RXERR_LEFTDELIM); - } -} - -QRegExpEngine::~QRegExpEngine() -{ -#ifndef QT_NO_REGEXP_LOOKAHEAD - qDeleteAll(ahead); -#endif -} - -void QRegExpMatchState::prepareForMatch(QRegExpEngine *eng) -{ - /* - We use one QList for all the big data used a lot in - matchHere() and friends. 
- */ - int ns = eng->s.size(); // number of states - int ncap = eng->ncap; -#ifndef QT_NO_REGEXP_OPTIM - int newSlideTabSize = qMax(eng->minl + 1, 16); -#else - int newSlideTabSize = 0; -#endif - int numCaptures = eng->captureCount(); - int newCapturedSize = 2 + 2 * numCaptures; - bigArray = q_check_ptr((int *)realloc(bigArray, ((3 + 4 * ncap) * ns + 4 * ncap + newSlideTabSize + newCapturedSize)*sizeof(int))); - - // set all internal variables only _after_ bigArray is realloc'ed - // to prevent a broken regexp in oom case - - slideTabSize = newSlideTabSize; - capturedSize = newCapturedSize; - inNextStack = bigArray; - memset(inNextStack, -1, ns * sizeof(int)); - curStack = inNextStack + ns; - nextStack = inNextStack + 2 * ns; - - curCapBegin = inNextStack + 3 * ns; - nextCapBegin = curCapBegin + ncap * ns; - curCapEnd = curCapBegin + 2 * ncap * ns; - nextCapEnd = curCapBegin + 3 * ncap * ns; - - tempCapBegin = curCapBegin + 4 * ncap * ns; - tempCapEnd = tempCapBegin + ncap; - capBegin = tempCapBegin + 2 * ncap; - capEnd = tempCapBegin + 3 * ncap; - - slideTab = tempCapBegin + 4 * ncap; - captured = slideTab + slideTabSize; - memset(captured, -1, capturedSize*sizeof(int)); - this->eng = eng; -} - -/* - Tries to match in str and returns an array of (begin, length) pairs - for captured text. If there is no match, all pairs are (-1, -1). 
-*/ -void QRegExpMatchState::match(const QChar *str0, int len0, int pos0, - bool minimal0, bool oneTest, int caretIndex) -{ - bool matched = false; - QChar char_null; - -#ifndef QT_NO_REGEXP_OPTIM - if (eng->trivial && !oneTest) { - // ### Qt6: qsizetype - pos = int(QtPrivate::findString(QStringView(str0, len0), pos0, QStringView(eng->goodStr.unicode(), eng->goodStr.length()), eng->cs)); - matchLen = eng->goodStr.length(); - matched = (pos != -1); - } else -#endif - { - in = str0; - if (in == nullptr) - in = &char_null; - pos = pos0; - caretPos = caretIndex; - len = len0; - minimal = minimal0; - matchLen = 0; - oneTestMatchedLen = 0; - - if (eng->valid && pos >= 0 && pos <= len) { -#ifndef QT_NO_REGEXP_OPTIM - if (oneTest) { - matched = matchHere(); - } else { - if (pos <= len - eng->minl) { - if (eng->caretAnchored) { - matched = matchHere(); - } else if (eng->useGoodStringHeuristic) { - matched = eng->goodStringMatch(*this); - } else { - matched = eng->badCharMatch(*this); - } - } - } -#else - matched = oneTest ? matchHere() : eng->bruteMatch(*this); -#endif - } - } - - if (matched) { - int *c = captured; - *c++ = pos; - *c++ = matchLen; - - int numCaptures = (capturedSize - 2) >> 1; -#ifndef QT_NO_REGEXP_CAPTURE - for (int i = 0; i < numCaptures; ++i) { - int j = eng->captureForOfficialCapture.at(i); - if (capBegin[j] != EmptyCapture) { - int len = capEnd[j] - capBegin[j]; - *c++ = (len > 0) ? pos + capBegin[j] : 0; - *c++ = len; - } else { - *c++ = -1; - *c++ = -1; - } - } -#endif - } else { - // we rely on 2's complement here - memset(captured, -1, capturedSize * sizeof(int)); - } -} - -/* - The three following functions add one state to the automaton and - return the number of the state. 
-*/ - -int QRegExpEngine::createState(QChar ch) -{ - return setupState(ch.unicode()); -} - -int QRegExpEngine::createState(const QRegExpCharClass &cc) -{ -#ifndef QT_NO_REGEXP_CCLASS - int n = cl.size(); - cl += QRegExpCharClass(cc); - return setupState(CharClassBit | n); -#else - Q_UNUSED(cc); - return setupState(CharClassBit); -#endif -} - -#ifndef QT_NO_REGEXP_BACKREF -int QRegExpEngine::createState(int bref) -{ - if (bref > nbrefs) { - nbrefs = bref; - if (nbrefs > MaxBackRefs) { - error(RXERR_LIMIT); - return 0; - } - } - return setupState(BackRefBit | bref); -} -#endif - -/* - The two following functions add a transition between all pairs of - states (i, j) where i is found in from, and j is found in to. - - Cat-transitions are distinguished from plus-transitions for - capturing. -*/ - -void QRegExpEngine::addCatTransitions(const QList &from, const QList &to) -{ - for (int i = 0; i < from.size(); i++) - mergeInto(&s[from.at(i)].outs, to); -} - -#ifndef QT_NO_REGEXP_CAPTURE -void QRegExpEngine::addPlusTransitions(const QList &from, const QList &to, int atom) -{ - for (int i = 0; i < from.size(); i++) { - QRegExpAutomatonState &st = s[from.at(i)]; - const QList oldOuts = st.outs; - mergeInto(&st.outs, to); - if (f.at(atom).capture != QRegExpAtom::NoCapture) { - for (int j = 0; j < to.size(); j++) { - // ### st.reenter.contains(to.at(j)) check looks suspicious - if (!st.reenter.contains(to.at(j)) && - !std::binary_search(oldOuts.constBegin(), oldOuts.constEnd(), to.at(j))) - st.reenter.insert(to.at(j), atom); - } - } - } -} -#endif - -#ifndef QT_NO_REGEXP_ANCHOR_ALT -/* - Returns an anchor that means a OR b. 
-*/ -int QRegExpEngine::anchorAlternation(int a, int b) -{ - if (((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0) - return a & b; - - int n = aa.size(); -#ifndef QT_NO_REGEXP_OPTIM - if (n > 0 && aa.at(n - 1).a == a && aa.at(n - 1).b == b) - return Anchor_Alternation | (n - 1); -#endif - - QRegExpAnchorAlternation element = {a, b}; - aa.append(element); - return Anchor_Alternation | n; -} - -/* - Returns an anchor that means a AND b. -*/ -int QRegExpEngine::anchorConcatenation(int a, int b) -{ - if (((a | b) & Anchor_Alternation) == 0) - return a | b; - if ((b & Anchor_Alternation) != 0) - qSwap(a, b); - - int aprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).a, b); - int bprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).b, b); - return anchorAlternation(aprime, bprime); -} -#endif - -/* - Adds anchor a on a transition caracterised by its from state and - its to state. -*/ -void QRegExpEngine::addAnchors(int from, int to, int a) -{ - QRegExpAutomatonState &st = s[from]; - if (st.anchors.contains(to)) - a = anchorAlternation(st.anchors.value(to), a); - st.anchors.insert(to, a); -} - -#ifndef QT_NO_REGEXP_OPTIM -/* - This function chooses between the good-string and the bad-character - heuristics. It computes two scores and chooses the heuristic with - the highest score. - - Here are some common-sense constraints on the scores that should be - respected if the formulas are ever modified: (1) If goodStr is - empty, the good-string heuristic scores 0. (2) If the regular - expression is trivial, the good-string heuristic should be used. - (3) If the search is case insensitive, the good-string heuristic - should be used, unless it scores 0. (Case insensitivity turns all - entries of occ1 to 0.) (4) If (goodLateStart - goodEarlyStart) is - big, the good-string heuristic should score less. 
-*/ -void QRegExpEngine::heuristicallyChooseHeuristic() -{ - if (minl == 0) { - useGoodStringHeuristic = false; - } else if (trivial) { - useGoodStringHeuristic = true; - } else { - /* - Magic formula: The good string has to constitute a good - proportion of the minimum-length string, and appear at a - more-or-less known index. - */ - int goodStringScore = (64 * goodStr.length() / minl) - - (goodLateStart - goodEarlyStart); - /* - Less magic formula: We pick some characters at random, and - check whether they are good or bad. - */ - int badCharScore = 0; - int step = qMax(1, NumBadChars / 32); - for (int i = 1; i < NumBadChars; i += step) { - if (occ1.at(i) == NoOccurrence) - badCharScore += minl; - else - badCharScore += occ1.at(i); - } - badCharScore /= minl; - useGoodStringHeuristic = (goodStringScore > badCharScore); - } -} -#endif - -#if defined(QT_DEBUG) -void QRegExpEngine::dump() const -{ - int i, j; - qDebug("Case %ssensitive engine", cs ? "" : "in"); - qDebug(" States"); - for (i = 0; i < s.size(); i++) { - qDebug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? 
" (final)" : ""); -#ifndef QT_NO_REGEXP_CAPTURE - if (nf > 0) - qDebug(" in atom %d", s[i].atom); -#endif - int m = s[i].match; - if ((m & CharClassBit) != 0) { - qDebug(" match character class %d", m ^ CharClassBit); -#ifndef QT_NO_REGEXP_CCLASS - cl[m ^ CharClassBit].dump(); -#else - qDebug(" negative character class"); -#endif - } else if ((m & BackRefBit) != 0) { - qDebug(" match back-reference %d", m ^ BackRefBit); - } else if (m >= 0x20 && m <= 0x7e) { - qDebug(" match 0x%.4x (%c)", m, m); - } else { - qDebug(" match 0x%.4x", m); - } - for (j = 0; j < s[i].outs.size(); j++) { - int next = s[i].outs[j]; - qDebug(" -> %d", next); - if (s[i].reenter.contains(next)) - qDebug(" [reenter %d]", s[i].reenter[next]); - if (s[i].anchors.value(next) != 0) - qDebug(" [anchors 0x%.8x]", s[i].anchors[next]); - } - } -#ifndef QT_NO_REGEXP_CAPTURE - if (nf > 0) { - qDebug(" Atom Parent Capture"); - for (i = 0; i < nf; i++) { - if (f[i].capture == QRegExpAtom::NoCapture) { - qDebug(" %6d %6d nil", i, f[i].parent); - } else { - int cap = f[i].capture; - bool official = captureForOfficialCapture.contains(cap); - qDebug(" %6d %6d %6d %s", i, f[i].parent, f[i].capture, - official ? 
"official" : ""); - } - } - } -#endif -#ifndef QT_NO_REGEXP_ANCHOR_ALT - for (i = 0; i < aa.size(); i++) - qDebug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b); -#endif -} -#endif - -void QRegExpEngine::setup() -{ - ref.storeRelaxed(1); -#ifndef QT_NO_REGEXP_CAPTURE - f.resize(32); - nf = 0; - cf = -1; -#endif - officialncap = 0; - ncap = 0; -#ifndef QT_NO_REGEXP_OPTIM - caretAnchored = true; - trivial = true; -#endif - valid = false; -#ifndef QT_NO_REGEXP_BACKREF - nbrefs = 0; -#endif -#ifndef QT_NO_REGEXP_OPTIM - useGoodStringHeuristic = true; - minl = 0; - occ1.fill(0, NumBadChars); -#endif -} - -int QRegExpEngine::setupState(int match) -{ -#ifndef QT_NO_REGEXP_CAPTURE - s += QRegExpAutomatonState(cf, match); -#else - s += QRegExpAutomatonState(match); -#endif - return s.size() - 1; -} - -#ifndef QT_NO_REGEXP_CAPTURE -/* - Functions startAtom() and finishAtom() should be called to delimit - atoms. When a state is created, it is assigned to the current atom. - The information is later used for capturing. -*/ -int QRegExpEngine::startAtom(bool officialCapture) -{ - if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size()) - f.resize((nf + 1) << 1); - f[nf].parent = cf; - cf = nf++; - f[cf].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture; - return cf; -} - -void QRegExpEngine::finishAtom(int atom, bool needCapture) -{ - if (greedyQuantifiers && needCapture && f[atom].capture == QRegExpAtom::NoCapture) - f[atom].capture = QRegExpAtom::UnofficialCapture; - cf = f.at(atom).parent; -} -#endif - -#ifndef QT_NO_REGEXP_LOOKAHEAD -/* - Creates a lookahead anchor. -*/ -int QRegExpEngine::addLookahead(QRegExpEngine *eng, bool negative) -{ - int n = ahead.size(); - if (n == MaxLookaheads) { - error(RXERR_LIMIT); - return 0; - } - ahead += new QRegExpLookahead(eng, negative); - return Anchor_FirstLookahead << n; -} -#endif - -#ifndef QT_NO_REGEXP_CAPTURE -/* - We want the longest leftmost captures. 
-*/ -static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2, - const int *end2) -{ - for (int i = 0; i < ncap; i++) { - int delta = begin2[i] - begin1[i]; // it has to start early... - if (delta == 0) - delta = end1[i] - end2[i]; // ...and end late - - if (delta != 0) - return delta > 0; - } - return false; -} -#endif - -/* - Returns \c true if anchor a matches at position pos + i in the input - string, otherwise false. -*/ -bool QRegExpMatchState::testAnchor(int i, int a, const int *capBegin) -{ - int j; - -#ifndef QT_NO_REGEXP_ANCHOR_ALT - if ((a & QRegExpEngine::Anchor_Alternation) != 0) - return testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).a, capBegin) - || testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).b, capBegin); -#endif - - if ((a & QRegExpEngine::Anchor_Caret) != 0) { - if (pos + i != caretPos) - return false; - } - if ((a & QRegExpEngine::Anchor_Dollar) != 0) { - if (pos + i != len) - return false; - } -#ifndef QT_NO_REGEXP_ESCAPE - if ((a & (QRegExpEngine::Anchor_Word | QRegExpEngine::Anchor_NonWord)) != 0) { - bool before = false; - bool after = false; - if (pos + i != 0) - before = isWord(in[pos + i - 1]); - if (pos + i != len) - after = isWord(in[pos + i]); - if ((a & QRegExpEngine::Anchor_Word) != 0 && (before == after)) - return false; - if ((a & QRegExpEngine::Anchor_NonWord) != 0 && (before != after)) - return false; - } -#endif -#ifndef QT_NO_REGEXP_LOOKAHEAD - if ((a & QRegExpEngine::Anchor_LookaheadMask) != 0) { - const QList &ahead = eng->ahead; - for (j = 0; j < ahead.size(); j++) { - if ((a & (QRegExpEngine::Anchor_FirstLookahead << j)) != 0) { - QRegExpMatchState matchState; - matchState.prepareForMatch(ahead[j]->eng); - matchState.match(in + pos + i, len - pos - i, 0, - true, true, caretPos - pos - i); - if ((matchState.captured[0] == 0) == ahead[j]->neg) - return false; - } - } - } -#endif -#ifndef QT_NO_REGEXP_CAPTURE -#ifndef QT_NO_REGEXP_BACKREF - for (j = 0; j < 
eng->nbrefs; j++) { - if ((a & (QRegExpEngine::Anchor_BackRef1Empty << j)) != 0) { - int i = eng->captureForOfficialCapture.at(j); - if (capBegin[i] != EmptyCapture) - return false; - } - } -#endif -#endif - return true; -} - -#ifndef QT_NO_REGEXP_OPTIM -/* - The three following functions are what Jeffrey Friedl would call - transmissions (or bump-alongs). Using one or the other should make - no difference except in performance. -*/ - -bool QRegExpEngine::goodStringMatch(QRegExpMatchState &matchState) const -{ - int k = matchState.pos + goodEarlyStart; - QStringMatcher matcher(goodStr.unicode(), goodStr.length(), cs); - while ((k = matcher.indexIn(matchState.in, matchState.len, k)) != -1) { - int from = k - goodLateStart; - int to = k - goodEarlyStart; - if (from > matchState.pos) - matchState.pos = from; - - while (matchState.pos <= to) { - if (matchState.matchHere()) - return true; - ++matchState.pos; - } - ++k; - } - return false; -} - -bool QRegExpEngine::badCharMatch(QRegExpMatchState &matchState) const -{ - int slideHead = 0; - int slideNext = 0; - int i; - int lastPos = matchState.len - minl; - memset(matchState.slideTab, 0, matchState.slideTabSize * sizeof(int)); - - /* - Set up the slide table, used for the bad-character heuristic, - using the table of first occurrence of each character. 
- */ - for (i = 0; i < minl; i++) { - int sk = occ1[BadChar(matchState.in[matchState.pos + i])]; - if (sk == NoOccurrence) - sk = i + 1; - if (sk > 0) { - int k = i + 1 - sk; - if (k < 0) { - sk = i + 1; - k = 0; - } - if (sk > matchState.slideTab[k]) - matchState.slideTab[k] = sk; - } - } - - if (matchState.pos > lastPos) - return false; - - for (;;) { - if (++slideNext >= matchState.slideTabSize) - slideNext = 0; - if (matchState.slideTab[slideHead] > 0) { - if (matchState.slideTab[slideHead] - 1 > matchState.slideTab[slideNext]) - matchState.slideTab[slideNext] = matchState.slideTab[slideHead] - 1; - matchState.slideTab[slideHead] = 0; - } else { - if (matchState.matchHere()) - return true; - } - - if (matchState.pos == lastPos) - break; - - /* - Update the slide table. This code has much in common with - the initialization code. - */ - int sk = occ1[BadChar(matchState.in[matchState.pos + minl])]; - if (sk == NoOccurrence) { - matchState.slideTab[slideNext] = minl; - } else if (sk > 0) { - int k = slideNext + minl - sk; - if (k >= matchState.slideTabSize) - k -= matchState.slideTabSize; - if (sk > matchState.slideTab[k]) - matchState.slideTab[k] = sk; - } - slideHead = slideNext; - ++matchState.pos; - } - return false; -} -#else -bool QRegExpEngine::bruteMatch(QRegExpMatchState &matchState) const -{ - while (matchState.pos <= matchState.len) { - if (matchState.matchHere()) - return true; - ++matchState.pos; - } - return false; -} -#endif - -/* - Here's the core of the engine. It tries to do a match here and now. 
-*/ -bool QRegExpMatchState::matchHere() -{ - int ncur = 1, nnext = 0; - int i = 0, j, k, m; - bool stop = false; - - matchLen = -1; - oneTestMatchedLen = -1; - curStack[0] = QRegExpEngine::InitialState; - - int ncap = eng->ncap; -#ifndef QT_NO_REGEXP_CAPTURE - if (ncap > 0) { - for (j = 0; j < ncap; j++) { - curCapBegin[j] = EmptyCapture; - curCapEnd[j] = EmptyCapture; - } - } -#endif - -#ifndef QT_NO_REGEXP_BACKREF - while ((ncur > 0 || !sleeping.isEmpty()) && i <= len - pos && !stop) -#else - while (ncur > 0 && i <= len - pos && !stop) -#endif - { - int ch = (i < len - pos) ? in[pos + i].unicode() : 0; - for (j = 0; j < ncur; j++) { - int cur = curStack[j]; - const QRegExpAutomatonState &scur = eng->s.at(cur); - const QList &outs = scur.outs; - for (k = 0; k < outs.size(); k++) { - int next = outs.at(k); - const QRegExpAutomatonState &snext = eng->s.at(next); - bool inside = true; -#if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE) - int needSomeSleep = 0; -#endif - - /* - First, check if the anchors are anchored properly. - */ - int a = scur.anchors.value(next); - if (a != 0 && !testAnchor(i, a, curCapBegin + j * ncap)) - inside = false; - - /* - If indeed they are, check if the input character is - correct for this transition. 
- */ - if (inside) { - m = snext.match; - if ((m & (QRegExpEngine::CharClassBit | QRegExpEngine::BackRefBit)) == 0) { - if (eng->cs) - inside = (m == ch); - else - inside = (QChar(m).toLower() == QChar(ch).toLower()); - } else if (next == QRegExpEngine::FinalState) { - matchLen = i; - stop = minimal; - inside = true; - } else if ((m & QRegExpEngine::CharClassBit) != 0) { -#ifndef QT_NO_REGEXP_CCLASS - const QRegExpCharClass &cc = eng->cl.at(m ^ QRegExpEngine::CharClassBit); - if (eng->cs) - inside = cc.in(QChar(ch)); - else if (cc.negative()) - inside = cc.in(QChar(ch).toLower()) && - cc.in(QChar(ch).toUpper()); - else - inside = cc.in(QChar(ch).toLower()) || - cc.in(QChar(ch).toUpper()); -#endif -#if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE) - } else { /* ((m & QRegExpEngine::BackRefBit) != 0) */ - int bref = m ^ QRegExpEngine::BackRefBit; - int ell = j * ncap + eng->captureForOfficialCapture.at(bref - 1); - - inside = bref <= ncap && curCapBegin[ell] != EmptyCapture; - if (inside) { - if (eng->cs) - inside = (in[pos + curCapBegin[ell]] == QChar(ch)); - else - inside = (in[pos + curCapBegin[ell]].toLower() - == QChar(ch).toLower()); - } - - if (inside) { - int delta; - if (curCapEnd[ell] == EmptyCapture) - delta = i - curCapBegin[ell]; - else - delta = curCapEnd[ell] - curCapBegin[ell]; - - inside = (delta <= len - (pos + i)); - if (inside && delta > 1) { - int n = 1; - if (eng->cs) { - while (n < delta) { - if (in[pos + curCapBegin[ell] + n] - != in[pos + i + n]) - break; - ++n; - } - } else { - while (n < delta) { - QChar a = in[pos + curCapBegin[ell] + n]; - QChar b = in[pos + i + n]; - if (a.toLower() != b.toLower()) - break; - ++n; - } - } - inside = (n == delta); - if (inside) - needSomeSleep = delta - 1; - } - } -#endif - } - } - - /* - We must now update our data structures. - */ - if (inside) { -#ifndef QT_NO_REGEXP_CAPTURE - int *capBegin, *capEnd; -#endif - /* - If the next state was not encountered yet, all - is fine. 
- */ - if ((m = inNextStack[next]) == -1) { - m = nnext++; - nextStack[m] = next; - inNextStack[next] = m; -#ifndef QT_NO_REGEXP_CAPTURE - capBegin = nextCapBegin + m * ncap; - capEnd = nextCapEnd + m * ncap; - - /* - Otherwise, we'll first maintain captures in - temporary arrays, and decide at the end whether - it's best to keep the previous capture zones or - the new ones. - */ - } else { - capBegin = tempCapBegin; - capEnd = tempCapEnd; -#endif - } - -#ifndef QT_NO_REGEXP_CAPTURE - /* - Updating the capture zones is much of a task. - */ - if (ncap > 0) { - memcpy(capBegin, curCapBegin + j * ncap, ncap * sizeof(int)); - memcpy(capEnd, curCapEnd + j * ncap, ncap * sizeof(int)); - int c = scur.atom, n = snext.atom; - int p = -1, q = -1; - int cap; - - /* - Lemma 1. For any x in the range [0..nf), we - have f[x].parent < x. - - Proof. By looking at startAtom(), it is - clear that cf < nf holds all the time, and - thus that f[nf].parent < nf. - */ - - /* - If we are reentering an atom, we empty all - capture zones inside it. - */ - if ((q = scur.reenter.value(next)) != 0) { - QBitArray b(eng->nf, false); - b.setBit(q, true); - for (int ell = q + 1; ell < eng->nf; ell++) { - if (b.testBit(eng->f.at(ell).parent)) { - b.setBit(ell, true); - cap = eng->f.at(ell).capture; - if (cap >= 0) { - capBegin[cap] = EmptyCapture; - capEnd[cap] = EmptyCapture; - } - } - } - p = eng->f.at(q).parent; - - /* - Otherwise, close the capture zones we are - leaving. We are leaving f[c].capture, - f[f[c].parent].capture, - f[f[f[c].parent].parent].capture, ..., - until f[x].capture, with x such that - f[x].parent is the youngest common ancestor - for c and n. - - We go up along c's and n's ancestry until - we find x. 
- */ - } else { - p = c; - q = n; - while (p != q) { - if (p > q) { - cap = eng->f.at(p).capture; - if (cap >= 0) { - if (capBegin[cap] == i) { - capBegin[cap] = EmptyCapture; - capEnd[cap] = EmptyCapture; - } else { - capEnd[cap] = i; - } - } - p = eng->f.at(p).parent; - } else { - q = eng->f.at(q).parent; - } - } - } - - /* - In any case, we now open the capture zones - we are entering. We work upwards from n - until we reach p (the parent of the atom we - reenter or the youngest common ancestor). - */ - while (n > p) { - cap = eng->f.at(n).capture; - if (cap >= 0) { - capBegin[cap] = i; - capEnd[cap] = EmptyCapture; - } - n = eng->f.at(n).parent; - } - /* - If the next state was already in - nextStack, we must choose carefully which - capture zones we want to keep. - */ - if (capBegin == tempCapBegin && - isBetterCapture(ncap, capBegin, capEnd, nextCapBegin + m * ncap, - nextCapEnd + m * ncap)) { - memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int)); - memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int)); - } - } -#ifndef QT_NO_REGEXP_BACKREF - /* - We are done with updating the capture zones. - It's now time to put the next state to sleep, - if it needs to, and to remove it from - nextStack. - */ - if (needSomeSleep > 0) { - QList zzZ(2 + 2 * ncap); - zzZ[0] = i + needSomeSleep; - zzZ[1] = next; - if (ncap > 0) { - memcpy(zzZ.data() + 2, capBegin, ncap * sizeof(int)); - memcpy(zzZ.data() + 2 + ncap, capEnd, ncap * sizeof(int)); - } - inNextStack[nextStack[--nnext]] = -1; - sleeping.append(zzZ); - } -#endif -#endif - } - } - } -#ifndef QT_NO_REGEXP_CAPTURE - /* - If we reached the final state, hurray! Copy the captured - zone. - */ - if (ncap > 0 && (m = inNextStack[QRegExpEngine::FinalState]) != -1) { - memcpy(capBegin, nextCapBegin + m * ncap, ncap * sizeof(int)); - memcpy(capEnd, nextCapEnd + m * ncap, ncap * sizeof(int)); - } -#ifndef QT_NO_REGEXP_BACKREF - /* - It's time to wake up the sleepers. 
- */ - j = 0; - while (j < sleeping.count()) { - if (sleeping.at(j)[0] == i) { - const QList &zzZ = sleeping.at(j); - int next = zzZ[1]; - const int *capBegin = zzZ.data() + 2; - const int *capEnd = zzZ.data() + 2 + ncap; - bool copyOver = true; - - if ((m = inNextStack[next]) == -1) { - m = nnext++; - nextStack[m] = next; - inNextStack[next] = m; - } else { - copyOver = isBetterCapture(ncap, nextCapBegin + m * ncap, nextCapEnd + m * ncap, - capBegin, capEnd); - } - if (copyOver) { - memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int)); - memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int)); - } - - sleeping.removeAt(j); - } else { - ++j; - } - } -#endif -#endif - for (j = 0; j < nnext; j++) - inNextStack[nextStack[j]] = -1; - - // avoid needless iteration that confuses oneTestMatchedLen - if (nnext == 1 && nextStack[0] == QRegExpEngine::FinalState -#ifndef QT_NO_REGEXP_BACKREF - && sleeping.isEmpty() -#endif - ) - stop = true; - - qSwap(curStack, nextStack); -#ifndef QT_NO_REGEXP_CAPTURE - qSwap(curCapBegin, nextCapBegin); - qSwap(curCapEnd, nextCapEnd); -#endif - ncur = nnext; - nnext = 0; - ++i; - } - -#ifndef QT_NO_REGEXP_BACKREF - /* - If minimal matching is enabled, we might have some sleepers - left. 
- */ - if (!sleeping.isEmpty()) - sleeping.clear(); -#endif - - oneTestMatchedLen = i - 1; - return (matchLen >= 0); -} - -#ifndef QT_NO_REGEXP_CCLASS - -QRegExpCharClass::QRegExpCharClass() - : c(0), n(false) -{ -#ifndef QT_NO_REGEXP_OPTIM - occ1.fill(NoOccurrence, NumBadChars); -#endif -} - -void QRegExpCharClass::clear() -{ - c = 0; - r.clear(); - n = false; -} - -void QRegExpCharClass::setNegative(bool negative) -{ - n = negative; -#ifndef QT_NO_REGEXP_OPTIM - occ1.fill(0, NumBadChars); -#endif -} - -void QRegExpCharClass::addCategories(uint cats) -{ - static const int all_cats = FLAG(QChar::Mark_NonSpacing) | - FLAG(QChar::Mark_SpacingCombining) | - FLAG(QChar::Mark_Enclosing) | - FLAG(QChar::Number_DecimalDigit) | - FLAG(QChar::Number_Letter) | - FLAG(QChar::Number_Other) | - FLAG(QChar::Separator_Space) | - FLAG(QChar::Separator_Line) | - FLAG(QChar::Separator_Paragraph) | - FLAG(QChar::Other_Control) | - FLAG(QChar::Other_Format) | - FLAG(QChar::Other_Surrogate) | - FLAG(QChar::Other_PrivateUse) | - FLAG(QChar::Other_NotAssigned) | - FLAG(QChar::Letter_Uppercase) | - FLAG(QChar::Letter_Lowercase) | - FLAG(QChar::Letter_Titlecase) | - FLAG(QChar::Letter_Modifier) | - FLAG(QChar::Letter_Other) | - FLAG(QChar::Punctuation_Connector) | - FLAG(QChar::Punctuation_Dash) | - FLAG(QChar::Punctuation_Open) | - FLAG(QChar::Punctuation_Close) | - FLAG(QChar::Punctuation_InitialQuote) | - FLAG(QChar::Punctuation_FinalQuote) | - FLAG(QChar::Punctuation_Other) | - FLAG(QChar::Symbol_Math) | - FLAG(QChar::Symbol_Currency) | - FLAG(QChar::Symbol_Modifier) | - FLAG(QChar::Symbol_Other); - c |= (all_cats & cats); -#ifndef QT_NO_REGEXP_OPTIM - occ1.fill(0, NumBadChars); -#endif -} - -void QRegExpCharClass::addRange(ushort from, ushort to) -{ - if (from > to) - qSwap(from, to); - int m = r.size(); - r.resize(m + 1); - r[m].from = from; - r[m].len = to - from + 1; - -#ifndef QT_NO_REGEXP_OPTIM - int i; - - if (to - from < NumBadChars) { - if (from % NumBadChars <= to % 
NumBadChars) { - for (i = from % NumBadChars; i <= to % NumBadChars; i++) - occ1[i] = 0; - } else { - for (i = 0; i <= to % NumBadChars; i++) - occ1[i] = 0; - for (i = from % NumBadChars; i < NumBadChars; i++) - occ1[i] = 0; - } - } else { - occ1.fill(0, NumBadChars); - } -#endif -} - -bool QRegExpCharClass::in(QChar ch) const -{ -#ifndef QT_NO_REGEXP_OPTIM - if (occ1.at(BadChar(ch)) == NoOccurrence) - return n; -#endif - - if (c != 0 && (c & FLAG(ch.category())) != 0) - return !n; - - const int uc = ch.unicode(); - int size = r.size(); - - for (int i = 0; i < size; ++i) { - const QRegExpCharClassRange &range = r.at(i); - if (uint(uc - range.from) < uint(r.at(i).len)) - return !n; - } - return n; -} - -#if defined(QT_DEBUG) -void QRegExpCharClass::dump() const -{ - int i; - qDebug(" %stive character class", n ? "nega" : "posi"); -#ifndef QT_NO_REGEXP_CCLASS - if (c != 0) - qDebug(" categories 0x%.8x", c); -#endif - for (i = 0; i < r.size(); i++) - qDebug(" 0x%.4x through 0x%.4x", r[i].from, r[i].from + r[i].len - 1); -} -#endif -#endif - -QRegExpEngine::Box::Box(QRegExpEngine *engine) - : eng(engine), skipanchors(0) -#ifndef QT_NO_REGEXP_OPTIM - , earlyStart(0), lateStart(0), maxl(0) -#endif -{ -#ifndef QT_NO_REGEXP_OPTIM - occ1.fill(NoOccurrence, NumBadChars); -#endif - minl = 0; -} - -QRegExpEngine::Box &QRegExpEngine::Box::operator=(const Box &b) -{ - eng = b.eng; - ls = b.ls; - rs = b.rs; - lanchors = b.lanchors; - ranchors = b.ranchors; - skipanchors = b.skipanchors; -#ifndef QT_NO_REGEXP_OPTIM - earlyStart = b.earlyStart; - lateStart = b.lateStart; - str = b.str; - leftStr = b.leftStr; - rightStr = b.rightStr; - maxl = b.maxl; - occ1 = b.occ1; -#endif - minl = b.minl; - return *this; -} - -void QRegExpEngine::Box::set(QChar ch) -{ - ls.resize(1); - ls[0] = eng->createState(ch); - rs = ls; -#ifndef QT_NO_REGEXP_OPTIM - str = ch; - leftStr = ch; - rightStr = ch; - maxl = 1; - occ1[BadChar(ch)] = 0; -#endif - minl = 1; -} - -void QRegExpEngine::Box::set(const 
QRegExpCharClass &cc) -{ - ls.resize(1); - ls[0] = eng->createState(cc); - rs = ls; -#ifndef QT_NO_REGEXP_OPTIM - maxl = 1; - occ1 = cc.firstOccurrence(); -#endif - minl = 1; -} - -#ifndef QT_NO_REGEXP_BACKREF -void QRegExpEngine::Box::set(int bref) -{ - ls.resize(1); - ls[0] = eng->createState(bref); - rs = ls; - if (bref >= 1 && bref <= MaxBackRefs) - skipanchors = Anchor_BackRef0Empty << bref; -#ifndef QT_NO_REGEXP_OPTIM - maxl = InftyLen; -#endif - minl = 0; -} -#endif - -void QRegExpEngine::Box::cat(const Box &b) -{ - eng->addCatTransitions(rs, b.ls); - addAnchorsToEngine(b); - if (minl == 0) { - lanchors.insert(b.lanchors); - if (skipanchors != 0) { - for (int i = 0; i < b.ls.size(); i++) { - int a = eng->anchorConcatenation(lanchors.value(b.ls.at(i), 0), skipanchors); - lanchors.insert(b.ls.at(i), a); - } - } - mergeInto(&ls, b.ls); - } - if (b.minl == 0) { - ranchors.insert(b.ranchors); - if (b.skipanchors != 0) { - for (int i = 0; i < rs.size(); i++) { - int a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), b.skipanchors); - ranchors.insert(rs.at(i), a); - } - } - mergeInto(&rs, b.rs); - } else { - ranchors = b.ranchors; - rs = b.rs; - } - -#ifndef QT_NO_REGEXP_OPTIM - if (maxl != InftyLen) { - if (rightStr.length() + b.leftStr.length() > - qMax(str.length(), b.str.length())) { - earlyStart = minl - rightStr.length(); - lateStart = maxl - rightStr.length(); - str = rightStr + b.leftStr; - } else if (b.str.length() > str.length()) { - earlyStart = minl + b.earlyStart; - lateStart = maxl + b.lateStart; - str = b.str; - } - } - - if (leftStr.length() == maxl) - leftStr += b.leftStr; - - if (b.rightStr.length() == b.maxl) { - rightStr += b.rightStr; - } else { - rightStr = b.rightStr; - } - - if (maxl == InftyLen || b.maxl == InftyLen) { - maxl = InftyLen; - } else { - maxl += b.maxl; - } - - for (int i = 0; i < NumBadChars; i++) { - if (b.occ1.at(i) != NoOccurrence && minl + b.occ1.at(i) < occ1.at(i)) - occ1[i] = minl + b.occ1.at(i); - } -#endif - - 
minl += b.minl; - if (minl == 0) - skipanchors = eng->anchorConcatenation(skipanchors, b.skipanchors); - else - skipanchors = 0; -} - -void QRegExpEngine::Box::orx(const Box &b) -{ - mergeInto(&ls, b.ls); - lanchors.insert(b.lanchors); - mergeInto(&rs, b.rs); - ranchors.insert(b.ranchors); - - if (b.minl == 0) { - if (minl == 0) - skipanchors = eng->anchorAlternation(skipanchors, b.skipanchors); - else - skipanchors = b.skipanchors; - } - -#ifndef QT_NO_REGEXP_OPTIM - for (int i = 0; i < NumBadChars; i++) { - if (occ1.at(i) > b.occ1.at(i)) - occ1[i] = b.occ1.at(i); - } - earlyStart = 0; - lateStart = 0; - str = QString(); - leftStr = QString(); - rightStr = QString(); - if (b.maxl > maxl) - maxl = b.maxl; -#endif - if (b.minl < minl) - minl = b.minl; -} - -void QRegExpEngine::Box::plus(int atom) -{ -#ifndef QT_NO_REGEXP_CAPTURE - eng->addPlusTransitions(rs, ls, atom); -#else - Q_UNUSED(atom); - eng->addCatTransitions(rs, ls); -#endif - addAnchorsToEngine(*this); -#ifndef QT_NO_REGEXP_OPTIM - maxl = InftyLen; -#endif -} - -void QRegExpEngine::Box::opt() -{ -#ifndef QT_NO_REGEXP_OPTIM - earlyStart = 0; - lateStart = 0; - str = QString(); - leftStr = QString(); - rightStr = QString(); -#endif - skipanchors = 0; - minl = 0; -} - -void QRegExpEngine::Box::catAnchor(int a) -{ - if (a != 0) { - for (int i = 0; i < rs.size(); i++) { - a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), a); - ranchors.insert(rs.at(i), a); - } - if (minl == 0) - skipanchors = eng->anchorConcatenation(skipanchors, a); - } -} - -#ifndef QT_NO_REGEXP_OPTIM -void QRegExpEngine::Box::setupHeuristics() -{ - eng->goodEarlyStart = earlyStart; - eng->goodLateStart = lateStart; - eng->goodStr = eng->cs ? str : str.toLower(); - - eng->minl = minl; - if (eng->cs) { - /* - A regular expression such as 112|1 has occ1['2'] = 2 and minl = - 1 at this point. An entry of occ1 has to be at most minl or - infinity for the rest of the algorithm to go well. 
- - We waited until here before normalizing these cases (instead of - doing it in Box::orx()) because sometimes things improve by - themselves. Consider for example (112|1)34. - */ - for (int i = 0; i < NumBadChars; i++) { - if (occ1.at(i) != NoOccurrence && occ1.at(i) >= minl) - occ1[i] = minl; - } - eng->occ1 = occ1; - } else { - eng->occ1.fill(0, NumBadChars); - } - - eng->heuristicallyChooseHeuristic(); -} -#endif - -#if defined(QT_DEBUG) -void QRegExpEngine::Box::dump() const -{ - int i; - qDebug("Box of at least %d character%s", minl, minl == 1 ? "" : "s"); - qDebug(" Left states:"); - for (i = 0; i < ls.size(); i++) { - if (lanchors.value(ls[i], 0) == 0) - qDebug(" %d", ls[i]); - else - qDebug(" %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]]); - } - qDebug(" Right states:"); - for (i = 0; i < rs.size(); i++) { - if (ranchors.value(rs[i], 0) == 0) - qDebug(" %d", rs[i]); - else - qDebug(" %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]]); - } - qDebug(" Skip anchors: 0x%.8x", skipanchors); -} -#endif - -void QRegExpEngine::Box::addAnchorsToEngine(const Box &to) const -{ - for (int i = 0; i < to.ls.size(); i++) { - for (int j = 0; j < rs.size(); j++) { - int a = eng->anchorConcatenation(ranchors.value(rs.at(j), 0), - to.lanchors.value(to.ls.at(i), 0)); - eng->addAnchors(rs[j], to.ls[i], a); - } - } -} - -#ifndef QT_NO_REGEXP_CCLASS -// fast lookup hash for xml schema extensions -// sorted by name for b-search -static const struct CategoriesRangeMapEntry { - const char name[40]; - uint first, second; -} categoriesRangeMap[] = { - { "AegeanNumbers", 0x10100, 0x1013F }, - { "AlphabeticPresentationForms", 0xFB00, 0xFB4F }, - { "AncientGreekMusicalNotation", 0x1D200, 0x1D24F }, - { "AncientGreekNumbers", 0x10140, 0x1018F }, - { "Arabic", 0x0600, 0x06FF }, - { "ArabicPresentationForms-A", 0xFB50, 0xFDFF }, - { "ArabicPresentationForms-B", 0xFE70, 0xFEFF }, - { "ArabicSupplement", 0x0750, 0x077F }, - { "Armenian", 0x0530, 0x058F }, - { "Arrows", 0x2190, 0x21FF }, - { 
"BasicLatin", 0x0000, 0x007F }, - { "Bengali", 0x0980, 0x09FF }, - { "BlockElements", 0x2580, 0x259F }, - { "Bopomofo", 0x3100, 0x312F }, - { "BopomofoExtended", 0x31A0, 0x31BF }, - { "BoxDrawing", 0x2500, 0x257F }, - { "BraillePatterns", 0x2800, 0x28FF }, - { "Buginese", 0x1A00, 0x1A1F }, - { "Buhid", 0x1740, 0x175F }, - { "ByzantineMusicalSymbols", 0x1D000, 0x1D0FF }, - { "CJKCompatibility", 0x3300, 0x33FF }, - { "CJKCompatibilityForms", 0xFE30, 0xFE4F }, - { "CJKCompatibilityIdeographs", 0xF900, 0xFAFF }, - { "CJKCompatibilityIdeographsSupplement", 0x2F800, 0x2FA1F }, - { "CJKRadicalsSupplement", 0x2E80, 0x2EFF }, - { "CJKStrokes", 0x31C0, 0x31EF }, - { "CJKSymbolsandPunctuation", 0x3000, 0x303F }, - { "CJKUnifiedIdeographs", 0x4E00, 0x9FFF }, - { "CJKUnifiedIdeographsExtensionA", 0x3400, 0x4DB5 }, - { "CJKUnifiedIdeographsExtensionB", 0x20000, 0x2A6DF }, - { "Cherokee", 0x13A0, 0x13FF }, - { "CombiningDiacriticalMarks", 0x0300, 0x036F }, - { "CombiningDiacriticalMarksSupplement", 0x1DC0, 0x1DFF }, - { "CombiningHalfMarks", 0xFE20, 0xFE2F }, - { "CombiningMarksforSymbols", 0x20D0, 0x20FF }, - { "ControlPictures", 0x2400, 0x243F }, - { "Coptic", 0x2C80, 0x2CFF }, - { "CurrencySymbols", 0x20A0, 0x20CF }, - { "CypriotSyllabary", 0x10800, 0x1083F }, - { "Cyrillic", 0x0400, 0x04FF }, - { "CyrillicSupplement", 0x0500, 0x052F }, - { "Deseret", 0x10400, 0x1044F }, - { "Devanagari", 0x0900, 0x097F }, - { "Dingbats", 0x2700, 0x27BF }, - { "EnclosedAlphanumerics", 0x2460, 0x24FF }, - { "EnclosedCJKLettersandMonths", 0x3200, 0x32FF }, - { "Ethiopic", 0x1200, 0x137F }, - { "EthiopicExtended", 0x2D80, 0x2DDF }, - { "EthiopicSupplement", 0x1380, 0x139F }, - { "GeneralPunctuation", 0x2000, 0x206F }, - { "GeometricShapes", 0x25A0, 0x25FF }, - { "Georgian", 0x10A0, 0x10FF }, - { "GeorgianSupplement", 0x2D00, 0x2D2F }, - { "Glagolitic", 0x2C00, 0x2C5F }, - { "Gothic", 0x10330, 0x1034F }, - { "Greek", 0x0370, 0x03FF }, - { "GreekExtended", 0x1F00, 0x1FFF }, - { "Gujarati", 0x0A80, 
0x0AFF }, - { "Gurmukhi", 0x0A00, 0x0A7F }, - { "HalfwidthandFullwidthForms", 0xFF00, 0xFFEF }, - { "HangulCompatibilityJamo", 0x3130, 0x318F }, - { "HangulJamo", 0x1100, 0x11FF }, - { "HangulSyllables", 0xAC00, 0xD7A3 }, - { "Hanunoo", 0x1720, 0x173F }, - { "Hebrew", 0x0590, 0x05FF }, - { "Hiragana", 0x3040, 0x309F }, - { "IPAExtensions", 0x0250, 0x02AF }, - { "IdeographicDescriptionCharacters", 0x2FF0, 0x2FFF }, - { "Kanbun", 0x3190, 0x319F }, - { "KangxiRadicals", 0x2F00, 0x2FDF }, - { "Kannada", 0x0C80, 0x0CFF }, - { "Katakana", 0x30A0, 0x30FF }, - { "KatakanaPhoneticExtensions", 0x31F0, 0x31FF }, - { "Kharoshthi", 0x10A00, 0x10A5F }, - { "Khmer", 0x1780, 0x17FF }, - { "KhmerSymbols", 0x19E0, 0x19FF }, - { "Lao", 0x0E80, 0x0EFF }, - { "Latin-1Supplement", 0x0080, 0x00FF }, - { "LatinExtended-A", 0x0100, 0x017F }, - { "LatinExtended-B", 0x0180, 0x024F }, - { "LatinExtendedAdditional", 0x1E00, 0x1EFF }, - { "LetterlikeSymbols", 0x2100, 0x214F }, - { "Limbu", 0x1900, 0x194F }, - { "LinearBIdeograms", 0x10080, 0x100FF }, - { "LinearBSyllabary", 0x10000, 0x1007F }, - { "Malayalam", 0x0D00, 0x0D7F }, - { "MathematicalAlphanumericSymbols", 0x1D400, 0x1D7FF }, - { "MathematicalOperators", 0x2200, 0x22FF }, - { "MiscellaneousMathematicalSymbols-A", 0x27C0, 0x27EF }, - { "MiscellaneousMathematicalSymbols-B", 0x2980, 0x29FF }, - { "MiscellaneousSymbols", 0x2600, 0x26FF }, - { "MiscellaneousSymbolsandArrows", 0x2B00, 0x2BFF }, - { "MiscellaneousTechnical", 0x2300, 0x23FF }, - { "ModifierToneLetters", 0xA700, 0xA71F }, - { "Mongolian", 0x1800, 0x18AF }, - { "MusicalSymbols", 0x1D100, 0x1D1FF }, - { "Myanmar", 0x1000, 0x109F }, - { "NewTaiLue", 0x1980, 0x19DF }, - { "NumberForms", 0x2150, 0x218F }, - { "Ogham", 0x1680, 0x169F }, - { "OldItalic", 0x10300, 0x1032F }, - { "OldPersian", 0x103A0, 0x103DF }, - { "OpticalCharacterRecognition", 0x2440, 0x245F }, - { "Oriya", 0x0B00, 0x0B7F }, - { "Osmanya", 0x10480, 0x104AF }, - { "PhoneticExtensions", 0x1D00, 0x1D7F }, - { 
"PhoneticExtensionsSupplement", 0x1D80, 0x1DBF }, - { "PrivateUse", 0xE000, 0xF8FF }, - { "Runic", 0x16A0, 0x16FF }, - { "Shavian", 0x10450, 0x1047F }, - { "Sinhala", 0x0D80, 0x0DFF }, - { "SmallFormVariants", 0xFE50, 0xFE6F }, - { "SpacingModifierLetters", 0x02B0, 0x02FF }, - { "Specials", 0xFFF0, 0xFFFF }, - { "SuperscriptsandSubscripts", 0x2070, 0x209F }, - { "SupplementalArrows-A", 0x27F0, 0x27FF }, - { "SupplementalArrows-B", 0x2900, 0x297F }, - { "SupplementalMathematicalOperators", 0x2A00, 0x2AFF }, - { "SupplementalPunctuation", 0x2E00, 0x2E7F }, - { "SupplementaryPrivateUseArea-A", 0xF0000, 0xFFFFF }, - { "SupplementaryPrivateUseArea-B", 0x100000, 0x10FFFF }, - { "SylotiNagri", 0xA800, 0xA82F }, - { "Syriac", 0x0700, 0x074F }, - { "Tagalog", 0x1700, 0x171F }, - { "Tagbanwa", 0x1760, 0x177F }, - { "Tags", 0xE0000, 0xE007F }, - { "TaiLe", 0x1950, 0x197F }, - { "TaiXuanJingSymbols", 0x1D300, 0x1D35F }, - { "Tamil", 0x0B80, 0x0BFF }, - { "Telugu", 0x0C00, 0x0C7F }, - { "Thaana", 0x0780, 0x07BF }, - { "Thai", 0x0E00, 0x0E7F }, - { "Tibetan", 0x0F00, 0x0FFF }, - { "Tifinagh", 0x2D30, 0x2D7F }, - { "Ugaritic", 0x10380, 0x1039F }, - { "UnifiedCanadianAboriginalSyllabics", 0x1400, 0x167F }, - { "VariationSelectors", 0xFE00, 0xFE0F }, - { "VariationSelectorsSupplement", 0xE0100, 0xE01EF }, - { "VerticalForms", 0xFE10, 0xFE1F }, - { "YiRadicals", 0xA490, 0xA4CF }, - { "YiSyllables", 0xA000, 0xA48F }, - { "YijingHexagramSymbols", 0x4DC0, 0x4DFF } -}; - -inline bool operator<(const CategoriesRangeMapEntry &entry1, const CategoriesRangeMapEntry &entry2) -{ return qstrcmp(entry1.name, entry2.name) < 0; } -inline bool operator<(const char *name, const CategoriesRangeMapEntry &entry) -{ return qstrcmp(name, entry.name) < 0; } -inline bool operator<(const CategoriesRangeMapEntry &entry, const char *name) -{ return qstrcmp(entry.name, name) < 0; } -#endif // QT_NO_REGEXP_CCLASS - -int QRegExpEngine::getChar() -{ - return (yyPos == yyLen) ? 
EOS : yyIn[yyPos++].unicode(); -} - -int QRegExpEngine::getEscape() -{ -#ifndef QT_NO_REGEXP_ESCAPE - const char tab[] = "afnrtv"; // no b, as \b means word boundary - const char backTab[] = "\a\f\n\r\t\v"; - ushort low; - int i; -#endif - ushort val; - int prevCh = yyCh; - - if (prevCh == EOS) { - error(RXERR_END); - return Tok_Char | '\\'; - } - yyCh = getChar(); -#ifndef QT_NO_REGEXP_ESCAPE - if ((prevCh & ~0xff) == 0) { - const char *p = strchr(tab, prevCh); - if (p != nullptr) - return Tok_Char | backTab[p - tab]; - } -#endif - - switch (prevCh) { -#ifndef QT_NO_REGEXP_ESCAPE - case '0': - val = 0; - for (i = 0; i < 3; i++) { - if (yyCh >= '0' && yyCh <= '7') - val = (val << 3) | (yyCh - '0'); - else - break; - yyCh = getChar(); - } - if ((val & ~0377) != 0) - error(RXERR_OCTAL); - return Tok_Char | val; -#endif -#ifndef QT_NO_REGEXP_ESCAPE - case 'B': - return Tok_NonWord; -#endif -#ifndef QT_NO_REGEXP_CCLASS - case 'D': - // see QChar::isDigit() - yyCharClass->addCategories(uint(-1) ^ FLAG(QChar::Number_DecimalDigit)); - return Tok_CharClass; - case 'S': - // see QChar::isSpace() - yyCharClass->addCategories(uint(-1) ^ (FLAG(QChar::Separator_Space) | - FLAG(QChar::Separator_Line) | - FLAG(QChar::Separator_Paragraph) | - FLAG(QChar::Other_Control))); - yyCharClass->addRange(0x0000, 0x0008); - yyCharClass->addRange(0x000e, 0x001f); - yyCharClass->addRange(0x007f, 0x0084); - yyCharClass->addRange(0x0086, 0x009f); - return Tok_CharClass; - case 'W': - // see QChar::isLetterOrNumber() and QChar::isMark() - yyCharClass->addCategories(uint(-1) ^ (FLAG(QChar::Mark_NonSpacing) | - FLAG(QChar::Mark_SpacingCombining) | - FLAG(QChar::Mark_Enclosing) | - FLAG(QChar::Number_DecimalDigit) | - FLAG(QChar::Number_Letter) | - FLAG(QChar::Number_Other) | - FLAG(QChar::Letter_Uppercase) | - FLAG(QChar::Letter_Lowercase) | - FLAG(QChar::Letter_Titlecase) | - FLAG(QChar::Letter_Modifier) | - FLAG(QChar::Letter_Other) | - FLAG(QChar::Punctuation_Connector))); - 
yyCharClass->addRange(0x203f, 0x2040); - yyCharClass->addSingleton(0x2040); - yyCharClass->addSingleton(0x2054); - yyCharClass->addSingleton(0x30fb); - yyCharClass->addRange(0xfe33, 0xfe34); - yyCharClass->addRange(0xfe4d, 0xfe4f); - yyCharClass->addSingleton(0xff3f); - yyCharClass->addSingleton(0xff65); - return Tok_CharClass; -#endif -#ifndef QT_NO_REGEXP_ESCAPE - case 'b': - return Tok_Word; -#endif -#ifndef QT_NO_REGEXP_CCLASS - case 'd': - // see QChar::isDigit() - yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit)); - return Tok_CharClass; - case 's': - // see QChar::isSpace() - yyCharClass->addCategories(FLAG(QChar::Separator_Space) | - FLAG(QChar::Separator_Line) | - FLAG(QChar::Separator_Paragraph)); - yyCharClass->addRange(0x0009, 0x000d); - yyCharClass->addSingleton(0x0085); - return Tok_CharClass; - case 'w': - // see QChar::isLetterOrNumber() and QChar::isMark() - yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | - FLAG(QChar::Mark_SpacingCombining) | - FLAG(QChar::Mark_Enclosing) | - FLAG(QChar::Number_DecimalDigit) | - FLAG(QChar::Number_Letter) | - FLAG(QChar::Number_Other) | - FLAG(QChar::Letter_Uppercase) | - FLAG(QChar::Letter_Lowercase) | - FLAG(QChar::Letter_Titlecase) | - FLAG(QChar::Letter_Modifier) | - FLAG(QChar::Letter_Other)); - yyCharClass->addSingleton(0x005f); // '_' - return Tok_CharClass; - case 'I': - if (!xmlSchemaExtensions) - break; - yyCharClass->setNegative(!yyCharClass->negative()); - Q_FALLTHROUGH(); - case 'i': - if (xmlSchemaExtensions) { - yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | - FLAG(QChar::Mark_SpacingCombining) | - FLAG(QChar::Mark_Enclosing) | - FLAG(QChar::Number_DecimalDigit) | - FLAG(QChar::Number_Letter) | - FLAG(QChar::Number_Other) | - FLAG(QChar::Letter_Uppercase) | - FLAG(QChar::Letter_Lowercase) | - FLAG(QChar::Letter_Titlecase) | - FLAG(QChar::Letter_Modifier) | - FLAG(QChar::Letter_Other)); - yyCharClass->addSingleton(0x003a); // ':' - yyCharClass->addSingleton(0x005f); // 
'_' - yyCharClass->addRange(0x0041, 0x005a); // [A-Z] - yyCharClass->addRange(0x0061, 0x007a); // [a-z] - yyCharClass->addRange(0xc0, 0xd6); - yyCharClass->addRange(0xd8, 0xf6); - yyCharClass->addRange(0xf8, 0x2ff); - yyCharClass->addRange(0x370, 0x37d); - yyCharClass->addRange(0x37f, 0x1fff); - yyCharClass->addRange(0x200c, 0x200d); - yyCharClass->addRange(0x2070, 0x218f); - yyCharClass->addRange(0x2c00, 0x2fef); - yyCharClass->addRange(0x3001, 0xd7ff); - yyCharClass->addRange(0xf900, 0xfdcf); - yyCharClass->addRange(0xfdf0, 0xfffd); - yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff); - return Tok_CharClass; - } else { - break; - } - case 'C': - if (!xmlSchemaExtensions) - break; - yyCharClass->setNegative(!yyCharClass->negative()); - Q_FALLTHROUGH(); - case 'c': - if (xmlSchemaExtensions) { - yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | - FLAG(QChar::Mark_SpacingCombining) | - FLAG(QChar::Mark_Enclosing) | - FLAG(QChar::Number_DecimalDigit) | - FLAG(QChar::Number_Letter) | - FLAG(QChar::Number_Other) | - FLAG(QChar::Letter_Uppercase) | - FLAG(QChar::Letter_Lowercase) | - FLAG(QChar::Letter_Titlecase) | - FLAG(QChar::Letter_Modifier) | - FLAG(QChar::Letter_Other)); - yyCharClass->addSingleton(0x002d); // '-' - yyCharClass->addSingleton(0x002e); // '.' 
- yyCharClass->addSingleton(0x003a); // ':' - yyCharClass->addSingleton(0x005f); // '_' - yyCharClass->addSingleton(0xb7); - yyCharClass->addRange(0x0030, 0x0039); // [0-9] - yyCharClass->addRange(0x0041, 0x005a); // [A-Z] - yyCharClass->addRange(0x0061, 0x007a); // [a-z] - yyCharClass->addRange(0xc0, 0xd6); - yyCharClass->addRange(0xd8, 0xf6); - yyCharClass->addRange(0xf8, 0x2ff); - yyCharClass->addRange(0x370, 0x37d); - yyCharClass->addRange(0x37f, 0x1fff); - yyCharClass->addRange(0x200c, 0x200d); - yyCharClass->addRange(0x2070, 0x218f); - yyCharClass->addRange(0x2c00, 0x2fef); - yyCharClass->addRange(0x3001, 0xd7ff); - yyCharClass->addRange(0xf900, 0xfdcf); - yyCharClass->addRange(0xfdf0, 0xfffd); - yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff); - yyCharClass->addRange(0x0300, 0x036f); - yyCharClass->addRange(0x203f, 0x2040); - return Tok_CharClass; - } else { - break; - } - case 'P': - if (!xmlSchemaExtensions) - break; - yyCharClass->setNegative(!yyCharClass->negative()); - Q_FALLTHROUGH(); - case 'p': - if (xmlSchemaExtensions) { - if (yyCh != '{') { - error(RXERR_CHARCLASS); - return Tok_CharClass; - } - - QByteArray category; - yyCh = getChar(); - while (yyCh != '}') { - if (yyCh == EOS) { - error(RXERR_END); - return Tok_CharClass; - } - category.append(yyCh); - yyCh = getChar(); - } - yyCh = getChar(); // skip closing '}' - - int catlen = category.length(); - if (catlen == 1 || catlen == 2) { - switch (category.at(0)) { - case 'M': - if (catlen == 1) { - yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | - FLAG(QChar::Mark_SpacingCombining) | - FLAG(QChar::Mark_Enclosing)); - } else { - switch (category.at(1)) { - case 'n': yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing)); break; // Mn - case 'c': yyCharClass->addCategories(FLAG(QChar::Mark_SpacingCombining)); break; // Mc - case 'e': yyCharClass->addCategories(FLAG(QChar::Mark_Enclosing)); break; // Me - default: error(RXERR_CATEGORY); break; - } - } - break; - case 'N': - if 
(catlen == 1) { - yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit) | - FLAG(QChar::Number_Letter) | - FLAG(QChar::Number_Other)); - } else { - switch (category.at(1)) { - case 'd': yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit)); break; // Nd - case 'l': yyCharClass->addCategories(FLAG(QChar::Number_Letter)); break; // Hl - case 'o': yyCharClass->addCategories(FLAG(QChar::Number_Other)); break; // No - default: error(RXERR_CATEGORY); break; - } - } - break; - case 'Z': - if (catlen == 1) { - yyCharClass->addCategories(FLAG(QChar::Separator_Space) | - FLAG(QChar::Separator_Line) | - FLAG(QChar::Separator_Paragraph)); - } else { - switch (category.at(1)) { - case 's': yyCharClass->addCategories(FLAG(QChar::Separator_Space)); break; // Zs - case 'l': yyCharClass->addCategories(FLAG(QChar::Separator_Line)); break; // Zl - case 'p': yyCharClass->addCategories(FLAG(QChar::Separator_Paragraph)); break; // Zp - default: error(RXERR_CATEGORY); break; - } - } - break; - case 'C': - if (catlen == 1) { - yyCharClass->addCategories(FLAG(QChar::Other_Control) | - FLAG(QChar::Other_Format) | - FLAG(QChar::Other_Surrogate) | - FLAG(QChar::Other_PrivateUse) | - FLAG(QChar::Other_NotAssigned)); - } else { - switch (category.at(1)) { - case 'c': yyCharClass->addCategories(FLAG(QChar::Other_Control)); break; // Cc - case 'f': yyCharClass->addCategories(FLAG(QChar::Other_Format)); break; // Cf - case 's': yyCharClass->addCategories(FLAG(QChar::Other_Surrogate)); break; // Cs - case 'o': yyCharClass->addCategories(FLAG(QChar::Other_PrivateUse)); break; // Co - case 'n': yyCharClass->addCategories(FLAG(QChar::Other_NotAssigned)); break; // Cn - default: error(RXERR_CATEGORY); break; - } - } - break; - case 'L': - if (catlen == 1) { - yyCharClass->addCategories(FLAG(QChar::Letter_Uppercase) | - FLAG(QChar::Letter_Lowercase) | - FLAG(QChar::Letter_Titlecase) | - FLAG(QChar::Letter_Modifier) | - FLAG(QChar::Letter_Other)); - } else { - switch (category.at(1)) { - 
case 'u': yyCharClass->addCategories(FLAG(QChar::Letter_Uppercase)); break; // Lu - case 'l': yyCharClass->addCategories(FLAG(QChar::Letter_Lowercase)); break; // Ll - case 't': yyCharClass->addCategories(FLAG(QChar::Letter_Titlecase)); break; // Lt - case 'm': yyCharClass->addCategories(FLAG(QChar::Letter_Modifier)); break; // Lm - case 'o': yyCharClass->addCategories(FLAG(QChar::Letter_Other)); break; // Lo - default: error(RXERR_CATEGORY); break; - } - } - break; - case 'P': - if (catlen == 1) { - yyCharClass->addCategories(FLAG(QChar::Punctuation_Connector) | - FLAG(QChar::Punctuation_Dash) | - FLAG(QChar::Punctuation_Open) | - FLAG(QChar::Punctuation_Close) | - FLAG(QChar::Punctuation_InitialQuote) | - FLAG(QChar::Punctuation_FinalQuote) | - FLAG(QChar::Punctuation_Other)); - } else { - switch (category.at(1)) { - case 'c': yyCharClass->addCategories(FLAG(QChar::Punctuation_Connector)); break; // Pc - case 'd': yyCharClass->addCategories(FLAG(QChar::Punctuation_Dash)); break; // Pd - case 's': yyCharClass->addCategories(FLAG(QChar::Punctuation_Open)); break; // Ps - case 'e': yyCharClass->addCategories(FLAG(QChar::Punctuation_Close)); break; // Pe - case 'i': yyCharClass->addCategories(FLAG(QChar::Punctuation_InitialQuote)); break; // Pi - case 'f': yyCharClass->addCategories(FLAG(QChar::Punctuation_FinalQuote)); break; // Pf - case 'o': yyCharClass->addCategories(FLAG(QChar::Punctuation_Other)); break; // Po - default: error(RXERR_CATEGORY); break; - } - } - break; - case 'S': - if (catlen == 1) { - yyCharClass->addCategories(FLAG(QChar::Symbol_Math) | - FLAG(QChar::Symbol_Currency) | - FLAG(QChar::Symbol_Modifier) | - FLAG(QChar::Symbol_Other)); - } else { - switch (category.at(1)) { - case 'm': yyCharClass->addCategories(FLAG(QChar::Symbol_Math)); break; // Sm - case 'c': yyCharClass->addCategories(FLAG(QChar::Symbol_Currency)); break; // Sc - case 'k': yyCharClass->addCategories(FLAG(QChar::Symbol_Modifier)); break; // Sk - case 'o': 
yyCharClass->addCategories(FLAG(QChar::Symbol_Other)); break; // So - default: error(RXERR_CATEGORY); break; - } - } - break; - default: - error(RXERR_CATEGORY); - break; - } - } else if (catlen > 2 && category.at(0) == 'I' && category.at(1) == 's') { - static const int N = sizeof(categoriesRangeMap) / sizeof(categoriesRangeMap[0]); - const char * const categoryFamily = category.constData() + 2; - const CategoriesRangeMapEntry *r = std::lower_bound(categoriesRangeMap, categoriesRangeMap + N, categoryFamily); - if (r != categoriesRangeMap + N && qstrcmp(r->name, categoryFamily) == 0) - yyCharClass->addRange(r->first, r->second); - else - error(RXERR_CATEGORY); - } else { - error(RXERR_CATEGORY); - } - return Tok_CharClass; - } else { - break; - } -#endif -#ifndef QT_NO_REGEXP_ESCAPE - case 'x': - val = 0; - for (i = 0; i < 4; i++) { - low = QChar(yyCh).toLower().unicode(); - if (low >= '0' && low <= '9') - val = (val << 4) | (low - '0'); - else if (low >= 'a' && low <= 'f') - val = (val << 4) | (low - 'a' + 10); - else - break; - yyCh = getChar(); - } - return Tok_Char | val; -#endif - default: - break; - } - if (prevCh >= '1' && prevCh <= '9') { -#ifndef QT_NO_REGEXP_BACKREF - val = prevCh - '0'; - while (yyCh >= '0' && yyCh <= '9') { - val = (val * 10) + (yyCh - '0'); - yyCh = getChar(); - } - return Tok_BackRef | val; -#else - error(RXERR_DISABLED); -#endif - } - return Tok_Char | prevCh; -} - -#ifndef QT_NO_REGEXP_INTERVAL -int QRegExpEngine::getRep(int def) -{ - if (yyCh >= '0' && yyCh <= '9') { - int rep = 0; - do { - rep = 10 * rep + yyCh - '0'; - if (rep >= InftyRep) { - error(RXERR_REPETITION); - rep = def; - } - yyCh = getChar(); - } while (yyCh >= '0' && yyCh <= '9'); - return rep; - } else { - return def; - } -} -#endif - -#ifndef QT_NO_REGEXP_LOOKAHEAD -void QRegExpEngine::skipChars(int n) -{ - if (n > 0) { - yyPos += n - 1; - yyCh = getChar(); - } -} -#endif - -void QRegExpEngine::error(const char *msg) -{ - if (yyError.isEmpty()) - yyError = 
QLatin1String(msg); -} - -void QRegExpEngine::startTokenizer(const QChar *rx, int len) -{ - yyIn = rx; - yyPos0 = 0; - yyPos = 0; - yyLen = len; - yyCh = getChar(); - yyCharClass.reset(new QRegExpCharClass); - yyMinRep = 0; - yyMaxRep = 0; - yyError = QString(); -} - -int QRegExpEngine::getToken() -{ -#ifndef QT_NO_REGEXP_CCLASS - ushort pendingCh = 0; - bool charPending; - bool rangePending; - int tok; -#endif - int prevCh = yyCh; - - yyPos0 = yyPos - 1; -#ifndef QT_NO_REGEXP_CCLASS - yyCharClass->clear(); -#endif - yyMinRep = 0; - yyMaxRep = 0; - yyCh = getChar(); - - switch (prevCh) { - case EOS: - yyPos0 = yyPos; - return Tok_Eos; - case '$': - return Tok_Dollar; - case '(': - if (yyCh == '?') { - prevCh = getChar(); - yyCh = getChar(); - switch (prevCh) { -#ifndef QT_NO_REGEXP_LOOKAHEAD - case '!': - return Tok_NegLookahead; - case '=': - return Tok_PosLookahead; -#endif - case ':': - return Tok_MagicLeftParen; - case '<': - error(RXERR_LOOKBEHIND); - return Tok_MagicLeftParen; - default: - error(RXERR_LOOKAHEAD); - return Tok_MagicLeftParen; - } - } else { - return Tok_LeftParen; - } - case ')': - return Tok_RightParen; - case '*': - yyMinRep = 0; - yyMaxRep = InftyRep; - return Tok_Quantifier; - case '+': - yyMinRep = 1; - yyMaxRep = InftyRep; - return Tok_Quantifier; - case '.': -#ifndef QT_NO_REGEXP_CCLASS - yyCharClass->setNegative(true); -#endif - return Tok_CharClass; - case '?': - yyMinRep = 0; - yyMaxRep = 1; - return Tok_Quantifier; - case '[': -#ifndef QT_NO_REGEXP_CCLASS - if (yyCh == '^') { - yyCharClass->setNegative(true); - yyCh = getChar(); - } - charPending = false; - rangePending = false; - do { - if (yyCh == '-' && charPending && !rangePending) { - rangePending = true; - yyCh = getChar(); - } else { - if (charPending && !rangePending) { - yyCharClass->addSingleton(pendingCh); - charPending = false; - } - if (yyCh == '\\') { - yyCh = getChar(); - tok = getEscape(); - if (tok == Tok_Word) - tok = '\b'; - } else { - tok = Tok_Char | yyCh; - 
yyCh = getChar(); - } - if (tok == Tok_CharClass) { - if (rangePending) { - yyCharClass->addSingleton('-'); - yyCharClass->addSingleton(pendingCh); - charPending = false; - rangePending = false; - } - } else if ((tok & Tok_Char) != 0) { - if (rangePending) { - yyCharClass->addRange(pendingCh, tok ^ Tok_Char); - charPending = false; - rangePending = false; - } else { - pendingCh = tok ^ Tok_Char; - charPending = true; - } - } else { - error(RXERR_CHARCLASS); - } - } - } while (yyCh != ']' && yyCh != EOS); - if (rangePending) - yyCharClass->addSingleton('-'); - if (charPending) - yyCharClass->addSingleton(pendingCh); - if (yyCh == EOS) - error(RXERR_END); - else - yyCh = getChar(); - return Tok_CharClass; -#else - error(RXERR_END); - return Tok_Char | '['; -#endif - case '\\': - return getEscape(); - case ']': - error(RXERR_LEFTDELIM); - return Tok_Char | ']'; - case '^': - return Tok_Caret; - case '{': -#ifndef QT_NO_REGEXP_INTERVAL - yyMinRep = getRep(0); - yyMaxRep = yyMinRep; - if (yyCh == ',') { - yyCh = getChar(); - yyMaxRep = getRep(InftyRep); - } - if (yyMaxRep < yyMinRep) - error(RXERR_INTERVAL); - if (yyCh != '}') - error(RXERR_REPETITION); - yyCh = getChar(); - return Tok_Quantifier; -#else - error(RXERR_DISABLED); - return Tok_Char | '{'; -#endif - case '|': - return Tok_Bar; - case '}': - error(RXERR_LEFTDELIM); - return Tok_Char | '}'; - default: - return Tok_Char | prevCh; - } -} - -int QRegExpEngine::parse(const QChar *pattern, int len) -{ - valid = true; - startTokenizer(pattern, len); - yyTok = getToken(); -#ifndef QT_NO_REGEXP_CAPTURE - yyMayCapture = true; -#else - yyMayCapture = false; -#endif - -#ifndef QT_NO_REGEXP_CAPTURE - int atom = startAtom(false); -#endif - QRegExpCharClass anything; - Box box(this); // create InitialState - box.set(anything); - Box rightBox(this); // create FinalState - rightBox.set(anything); - - Box middleBox(this); - parseExpression(&middleBox); -#ifndef QT_NO_REGEXP_CAPTURE - finishAtom(atom, false); -#endif -#ifndef 
QT_NO_REGEXP_OPTIM - middleBox.setupHeuristics(); -#endif - box.cat(middleBox); - box.cat(rightBox); - yyCharClass.reset(); - -#ifndef QT_NO_REGEXP_CAPTURE - for (int i = 0; i < nf; ++i) { - switch (f[i].capture) { - case QRegExpAtom::NoCapture: - break; - case QRegExpAtom::OfficialCapture: - f[i].capture = ncap; - captureForOfficialCapture.append(ncap); - ++ncap; - ++officialncap; - break; - case QRegExpAtom::UnofficialCapture: - f[i].capture = greedyQuantifiers ? ncap++ : QRegExpAtom::NoCapture; - } - } - -#ifndef QT_NO_REGEXP_BACKREF -#ifndef QT_NO_REGEXP_OPTIM - if (officialncap == 0 && nbrefs == 0) { - ncap = nf = 0; - f.clear(); - } -#endif - // handle the case where there's a \5 with no corresponding capture - // (captureForOfficialCapture.size() != officialncap) - for (int i = 0; i < nbrefs - officialncap; ++i) { - captureForOfficialCapture.append(ncap); - ++ncap; - } -#endif -#endif - - if (!yyError.isEmpty()) - return -1; - -#ifndef QT_NO_REGEXP_OPTIM - const QRegExpAutomatonState &sinit = s.at(InitialState); - caretAnchored = !sinit.anchors.isEmpty(); - if (caretAnchored) { - const QMap &anchors = sinit.anchors; - QMap::const_iterator a; - for (a = anchors.constBegin(); a != anchors.constEnd(); ++a) { - if ( -#ifndef QT_NO_REGEXP_ANCHOR_ALT - (*a & Anchor_Alternation) != 0 || -#endif - (*a & Anchor_Caret) == 0) - { - caretAnchored = false; - break; - } - } - } -#endif - - // cleanup anchors - int numStates = s.count(); - for (int i = 0; i < numStates; ++i) { - QRegExpAutomatonState &state = s[i]; - if (!state.anchors.isEmpty()) { - QMap::iterator a = state.anchors.begin(); - while (a != state.anchors.end()) { - if (a.value() == 0) - a = state.anchors.erase(a); - else - ++a; - } - } - } - - return yyPos0; -} - -void QRegExpEngine::parseAtom(Box *box) -{ -#ifndef QT_NO_REGEXP_LOOKAHEAD - QRegExpEngine *eng = nullptr; - bool neg; - int len; -#endif - - if ((yyTok & Tok_Char) != 0) { - box->set(QChar(yyTok ^ Tok_Char)); - } else { -#ifndef QT_NO_REGEXP_OPTIM 
- trivial = false; -#endif - switch (yyTok) { - case Tok_Dollar: - box->catAnchor(Anchor_Dollar); - break; - case Tok_Caret: - box->catAnchor(Anchor_Caret); - break; -#ifndef QT_NO_REGEXP_LOOKAHEAD - case Tok_PosLookahead: - case Tok_NegLookahead: - neg = (yyTok == Tok_NegLookahead); - eng = new QRegExpEngine(cs, greedyQuantifiers); - len = eng->parse(yyIn + yyPos - 1, yyLen - yyPos + 1); - if (len >= 0) - skipChars(len); - else - error(RXERR_LOOKAHEAD); - box->catAnchor(addLookahead(eng, neg)); - yyTok = getToken(); - if (yyTok != Tok_RightParen) - error(RXERR_LOOKAHEAD); - break; -#endif -#ifndef QT_NO_REGEXP_ESCAPE - case Tok_Word: - box->catAnchor(Anchor_Word); - break; - case Tok_NonWord: - box->catAnchor(Anchor_NonWord); - break; -#endif - case Tok_LeftParen: - case Tok_MagicLeftParen: - yyTok = getToken(); - parseExpression(box); - if (yyTok != Tok_RightParen) - error(RXERR_END); - break; - case Tok_CharClass: - box->set(*yyCharClass); - break; - case Tok_Quantifier: - error(RXERR_REPETITION); - break; - default: -#ifndef QT_NO_REGEXP_BACKREF - if ((yyTok & Tok_BackRef) != 0) - box->set(yyTok ^ Tok_BackRef); - else -#endif - error(RXERR_DISABLED); - } - } - yyTok = getToken(); -} - -void QRegExpEngine::parseFactor(Box *box) -{ -#ifndef QT_NO_REGEXP_CAPTURE - int outerAtom = greedyQuantifiers ? 
startAtom(false) : -1; - int innerAtom = startAtom(yyMayCapture && yyTok == Tok_LeftParen); - bool magicLeftParen = (yyTok == Tok_MagicLeftParen); -#else - const int innerAtom = -1; -#endif - -#ifndef QT_NO_REGEXP_INTERVAL -#define YYREDO() \ - yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \ - *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok - - const QChar *in = yyIn; - int pos0 = yyPos0; - int pos = yyPos; - int len = yyLen; - int ch = yyCh; - QRegExpCharClass charClass; - if (yyTok == Tok_CharClass) - charClass = *yyCharClass; - int tok = yyTok; - bool mayCapture = yyMayCapture; -#endif - - parseAtom(box); -#ifndef QT_NO_REGEXP_CAPTURE - finishAtom(innerAtom, magicLeftParen); -#endif - - bool hasQuantifier = (yyTok == Tok_Quantifier); - if (hasQuantifier) { -#ifndef QT_NO_REGEXP_OPTIM - trivial = false; -#endif - if (yyMaxRep == InftyRep) { - box->plus(innerAtom); -#ifndef QT_NO_REGEXP_INTERVAL - } else if (yyMaxRep == 0) { - box->clear(); -#endif - } - if (yyMinRep == 0) - box->opt(); - -#ifndef QT_NO_REGEXP_INTERVAL - yyMayCapture = false; - int alpha = (yyMinRep == 0) ? 0 : yyMinRep - 1; - int beta = (yyMaxRep == InftyRep) ? 
0 : yyMaxRep - (alpha + 1); - - Box rightBox(this); - int i; - - for (i = 0; i < beta; i++) { - YYREDO(); - Box leftBox(this); - parseAtom(&leftBox); - leftBox.cat(rightBox); - leftBox.opt(); - rightBox = leftBox; - } - for (i = 0; i < alpha; i++) { - YYREDO(); - Box leftBox(this); - parseAtom(&leftBox); - leftBox.cat(rightBox); - rightBox = leftBox; - } - rightBox.cat(*box); - *box = rightBox; -#endif - yyTok = getToken(); -#ifndef QT_NO_REGEXP_INTERVAL - yyMayCapture = mayCapture; -#endif - } -#undef YYREDO -#ifndef QT_NO_REGEXP_CAPTURE - if (greedyQuantifiers) - finishAtom(outerAtom, hasQuantifier); -#endif -} - -void QRegExpEngine::parseTerm(Box *box) -{ -#ifndef QT_NO_REGEXP_OPTIM - if (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) - parseFactor(box); -#endif - while (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) { - Box rightBox(this); - parseFactor(&rightBox); - box->cat(rightBox); - } -} - -void QRegExpEngine::parseExpression(Box *box) -{ - parseTerm(box); - while (yyTok == Tok_Bar) { -#ifndef QT_NO_REGEXP_OPTIM - trivial = false; -#endif - Box rightBox(this); - yyTok = getToken(); - parseTerm(&rightBox); - box->orx(rightBox); - } -} - -/* - The struct QRegExpPrivate contains the private data of a regular - expression other than the automaton. It makes it possible for many - QRegExp objects to use the same QRegExpEngine object with different - QRegExpPrivate objects. 
-*/ -struct QRegExpPrivate -{ - QRegExpEngine *eng; - QRegExpEngineKey engineKey; - bool minimal; -#ifndef QT_NO_REGEXP_CAPTURE - QString t; // last string passed to QRegExp::indexIn() or lastIndexIn() - QStringList capturedCache; // what QRegExp::capturedTexts() returned last -#endif - QRegExpMatchState matchState; - - inline QRegExpPrivate() - : eng(nullptr), engineKey(QString(), QRegExp::RegExp, Qt::CaseSensitive), minimal(false) { } - inline QRegExpPrivate(const QRegExpEngineKey &key) - : eng(nullptr), engineKey(key), minimal(false) {} -}; - -#if !defined(QT_NO_REGEXP_OPTIM) -struct QRECache -{ - typedef QHash EngineCache; - typedef QCache UnusedEngineCache; - EngineCache usedEngines; - UnusedEngineCache unusedEngines; -}; -Q_GLOBAL_STATIC(QRECache, engineCache) -static QBasicMutex engineCacheMutex; -#endif // QT_NO_REGEXP_OPTIM - -static void derefEngine(QRegExpEngine *eng, const QRegExpEngineKey &key) -{ -#if !defined(QT_NO_REGEXP_OPTIM) - const auto locker = qt_scoped_lock(engineCacheMutex); - if (!eng->ref.deref()) { - if (QRECache *c = engineCache()) { - c->unusedEngines.insert(key, eng, 4 + key.pattern.length() / 4); - c->usedEngines.remove(key); - } else { - delete eng; - } - } -#else - Q_UNUSED(key); - if (!eng->ref.deref()) - delete eng; -#endif -} - -static void prepareEngine_helper(QRegExpPrivate *priv) -{ - Q_ASSERT(!priv->eng); - -#if !defined(QT_NO_REGEXP_OPTIM) - const auto locker = qt_scoped_lock(engineCacheMutex); - if (QRECache *c = engineCache()) { - priv->eng = c->unusedEngines.take(priv->engineKey); - if (!priv->eng) - priv->eng = c->usedEngines.value(priv->engineKey); - if (!priv->eng) - priv->eng = new QRegExpEngine(priv->engineKey); - else - priv->eng->ref.ref(); - - c->usedEngines.insert(priv->engineKey, priv->eng); - return; - } -#endif // QT_NO_REGEXP_OPTIM - - priv->eng = new QRegExpEngine(priv->engineKey); -} - -inline static void prepareEngine(QRegExpPrivate *priv) -{ - if (priv->eng) - return; - prepareEngine_helper(priv); - 
priv->matchState.prepareForMatch(priv->eng); -} - -static void prepareEngineForMatch(QRegExpPrivate *priv, const QString &str) -{ - prepareEngine(priv); - priv->matchState.prepareForMatch(priv->eng); -#ifndef QT_NO_REGEXP_CAPTURE - priv->t = str; - priv->capturedCache.clear(); -#else - Q_UNUSED(str); -#endif -} - -static void invalidateEngine(QRegExpPrivate *priv) -{ - if (priv->eng) { - derefEngine(priv->eng, priv->engineKey); - priv->eng = nullptr; - priv->matchState.drain(); - } -} - -/*! - \enum QRegExp::CaretMode - - The CaretMode enum defines the different meanings of the caret - (\b{^}) in a regular expression. The possible values are: - - \value CaretAtZero - The caret corresponds to index 0 in the searched string. - - \value CaretAtOffset - The caret corresponds to the start offset of the search. - - \value CaretWontMatch - The caret never matches. -*/ - -/*! - \enum QRegExp::PatternSyntax - - The syntax used to interpret the meaning of the pattern. - - \value RegExp A rich Perl-like pattern matching syntax. This is - the default. - - \value RegExp2 Like RegExp, but with \l{greedy quantifiers}. - (Introduced in Qt 4.2.) - - \value Wildcard This provides a simple pattern matching syntax - similar to that used by shells (command interpreters) for "file - globbing". See \l{QRegExp wildcard matching}. - - \value WildcardUnix This is similar to Wildcard but with the - behavior of a Unix shell. The wildcard characters can be escaped - with the character "\\". - - \value FixedString The pattern is a fixed string. This is - equivalent to using the RegExp pattern on a string in - which all metacharacters are escaped using escape(). - - \value W3CXmlSchema11 The pattern is a regular expression as - defined by the W3C XML Schema 1.1 specification. - - \sa setPatternSyntax() -*/ - -/*! - Constructs an empty regexp. - - \sa isValid(), errorString() -*/ -QRegExp::QRegExp() -{ - priv = new QRegExpPrivate; - prepareEngine(priv); -} - -/*! 
- Constructs a regular expression object for the given \a pattern - string. The pattern must be given using wildcard notation if \a - syntax is \l Wildcard; the default is \l RegExp. The pattern is - case sensitive, unless \a cs is Qt::CaseInsensitive. Matching is - greedy (maximal), but can be changed by calling - setMinimal(). - - \sa setPattern(), setCaseSensitivity(), setPatternSyntax() -*/ -QRegExp::QRegExp(const QString &pattern, Qt::CaseSensitivity cs, PatternSyntax syntax) -{ - priv = new QRegExpPrivate(QRegExpEngineKey(pattern, syntax, cs)); - prepareEngine(priv); -} - -/*! - Constructs a regular expression as a copy of \a rx. - - \sa operator=() -*/ -QRegExp::QRegExp(const QRegExp &rx) -{ - priv = new QRegExpPrivate; - operator=(rx); -} - -/*! - Destroys the regular expression and cleans up its internal data. -*/ -QRegExp::~QRegExp() -{ - invalidateEngine(priv); - delete priv; -} - -/*! - Copies the regular expression \a rx and returns a reference to the - copy. The case sensitivity, wildcard, and minimal matching options - are also copied. -*/ -QRegExp &QRegExp::operator=(const QRegExp &rx) -{ - prepareEngine(rx.priv); // to allow sharing - QRegExpEngine *otherEng = rx.priv->eng; - if (otherEng) - otherEng->ref.ref(); - invalidateEngine(priv); - priv->eng = otherEng; - priv->engineKey = rx.priv->engineKey; - priv->minimal = rx.priv->minimal; -#ifndef QT_NO_REGEXP_CAPTURE - priv->t = rx.priv->t; - priv->capturedCache = rx.priv->capturedCache; -#endif - if (priv->eng) - priv->matchState.prepareForMatch(priv->eng); - priv->matchState.captured = rx.priv->matchState.captured; - return *this; -} - -/*! - \fn QRegExp &QRegExp::operator=(QRegExp &&other) - - Move-assigns \a other to this QRegExp instance. - - \since 5.2 -*/ - -/*! - \fn void QRegExp::swap(QRegExp &other) - \since 4.8 - - Swaps regular expression \a other with this regular - expression. This operation is very fast and never fails. -*/ - -/*! 
- Returns \c true if this regular expression is equal to \a rx; - otherwise returns \c false. - - Two QRegExp objects are equal if they have the same pattern - strings and the same settings for case sensitivity, wildcard and - minimal matching. -*/ -bool QRegExp::operator==(const QRegExp &rx) const -{ - return priv->engineKey == rx.priv->engineKey && priv->minimal == rx.priv->minimal; -} - -/*! - \since 5.6 - \relates QRegExp - - Returns the hash value for \a key, using - \a seed to seed the calculation. -*/ -size_t qHash(const QRegExp &key, size_t seed) noexcept -{ - QtPrivate::QHashCombine hash; - seed = hash(seed, key.priv->engineKey); - seed = hash(seed, key.priv->minimal); - return seed; -} - -/*! - \fn bool QRegExp::operator!=(const QRegExp &rx) const - - Returns \c true if this regular expression is not equal to \a rx; - otherwise returns \c false. - - \sa operator==() -*/ - -/*! - Returns \c true if the pattern string is empty; otherwise returns - false. - - If you call exactMatch() with an empty pattern on an empty string - it will return true; otherwise it returns \c false since it operates - over the whole string. If you call indexIn() with an empty pattern - on \e any string it will return the start offset (0 by default) - because the empty pattern matches the 'emptiness' at the start of - the string. In this case the length of the match returned by - matchedLength() will be 0. - - See QString::isEmpty(). -*/ - -bool QRegExp::isEmpty() const -{ - return priv->engineKey.pattern.isEmpty(); -} - -/*! - Returns \c true if the regular expression is valid; otherwise returns - false. An invalid regular expression never matches. - - The pattern \b{[a-z} is an example of an invalid pattern, since - it lacks a closing square bracket. - - Note that the validity of a regexp may also depend on the setting - of the wildcard flag, for example \b{*.html} is a valid - wildcard regexp but an invalid full regexp. 
- - \sa errorString() -*/ -bool QRegExp::isValid() const -{ - if (priv->engineKey.pattern.isEmpty()) { - return true; - } else { - prepareEngine(priv); - return priv->eng->isValid(); - } -} - -/*! - Returns the pattern string of the regular expression. The pattern - has either regular expression syntax or wildcard syntax, depending - on patternSyntax(). - - \sa patternSyntax(), caseSensitivity() -*/ -QString QRegExp::pattern() const -{ - return priv->engineKey.pattern; -} - -/*! - Sets the pattern string to \a pattern. The case sensitivity, - wildcard, and minimal matching options are not changed. - - \sa setPatternSyntax(), setCaseSensitivity() -*/ -void QRegExp::setPattern(const QString &pattern) -{ - if (priv->engineKey.pattern != pattern) { - invalidateEngine(priv); - priv->engineKey.pattern = pattern; - } -} - -/*! - Returns Qt::CaseSensitive if the regexp is matched case - sensitively; otherwise returns Qt::CaseInsensitive. - - \sa patternSyntax(), pattern(), isMinimal() -*/ -Qt::CaseSensitivity QRegExp::caseSensitivity() const -{ - return priv->engineKey.cs; -} - -/*! - Sets case sensitive matching to \a cs. - - If \a cs is Qt::CaseSensitive, \b{\\.txt$} matches - \c{readme.txt} but not \c{README.TXT}. - - \sa setPatternSyntax(), setPattern(), setMinimal() -*/ -void QRegExp::setCaseSensitivity(Qt::CaseSensitivity cs) -{ - if ((bool)cs != (bool)priv->engineKey.cs) { - invalidateEngine(priv); - priv->engineKey.cs = cs; - } -} - -/*! - Returns the syntax used by the regular expression. The default is - QRegExp::RegExp. - - \sa pattern(), caseSensitivity() -*/ -QRegExp::PatternSyntax QRegExp::patternSyntax() const -{ - return priv->engineKey.patternSyntax; -} - -/*! - Sets the syntax mode for the regular expression. The default is - QRegExp::RegExp. - - Setting \a syntax to QRegExp::Wildcard enables simple shell-like - \l{QRegExp wildcard matching}. For example, \b{r*.txt} matches the - string \c{readme.txt} in wildcard mode, but does not match - \c{readme}. 
- - Setting \a syntax to QRegExp::FixedString means that the pattern - is interpreted as a plain string. Special characters (e.g., - backslash) don't need to be escaped then. - - \sa setPattern(), setCaseSensitivity(), escape() -*/ -void QRegExp::setPatternSyntax(PatternSyntax syntax) -{ - if (syntax != priv->engineKey.patternSyntax) { - invalidateEngine(priv); - priv->engineKey.patternSyntax = syntax; - } -} - -/*! - Returns \c true if minimal (non-greedy) matching is enabled; - otherwise returns \c false. - - \sa caseSensitivity(), setMinimal() -*/ -bool QRegExp::isMinimal() const -{ - return priv->minimal; -} - -/*! - Enables or disables minimal matching. If \a minimal is false, - matching is greedy (maximal) which is the default. - - For example, suppose we have the input string "We must be - bold, very bold!" and the pattern - \b{.*}. With the default greedy (maximal) matching, - the match is "We must be \underline{bold, very - bold}!". But with minimal (non-greedy) matching, the - first match is: "We must be \underline{bold}, very - bold!" and the second match is "We must be bold, - very \underline{bold}!". In practice we might use the pattern - \b{[^<]*\} instead, although this will still fail for - nested tags. - - \sa setCaseSensitivity() -*/ -void QRegExp::setMinimal(bool minimal) -{ - priv->minimal = minimal; -} - -// ### Qt 5: make non-const -/*! - Returns \c true if \a str is matched exactly by this regular - expression; otherwise returns \c false. You can determine how much of - the string was matched by calling matchedLength(). - - For a given regexp string R, exactMatch("R") is the equivalent of - indexIn("^R$") since exactMatch() effectively encloses the regexp - in the start of string and end of string anchors, except that it - sets matchedLength() differently. - - For example, if the regular expression is \b{blue}, then - exactMatch() returns \c true only for input \c blue. 
For inputs \c - bluebell, \c blutak and \c lightblue, exactMatch() returns \c false - and matchedLength() will return 4, 3 and 0 respectively. - - Although const, this function sets matchedLength(), - capturedTexts(), and pos(). - - \sa indexIn(), lastIndexIn() -*/ -bool QRegExp::exactMatch(const QString &str) const -{ - prepareEngineForMatch(priv, str); - priv->matchState.match(str.unicode(), str.length(), 0, priv->minimal, true, 0); - if (priv->matchState.captured[1] == str.length()) { - return true; - } else { - priv->matchState.captured[0] = 0; - priv->matchState.captured[1] = priv->matchState.oneTestMatchedLen; - return false; - } -} - -/*! - Returns the regexp as a QVariant -*/ -QRegExp::operator QVariant() const -{ -QT_WARNING_PUSH QT_WARNING_DISABLE_DEPRECATED - QVariant v; - v.setValue(*this); - return v; -QT_WARNING_POP -} - -// ### Qt 5: make non-const -/*! - Attempts to find a match in \a str from position \a offset (0 by - default). If \a offset is -1, the search starts at the last - character; if -2, at the next to last character; etc. - - Returns the position of the first match, or -1 if there was no - match. - - The \a caretMode parameter can be used to instruct whether \b{^} - should match at index 0 or at \a offset. - - You might prefer to use QString::indexOf(), QString::contains(), - or even QStringList::filter(). To replace matches use - QString::replace(). - - Example: - \snippet code/src_corelib_text_qregexp.cpp 13 - - Although const, this function sets matchedLength(), - capturedTexts() and pos(). - - If the QRegExp is a wildcard expression (see setPatternSyntax()) - and want to test a string against the whole wildcard expression, - use exactMatch() instead of this function. 
- - \sa lastIndexIn(), exactMatch() -*/ - -int QRegExp::indexIn(const QString &str, int offset, CaretMode caretMode) const -{ - prepareEngineForMatch(priv, str); - if (offset < 0) - offset += str.length(); - priv->matchState.match(str.unicode(), str.length(), offset, - priv->minimal, false, caretIndex(offset, caretMode)); - return priv->matchState.captured[0]; -} - -// ### Qt 5: make non-const -/*! - Attempts to find a match backwards in \a str from position \a - offset. If \a offset is -1 (the default), the search starts at the - last character; if -2, at the next to last character; etc. - - Returns the position of the first match, or -1 if there was no - match. - - The \a caretMode parameter can be used to instruct whether \b{^} - should match at index 0 or at \a offset. - - Although const, this function sets matchedLength(), - capturedTexts() and pos(). - - \warning Searching backwards is much slower than searching - forwards. - - \sa indexIn(), exactMatch() -*/ - -int QRegExp::lastIndexIn(const QString &str, int offset, CaretMode caretMode) const -{ - prepareEngineForMatch(priv, str); - if (offset < 0) - offset += str.length(); - if (offset < 0 || offset > str.length()) { - memset(priv->matchState.captured, -1, priv->matchState.capturedSize*sizeof(int)); - return -1; - } - - while (offset >= 0) { - priv->matchState.match(str.unicode(), str.length(), offset, - priv->minimal, true, caretIndex(offset, caretMode)); - if (priv->matchState.captured[0] == offset) - return offset; - --offset; - } - return -1; -} - -/*! - Returns the length of the last matched string, or -1 if there was - no match. - - \sa exactMatch(), indexIn(), lastIndexIn() -*/ -int QRegExp::matchedLength() const -{ - return priv->matchState.captured[1]; -} - - -/*! - Replaces every occurrence of this regular expression in - \a str with \a after and returns the result. 
- - For regular expressions containing \l{capturing parentheses}, - occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced - with \a{rx}.cap(1), cap(2), ... - - \sa indexIn(), lastIndexIn(), QRegExp::cap() -*/ -QString QRegExp::replaceIn(const QString &str, const QString &after) const -{ - struct QStringCapture - { - int pos; - int len; - int no; - }; - - QRegExp rx2(*this); - - if (str.isEmpty() && rx2.indexIn(str) == -1) - return str; - - QString s(str); - - int index = 0; - int numCaptures = rx2.captureCount(); - int al = after.length(); - QRegExp::CaretMode caretMode = QRegExp::CaretAtZero; - - if (numCaptures > 0) { - const QChar *uc = after.unicode(); - int numBackRefs = 0; - - for (int i = 0; i < al - 1; i++) { - if (uc[i] == QLatin1Char('\\')) { - int no = uc[i + 1].digitValue(); - if (no > 0 && no <= numCaptures) - numBackRefs++; - } - } - - /* - This is the harder case where we have back-references. - */ - if (numBackRefs > 0) { - QVarLengthArray captures(numBackRefs); - int j = 0; - - for (int i = 0; i < al - 1; i++) { - if (uc[i] == QLatin1Char('\\')) { - int no = uc[i + 1].digitValue(); - if (no > 0 && no <= numCaptures) { - QStringCapture capture; - capture.pos = i; - capture.len = 2; - - if (i < al - 2) { - int secondDigit = uc[i + 2].digitValue(); - if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) { - no = (no * 10) + secondDigit; - ++capture.len; - } - } - - capture.no = no; - captures[j++] = capture; - } - } - } - - while (index <= s.length()) { - index = rx2.indexIn(s, index, caretMode); - if (index == -1) - break; - - QString after2(after); - for (j = numBackRefs - 1; j >= 0; j--) { - const QStringCapture &capture = captures[j]; - after2.replace(capture.pos, capture.len, rx2.cap(capture.no)); - } - - s.replace(index, rx2.matchedLength(), after2); - index += after2.length(); - - // avoid infinite loop on 0-length matches (e.g., QRegExp("[a-z]*")) - if (rx2.matchedLength() == 0) - ++index; - - caretMode = 
QRegExp::CaretWontMatch; - } - return s; - } - } - - /* - This is the simple and optimized case where we don't have - back-references. - */ - while (index != -1) { - struct { - int pos; - int length; - } replacements[2048]; - - int pos = 0; - int adjust = 0; - while (pos < 2047) { - index = rx2.indexIn(s, index, caretMode); - if (index == -1) - break; - int ml = rx2.matchedLength(); - replacements[pos].pos = index; - replacements[pos++].length = ml; - index += ml; - adjust += al - ml; - // avoid infinite loop - if (!ml) - index++; - } - if (!pos) - break; - replacements[pos].pos = s.size(); - int newlen = s.size() + adjust; - - // to continue searching at the right position after we did - // the first round of replacements - if (index != -1) - index += adjust; - QString newstring; - newstring.reserve(newlen + 1); - QChar *newuc = newstring.data(); - QChar *uc = newuc; - int copystart = 0; - int i = 0; - while (i < pos) { - int copyend = replacements[i].pos; - int size = copyend - copystart; - memcpy(static_cast(uc), static_cast(s.constData() + copystart), size * sizeof(QChar)); - uc += size; - memcpy(static_cast(uc), static_cast(after.constData()), al * sizeof(QChar)); - uc += al; - copystart = copyend + replacements[i].length; - i++; - } - memcpy(static_cast(uc), static_cast(s.constData() + copystart), (s.size() - copystart) * sizeof(QChar)); - newstring.resize(newlen); - s = newstring; - caretMode = QRegExp::CaretWontMatch; - } - return s; - -} - - -/*! - \fn QString QRegExp::removeIn(const QString &str) - - Removes every occurrence of this regular expression \a str, and - returns the result - - Does the same as replaceIn(str, QString()). - - \sa indexIn(), lastIndexIn(), replaceIn() -*/ - - -/*! - \fn QString QRegExp::countIn(const QString &str) - - Returns the number of times this regular expression matches - in \a str. 
- - \sa indexIn(), lastIndexIn(), replaceIn() -*/ - -int QRegExp::countIn(const QString &str) const -{ - QRegExp rx2(*this); - int count = 0; - int index = -1; - int len = str.length(); - while (index < len - 1) { // count overlapping matches - index = rx2.indexIn(str, index + 1); - if (index == -1) - break; - count++; - } - return count; -} - -/*! - Splits \a str into substrings wherever this regular expression - matches, and returns the list of those strings. If this regular - expression does not match anywhere in the string, split() returns a - single-element list containing \a str. - - \sa QStringList::join(), section(), QString::split() -*/ -QStringList QRegExp::splitString(const QString &str, Qt::SplitBehavior behavior) const -{ - QRegExp rx2(*this); - QStringList list; - int start = 0; - int extra = 0; - int end; - while ((end = rx2.indexIn(str, start + extra)) != -1) { - int matchedLen = rx2.matchedLength(); - if (start != end || behavior == Qt::KeepEmptyParts) - list.append(str.mid(start, end - start)); - start = end + matchedLen; - extra = (matchedLen == 0) ? 1 : 0; - } - if (start != str.size() || behavior == Qt::KeepEmptyParts) - list.append(str.mid(start, -1)); - return list; -} - -/*! - \fn QStringList QStringList::filter(const QRegExp &rx) const - - \overload - - Returns a list of all the strings that match the regular - expression \a rx. -*/ -QStringList QRegExp::filterList(const QStringList &stringList) const -{ - QStringList res; - for (const QString &s : stringList) { - if (containedIn(s)) - res << s; - } - return res; -} - -/*! - Replaces every occurrence of the regexp \a rx, in each of the - string lists's strings, with \a after. Returns a reference to the - string list. -*/ -QStringList QRegExp::replaceIn(const QStringList &stringList, const QString &after) const -{ - QStringList list; - for (const QString &s : stringList) - list << replaceIn(s, after); - return list; -} - -/*! 
- Returns the index position of the first exact match of this regexp in - \a list, searching forward from index position \a from. Returns - -1 if no item matched. - - \sa lastIndexIn(), contains(), exactMatch() -*/ -int QRegExp::indexIn(const QStringList &list, int from) const -{ - QRegExp rx2(*this); - if (from < 0) - from = qMax(from + list.size(), 0); - for (int i = from; i < list.size(); ++i) { - if (rx2.exactMatch(list.at(i))) - return i; - } - return -1; -} - -/*! - Returns the index position of the last exact match of this regexp in - \a list, searching backward from index position \a from. If \a - from is -1 (the default), the search starts at the last item. - Returns -1 if no item matched. - - \sa indexOf(), contains(), QRegExp::exactMatch() -*/ -int QRegExp::lastIndexIn(const QStringList &list, int from) const -{ - QRegExp rx2(*this); - if (from < 0) - from += list.size(); - else if (from >= list.size()) - from = list.size() - 1; - for (int i = from; i >= 0; --i) { - if (rx2.exactMatch(list.at(i))) - return i; - } - return -1; -} - -#ifndef QT_NO_REGEXP_CAPTURE - -/*! - \since 4.6 - Returns the number of captures contained in the regular expression. - */ -int QRegExp::captureCount() const -{ - prepareEngine(priv); - return priv->eng->captureCount(); -} - -/*! - Returns a list of the captured text strings. - - The first string in the list is the entire matched string. Each - subsequent list element contains a string that matched a - (capturing) subexpression of the regexp. - - For example: - \snippet code/src_corelib_text_qregexp.cpp 14 - - The above example also captures elements that may be present but - which we have no interest in. This problem can be solved by using - non-capturing parentheses: - - \snippet code/src_corelib_text_qregexp.cpp 15 - - Note that if you want to iterate over the list, you should iterate - over a copy, e.g. - \snippet code/src_corelib_text_qregexp.cpp 16 - - Some regexps can match an indeterminate number of times. 
For - example if the input string is "Offsets: 12 14 99 231 7" and the - regexp, \c{rx}, is \b{(\\d+)+}, we would hope to get a list of - all the numbers matched. However, after calling - \c{rx.indexIn(str)}, capturedTexts() will return the list ("12", - "12"), i.e. the entire match was "12" and the first subexpression - matched was "12". The correct approach is to use cap() in a - \l{QRegExp#cap_in_a_loop}{loop}. - - The order of elements in the string list is as follows. The first - element is the entire matching string. Each subsequent element - corresponds to the next capturing open left parentheses. Thus - capturedTexts()[1] is the text of the first capturing parentheses, - capturedTexts()[2] is the text of the second and so on - (corresponding to $1, $2, etc., in some other regexp languages). - - \sa cap(), pos() -*/ -QStringList QRegExp::capturedTexts() const -{ - if (priv->capturedCache.isEmpty()) { - prepareEngine(priv); - const int *captured = priv->matchState.captured; - int n = priv->matchState.capturedSize; - - for (int i = 0; i < n; i += 2) { - QString m; - if (captured[i + 1] == 0) - m = QLatin1String(""); // ### Qt 5: don't distinguish between null and empty - else if (captured[i] >= 0) - m = priv->t.mid(captured[i], captured[i + 1]); - priv->capturedCache.append(m); - } - priv->t.clear(); - } - return priv->capturedCache; -} - -/*! - \internal -*/ -QStringList QRegExp::capturedTexts() -{ - return const_cast(this)->capturedTexts(); -} - -/*! - Returns the text captured by the \a nth subexpression. The entire - match has index 0 and the parenthesized subexpressions have - indexes starting from 1 (excluding non-capturing parentheses). - - \snippet code/src_corelib_text_qregexp.cpp 17 - - The order of elements matched by cap() is as follows. The first - element, cap(0), is the entire matching string. Each subsequent - element corresponds to the next capturing open left parentheses. 
- Thus cap(1) is the text of the first capturing parentheses, cap(2) - is the text of the second, and so on. - - \sa capturedTexts(), pos() -*/ -QString QRegExp::cap(int nth) const -{ - return capturedTexts().value(nth); -} - -/*! - \internal -*/ -QString QRegExp::cap(int nth) -{ - return const_cast(this)->cap(nth); -} - -/*! - Returns the position of the \a nth captured text in the searched - string. If \a nth is 0 (the default), pos() returns the position - of the whole match. - - Example: - \snippet code/src_corelib_text_qregexp.cpp 18 - - For zero-length matches, pos() always returns -1. (For example, if - cap(4) would return an empty string, pos(4) returns -1.) This is - a feature of the implementation. - - \sa cap(), capturedTexts() -*/ -int QRegExp::pos(int nth) const -{ - if (nth < 0 || nth >= priv->matchState.capturedSize / 2) - return -1; - else - return priv->matchState.captured[2 * nth]; -} - -/*! - \internal -*/ -int QRegExp::pos(int nth) -{ - return const_cast(this)->pos(nth); -} - -/*! - Returns a text string that explains why a regexp pattern is - invalid the case being; otherwise returns "no error occurred". - - \sa isValid() -*/ -QString QRegExp::errorString() const -{ - if (isValid()) { - return QString::fromLatin1(RXERR_OK); - } else { - return priv->eng->errorString(); - } -} - -/*! - \internal -*/ -QString QRegExp::errorString() -{ - return const_cast(this)->errorString(); -} - -#endif - -/*! - Returns the string \a str with every regexp special character - escaped with a backslash. The special characters are $, (,), *, +, - ., ?, [, \,], ^, {, | and }. 
- - Example: - - \snippet code/src_corelib_text_qregexp.cpp 19 - - This function is useful to construct regexp patterns dynamically: - - \snippet code/src_corelib_text_qregexp.cpp 20 - - \sa setPatternSyntax() -*/ -QString QRegExp::escape(const QString &str) -{ - QString quoted; - const int count = str.count(); - quoted.reserve(count * 2); - const QLatin1Char backslash('\\'); - for (int i = 0; i < count; i++) { - switch (str.at(i).toLatin1()) { - case '$': - case '(': - case ')': - case '*': - case '+': - case '.': - case '?': - case '[': - case '\\': - case ']': - case '^': - case '{': - case '|': - case '}': - quoted.append(backslash); - } - quoted.append(str.at(i)); - } - return quoted; -} - - -#ifndef QT_NO_DATASTREAM -/*! - \relates QRegExp - - Writes the regular expression \a regExp to stream \a out. - - \sa {Serializing Qt Data Types} -*/ -QDataStream &operator<<(QDataStream &out, const QRegExp ®Exp) -{ - return out << regExp.pattern() << (quint8)regExp.caseSensitivity() - << (quint8)regExp.patternSyntax() - << (quint8)!!regExp.isMinimal(); -} - -/*! - \relates QRegExp - - Reads a regular expression from stream \a in into \a regExp. 
- - \sa {Serializing Qt Data Types} -*/ -QDataStream &operator>>(QDataStream &in, QRegExp ®Exp) -{ - QString pattern; - quint8 cs; - quint8 patternSyntax; - quint8 isMinimal; - - in >> pattern >> cs >> patternSyntax >> isMinimal; - - QRegExp newRegExp(pattern, Qt::CaseSensitivity(cs), - QRegExp::PatternSyntax(patternSyntax)); - - newRegExp.setMinimal(isMinimal); - regExp = newRegExp; - return in; -} -#endif // QT_NO_DATASTREAM - -#ifndef QT_NO_DEBUG_STREAM -QDebug operator<<(QDebug dbg, const QRegExp &r) -{ - QDebugStateSaver saver(dbg); - dbg.nospace() << "QRegExp(patternSyntax=" << r.patternSyntax() - << ", pattern='"<< r.pattern() << "')"; - return dbg; -} -#endif - -QT_END_NAMESPACE diff --git a/src/corelib/text/qregexp.h b/src/corelib/text/qregexp.h deleted file mode 100644 index 0c117fd17f..0000000000 --- a/src/corelib/text/qregexp.h +++ /dev/null @@ -1,151 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. 
Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#ifndef QREGEXP_H -#define QREGEXP_H - -#include - -#ifndef QT_NO_REGEXP - -#include -#include - -QT_BEGIN_NAMESPACE - - -struct QRegExpPrivate; -class QStringList; -class QRegExp; - -Q_CORE_EXPORT size_t qHash(const QRegExp &key, size_t seed = 0) noexcept; - -class Q_CORE_EXPORT QRegExp -{ -public: - enum PatternSyntax { - RegExp, - Wildcard, - FixedString, - RegExp2, - WildcardUnix, - W3CXmlSchema11 }; - enum CaretMode { CaretAtZero, CaretAtOffset, CaretWontMatch }; - - QRegExp(); - explicit QRegExp(const QString &pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive, - PatternSyntax syntax = RegExp); - QRegExp(const QRegExp &rx); - ~QRegExp(); - QRegExp &operator=(const QRegExp &rx); - QRegExp &operator=(QRegExp &&other) noexcept { swap(other); return *this; } - void swap(QRegExp &other) noexcept { qSwap(priv, other.priv); } - - bool operator==(const QRegExp &rx) const; - inline bool operator!=(const QRegExp &rx) const { return !operator==(rx); } - - bool isEmpty() const; - bool isValid() const; - QString pattern() const; - void 
setPattern(const QString &pattern); - Qt::CaseSensitivity caseSensitivity() const; - void setCaseSensitivity(Qt::CaseSensitivity cs); - PatternSyntax patternSyntax() const; - void setPatternSyntax(PatternSyntax syntax); - - bool isMinimal() const; - void setMinimal(bool minimal); - - bool exactMatch(const QString &str) const; - - operator QVariant() const; - - int indexIn(const QString &str, int offset = 0, CaretMode caretMode = CaretAtZero) const; - int lastIndexIn(const QString &str, int offset = -1, CaretMode caretMode = CaretAtZero) const; - int matchedLength() const; -#ifndef QT_NO_REGEXP_CAPTURE - int captureCount() const; - QStringList capturedTexts() const; - QStringList capturedTexts(); - QString cap(int nth = 0) const; - QString cap(int nth = 0); - int pos(int nth = 0) const; - int pos(int nth = 0); - QString errorString() const; - QString errorString(); -#endif - - QString replaceIn(const QString &str, const QString &after) const; - QString removeIn(const QString &str) const - { return replaceIn(str, QString()); } - bool containedIn(const QString &str) const - { return indexIn(str) != -1; } - int countIn(const QString &str) const; - - QStringList splitString(const QString &str, Qt::SplitBehavior behavior = Qt::KeepEmptyParts) const; - - int indexIn(const QStringList &list, int from) const; - int lastIndexIn(const QStringList &list, int from) const; - QStringList replaceIn(const QStringList &stringList, const QString &after) const; - QStringList filterList(const QStringList &stringList) const; - - static QString escape(const QString &str); - - friend Q_CORE_EXPORT size_t qHash(const QRegExp &key, size_t seed) noexcept; - -private: - QRegExpPrivate *priv; -}; - -#ifndef QT_NO_DATASTREAM -Q_CORE_EXPORT QDataStream &operator<<(QDataStream &out, const QRegExp ®Exp); -Q_CORE_EXPORT QDataStream &operator>>(QDataStream &in, QRegExp ®Exp); -#endif - -#ifndef QT_NO_DEBUG_STREAM -Q_CORE_EXPORT QDebug operator<<(QDebug, const QRegExp &); -#endif - -QT_END_NAMESPACE 
- -Q_DECLARE_METATYPE(QRegExp) - -#endif // QT_NO_REGEXP - -#endif // QREGEXP_H diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri index 89fbdddd83..0d9a6af454 100644 --- a/src/corelib/text/text.pri +++ b/src/corelib/text/text.pri @@ -16,7 +16,6 @@ HEADERS += \ text/qlocale_p.h \ text/qlocale_tools_p.h \ text/qlocale_data_p.h \ - text/qregexp.h \ text/qstring.h \ text/qstringalgorithms.h \ text/qstringalgorithms_p.h \ @@ -41,7 +40,6 @@ SOURCES += \ text/qcollator.cpp \ text/qlocale.cpp \ text/qlocale_tools.cpp \ - text/qregexp.cpp \ text/qstring.cpp \ text/qstringbuilder.cpp \ text/qstringconverter.cpp \ diff --git a/src/tools/uic/qclass_lib_map.h b/src/tools/uic/qclass_lib_map.h index b63b05107b..c0862a9c02 100644 --- a/src/tools/uic/qclass_lib_map.h +++ b/src/tools/uic/qclass_lib_map.h @@ -214,7 +214,6 @@ QT_CLASS_LIB(QPointF, QtCore, qpoint.h) QT_CLASS_LIB(QQueue, QtCore, qqueue.h) QT_CLASS_LIB(QRect, QtCore, qrect.h) QT_CLASS_LIB(QRectF, QtCore, qrect.h) -QT_CLASS_LIB(QRegExp, QtCore, qregexp.h) QT_CLASS_LIB(QScopedPointerDeleter, QtCore, qscopedpointer.h) QT_CLASS_LIB(QScopedPointerArrayDeleter, QtCore, qscopedpointer.h) QT_CLASS_LIB(QScopedPointerPodDeleter, QtCore, qscopedpointer.h) diff --git a/tests/auto/corelib/text/.prev_CMakeLists.txt b/tests/auto/corelib/text/.prev_CMakeLists.txt index e23de92c8c..d68a8e9c2d 100644 --- a/tests/auto/corelib/text/.prev_CMakeLists.txt +++ b/tests/auto/corelib/text/.prev_CMakeLists.txt @@ -8,7 +8,6 @@ add_subdirectory(qchar) add_subdirectory(qcollator) add_subdirectory(qlatin1string) add_subdirectory(qlocale) -add_subdirectory(qregexp) add_subdirectory(qregularexpression) add_subdirectory(qstring) add_subdirectory(qstring_no_cast_from_bytearray) diff --git a/tests/auto/corelib/text/CMakeLists.txt b/tests/auto/corelib/text/CMakeLists.txt index 0c36f1e088..285fdb27c3 100644 --- a/tests/auto/corelib/text/CMakeLists.txt +++ b/tests/auto/corelib/text/CMakeLists.txt @@ -10,7 +10,6 @@ add_subdirectory(qchar) 
add_subdirectory(qcollator) add_subdirectory(qlatin1string) add_subdirectory(qlocale) -add_subdirectory(qregexp) add_subdirectory(qregularexpression) add_subdirectory(qstring) add_subdirectory(qstring_no_cast_from_bytearray) diff --git a/tests/auto/corelib/text/qregexp/.gitignore b/tests/auto/corelib/text/qregexp/.gitignore deleted file mode 100644 index e6e629ee2c..0000000000 --- a/tests/auto/corelib/text/qregexp/.gitignore +++ /dev/null @@ -1 +0,0 @@ -tst_qregexp diff --git a/tests/auto/corelib/text/qregexp/CMakeLists.txt b/tests/auto/corelib/text/qregexp/CMakeLists.txt deleted file mode 100644 index 257e87a684..0000000000 --- a/tests/auto/corelib/text/qregexp/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Generated from qregexp.pro. - -##################################################################### -## tst_qregexp Test: -##################################################################### - -qt_add_test(tst_qregexp - SOURCES - tst_qregexp.cpp -) - -# Resources: -set(qregexp_resource_files - "data/qdatastream_4.9.bin" - "data/qdatastream_5.0.bin" -) - -qt_add_resource(tst_qregexp "qregexp" - PREFIX - "/" - FILES - ${qregexp_resource_files} -) - diff --git a/tests/auto/corelib/text/qregexp/data/qdatastream_4.9.bin b/tests/auto/corelib/text/qregexp/data/qdatastream_4.9.bin deleted file mode 100644 index db8518e064e288165c016781b8ec6f7d54d0be48..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 30 jcmZQzVBlj2WC&tNWk_dmWvE~%U|?X7X8aEXj0_9_NdgAM diff --git a/tests/auto/corelib/text/qregexp/data/qdatastream_5.0.bin b/tests/auto/corelib/text/qregexp/data/qdatastream_5.0.bin deleted file mode 100644 index db8518e064e288165c016781b8ec6f7d54d0be48..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 30 jcmZQzVBlj2WC&tNWk_dmWvE~%U|?X7X8aEXj0_9_NdgAM diff --git a/tests/auto/corelib/text/qregexp/qregexp.pro b/tests/auto/corelib/text/qregexp/qregexp.pro deleted file mode 100644 index 
748e6a248c..0000000000 --- a/tests/auto/corelib/text/qregexp/qregexp.pro +++ /dev/null @@ -1,5 +0,0 @@ -CONFIG += testcase -TARGET = tst_qregexp -QT = core testlib -SOURCES = tst_qregexp.cpp -RESOURCES += qregexp.qrc diff --git a/tests/auto/corelib/text/qregexp/qregexp.qrc b/tests/auto/corelib/text/qregexp/qregexp.qrc deleted file mode 100644 index 8fd168793f..0000000000 --- a/tests/auto/corelib/text/qregexp/qregexp.qrc +++ /dev/null @@ -1,6 +0,0 @@ - - - data/qdatastream_4.9.bin - data/qdatastream_5.0.bin - - diff --git a/tests/auto/corelib/text/qregexp/tst_qregexp.cpp b/tests/auto/corelib/text/qregexp/tst_qregexp.cpp deleted file mode 100644 index 29ddf3673f..0000000000 --- a/tests/auto/corelib/text/qregexp/tst_qregexp.cpp +++ /dev/null @@ -1,1726 +0,0 @@ - -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the test suite of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. 
-** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include -#include - -const int N = 1; - -class tst_QRegExp : public QObject -{ - Q_OBJECT -private slots: - void getSetCheck(); - void indexIn_data(); - void indexIn_addMoreRows(const QByteArray &stri); - void indexIn(); - void lastIndexIn_data(); - void lastIndexIn(); - void matchedLength(); - void wildcard_data(); - void wildcard(); - void testEscapingWildcard_data(); - void testEscapingWildcard(); - void testInvalidWildcard_data(); - void testInvalidWildcard(); - void caretAnchoredOptimization(); - void isEmpty(); - void prepareEngineOptimization(); - void swap(); - void operator_eq(); - - void exactMatch(); - void capturedTexts(); - void staticRegExp(); - void rainersSlowRegExpCopyBug(); - void nonExistingBackReferenceBug(); - - void reentrancy(); - void threadsafeEngineCache(); - - void posAndCapConsistency_data(); - void posAndCapConsistency(); - void interval(); - void validityCheck_data(); - void validityCheck(); - void escapeSequences(); - - void splitString_data(); - void splitString(); - - void countIn(); - void containedIn(); - - void replaceIn_data(); - void replaceIn(); - void removeIn_data(); - void removeIn(); - - void filterList(); - void replaceInList(); - - void datastream_data(); - void datastream(); - - void datastream2(); - -private: - void readQRegExp(QDataStream *s); - void writeQRegExp(QDataStream* dev); -}; - -// Testing get/set functions -void tst_QRegExp::getSetCheck() -{ - QRegExp obj1; - // PatternSyntax QRegExp::patternSyntax() - // void QRegExp::setPatternSyntax(PatternSyntax) - obj1.setPatternSyntax(QRegExp::PatternSyntax(QRegExp::RegExp)); - QCOMPARE(QRegExp::PatternSyntax(QRegExp::RegExp), obj1.patternSyntax()); - obj1.setPatternSyntax(QRegExp::PatternSyntax(QRegExp::Wildcard)); - QCOMPARE(QRegExp::PatternSyntax(QRegExp::Wildcard), obj1.patternSyntax()); - 
obj1.setPatternSyntax(QRegExp::PatternSyntax(QRegExp::FixedString)); - QCOMPARE(QRegExp::PatternSyntax(QRegExp::FixedString), obj1.patternSyntax()); -} - -extern const char email[]; - -void tst_QRegExp::lastIndexIn_data() -{ - indexIn_data(); -} - -void tst_QRegExp::indexIn_data() -{ - QTest::addColumn("regexpStr"); - QTest::addColumn("target"); - QTest::addColumn("pos"); - QTest::addColumn("len"); - QTest::addColumn("caps"); - - for (int i = 0; i < N; ++i) { - QByteArray stri; - if (i > 0) - stri.setNum(i); - - // anchors - QTest::newRow(qPrintable(stri + "anc00")) << QString("a(?=)z") << QString("az") << 0 << 2 << QStringList(); - QTest::newRow(qPrintable(stri + "anc01")) << QString("a(?!)z") << QString("az") << -1 << -1 << QStringList(); - QTest::newRow(qPrintable(stri + "anc02")) << QString("a(?:(?=)|(?=))z") << QString("az") << 0 << 2 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc03")) << QString("a(?:(?=)|(?!))z") << QString("az") << 0 << 2 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc04")) << QString("a(?:(?!)|(?=))z") << QString("az") << 0 << 2 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc05")) << QString("a(?:(?!)|(?!))z") << QString("az") << -1 << -1 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc06")) << QString("a(?:(?=)|b)z") << QString("az") << 0 << 2 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc07")) << QString("a(?:(?=)|b)z") << QString("abz") << 0 << 3 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc08")) << QString("a(?:(?!)|b)z") << QString("az") << -1 << -1 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc09")) << QString("a(?:(?!)|b)z") << QString("abz") << 0 << 3 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc10")) << QString("a?(?=^b$)") << QString("ab") << -1 << -1 - << QStringList(); - QTest::newRow(qPrintable(stri + "anc11")) << QString("a?(?=^b$)") << QString("b") << 0 << 0 - << QStringList(); - - // back-references - QTest::newRow(qPrintable(stri 
+ "bref00")) << QString("(a*)(\\1)") << QString("aaaaa") << 0 << 4 - << QStringList( QStringList() << "aa" << "aa" ); - QTest::newRow(qPrintable(stri + "bref01")) << QString("<(\\w*)>.+") << QString("blablabla") - << 0 << 13 << QStringList( QStringList() << "b" ); - QTest::newRow(qPrintable(stri + "bref02")) << QString("<(\\w*)>.+") << QString("<>blablabla") - << 0 << 18 << QStringList( QStringList() << "" ); - QTest::newRow(qPrintable(stri + "bref03")) << QString("((a*\\2)\\2)") << QString("aaaa") << 0 << 4 - << QStringList( QStringList() << QString("aaaa") << "aa" ); - QTest::newRow(qPrintable(stri + "bref04")) << QString("^(aa+)\\1+$") << QString("aaaaaa") << 0 << 6 - << QStringList( QStringList() << QString("aa") ); - QTest::newRow(qPrintable(stri + "bref05")) << QString("^(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)(12)(13)(14)" - "\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1") - << QString("12345678910111213141413121110987654321") << 0 << 38 - << QStringList( QStringList() << "1" << "2" << "3" << "4" << "5" << "6" - << "7" << "8" << "9" << "10" << "11" - << "12" << "13" << "14"); - - // captures - QTest::newRow(qPrintable(stri + "cap00")) << QString("(a*)") << QString("") << 0 << 0 - << QStringList( QStringList() << QString("") ); - QTest::newRow(qPrintable(stri + "cap01")) << QString("(a*)") << QString("aaa") << 0 << 3 - << QStringList( QStringList() << "aaa" ); - QTest::newRow(qPrintable(stri + "cap02")) << QString("(a*)") << QString("baaa") << 0 << 0 - << QStringList( QStringList() << QString("") ); - QTest::newRow(qPrintable(stri + "cap03")) << QString("(a*)(a*)") << QString("aaa") << 0 << 3 - << QStringList( QStringList() << QString("aaa") << QString("") ); - QTest::newRow(qPrintable(stri + "cap04")) << QString("(a*)(b*)") << QString("aaabbb") << 0 << 6 - << QStringList( QStringList() << QString("aaa") << QString("bbb") ); - QTest::newRow(qPrintable(stri + "cap06")) << QString("(a*)a*") << QString("aaa") << 0 << 3 - << QStringList( QStringList() << 
QString("aaa") ); - QTest::newRow(qPrintable(stri + "cap07")) << QString("((a*a*)*)") << QString("aaa") << 0 << 3 - << QStringList( QStringList() << "aaa" << QString("aaa") ); - QTest::newRow(qPrintable(stri + "cap08")) << QString("(((a)*(b)*)*)") << QString("ababa") << 0 << 5 - << QStringList( QStringList() << QString("ababa") << QString("a") << QString("a") - << "" ); - QTest::newRow(qPrintable(stri + "cap09")) << QString("(((a)*(b)*)c)*") << QString("") << 0 << 0 - << QStringList( QStringList() << QString("") << QString("") << QString("") << QString("") ); - QTest::newRow(qPrintable(stri + "cap10")) << QString("(((a)*(b)*)c)*") << QString("abc") << 0 << 3 - << QStringList( QStringList() << "abc" << "ab" << "a" - << "b" ); - QTest::newRow(qPrintable(stri + "cap11")) << QString("(((a)*(b)*)c)*") << QString("abcc") << 0 << 4 - << QStringList( QStringList() << "c" << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "cap12")) << QString("(((a)*(b)*)c)*") << QString("abcac") << 0 << 5 - << QStringList( QStringList() << "ac" << "a" << "a" << "" ); - QTest::newRow(qPrintable(stri + "cap13")) << QString("(to|top)?(o|polo)?(gical|o?logical)") - << QString("topological") << 0 << 11 - << QStringList( QStringList() << "top" << "o" - << "logical" ); - QTest::newRow(qPrintable(stri + "cap14")) << QString("(a)+") << QString("aaaa") << 0 << 4 - << QStringList( QStringList() << "a" ); - - // concatenation - QTest::newRow(qPrintable(stri + "cat00")) << QString("") << QString("") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "cat01")) << QString("") << QString("a") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "cat02")) << QString("a") << QString("") << -1 << -1 << QStringList(); - QTest::newRow(qPrintable(stri + "cat03")) << QString("a") << QString("a") << 0 << 1 << QStringList(); - QTest::newRow(qPrintable(stri + "cat04")) << QString("a") << QString("b") << -1 << -1 << QStringList(); - QTest::newRow(qPrintable(stri + "cat05")) << QString("b") << 
QString("a") << -1 << -1 << QStringList(); - QTest::newRow(qPrintable(stri + "cat06")) << QString("ab") << QString("ab") << 0 << 2 << QStringList(); - QTest::newRow(qPrintable(stri + "cat07")) << QString("ab") << QString("ba") << -1 << -1 << QStringList(); - QTest::newRow(qPrintable(stri + "cat08")) << QString("abab") << QString("abbaababab") << 4 << 4 << QStringList(); - - indexIn_addMoreRows(stri); - } -} - -void tst_QRegExp::indexIn_addMoreRows(const QByteArray &stri) -{ - // from Perl Cookbook - QTest::newRow(qPrintable(stri + "cook00")) << QString("^(m*)(d?c{0,3}|c[dm])(1?x{0,3}|x[lc])(v?i{0,3}|i[vx])$") - << QString("mmxl") << 0 << 4 - << QStringList( QStringList() << "mm" << "" << "xl" - << "" ); - QTest::newRow(qPrintable(stri + "cook01")) << QString("(\\S+)(\\s+)(\\S+)") << QString(" a b") << 1 << 5 - << QStringList( QStringList() << "a" << " " << "b" ); - QTest::newRow(qPrintable(stri + "cook02")) << QString("(\\w+)\\s*=\\s*(.*)\\s*$") << QString(" PATH=. ") << 1 - << 7 << QStringList( QStringList() << "PATH" << ". 
" ); - QTest::newRow(qPrintable(stri + "cook03")) << QString(".{80,}") - << QString("0000000011111111222222223333333344444444555" - "5555566666666777777778888888899999999000000" - "00aaaaaaaa") - << 0 << 96 << QStringList(); - QTest::newRow(qPrintable(stri + "cook04")) << QString("(\\d+)/(\\d+)/(\\d+) (\\d+):(\\d+):(\\d+)") - << QString("1978/05/24 07:30:00") << 0 << 19 - << QStringList( QStringList() << "1978" << "05" << "24" - << "07" << "30" << "00" ); - QTest::newRow(qPrintable(stri + "cook05")) << QString("/usr/bin") << QString("/usr/local/bin:/usr/bin") - << 15 << 8 << QStringList(); - QTest::newRow(qPrintable(stri + "cook06")) << QString("%([0-9A-Fa-f]{2})") << QString("http://%7f") << 7 << 3 - << QStringList( QStringList() << "7f" ); - QTest::newRow(qPrintable(stri + "cook07")) << QString("/\\*.*\\*/") << QString("i++; /* increment i */") << 5 - << 17 << QStringList(); - QTest::newRow(qPrintable(stri + "cook08")) << QString("^\\s+") << QString(" aaa ") << 0 << 3 - << QStringList(); - QTest::newRow(qPrintable(stri + "cook09")) << QString("\\s+$") << QString(" aaa ") << 6 << 3 - << QStringList(); - QTest::newRow(qPrintable(stri + "cook10")) << QString("^.*::") << QString("Box::cat") << 0 << 5 - << QStringList(); - QTest::newRow(qPrintable(stri + "cook11")) << QString("^([01]?\\d\\d|2[0-4]\\d|25[0-5])\\.([01]?\\" - "d\\d|2[0-4]\\d|25[0-5])\\.([01]?\\d\\d|2[0-" - "4]\\d|25[0-5])\\.([01]?\\d\\d|2[0-4]\\d|25[" - "0-5])$") - << QString("255.00.40.30") << 0 << 12 - << QStringList( QStringList() << "255" << "00" << "40" - << "30" ); - QTest::newRow(qPrintable(stri + "cook12")) << QString("^.*/") << QString(" /usr/local/bin/moc") << 0 << 16 - << QStringList(); - QTest::newRow(qPrintable(stri + "cook13")) << QString(":co#(\\d+):") << QString("bla:co#55:") << 3 << 7 - << QStringList( QStringList() << "55" ); - QTest::newRow(qPrintable(stri + "cook14")) << QString("linux") << QString("alphalinuxinunix") << 5 << 5 - << QStringList(); - QTest::newRow(qPrintable(stri + 
"cook15")) << QString("(\\d+\\.?\\d*|\\.\\d+)") << QString("0.0.5") << 0 << 3 - << QStringList( QStringList() << "0.0" ); - - // mathematical trivia - QTest::newRow(qPrintable(stri + "math00")) << QString("^(a\\1*)$") << QString("a") << 0 << 1 - << QStringList( QStringList() << "a" ); - QTest::newRow(qPrintable(stri + "math01")) << QString("^(a\\1*)$") << QString("aa") << 0 << 2 - << QStringList( QStringList() << "aa" ); - QTest::newRow(qPrintable(stri + "math02")) << QString("^(a\\1*)$") << QString("aaa") << -1 << -1 - << QStringList( QStringList() << QString() ); - QTest::newRow(qPrintable(stri + "math03")) << QString("^(a\\1*)$") << QString("aaaa") << 0 << 4 - << QStringList( QStringList() << "aaaa" ); - QTest::newRow(qPrintable(stri + "math04")) << QString("^(a\\1*)$") << QString("aaaaa") << -1 << -1 - << QStringList( QStringList() << QString() ); - QTest::newRow(qPrintable(stri + "math05")) << QString("^(a\\1*)$") << QString("aaaaaa") << -1 << -1 - << QStringList( QStringList() << QString() ); - QTest::newRow(qPrintable(stri + "math06")) << QString("^(a\\1*)$") << QString("aaaaaaa") << -1 << -1 - << QStringList( QStringList() << QString() ); - QTest::newRow(qPrintable(stri + "math07")) << QString("^(a\\1*)$") << QString("aaaaaaaa") << 0 << 8 - << QStringList( QStringList() << "aaaaaaaa" ); - QTest::newRow(qPrintable(stri + "math08")) << QString("^(a\\1*)$") << QString("aaaaaaaaa") << -1 << -1 - << QStringList( QStringList() << QString() ); - QTest::newRow(qPrintable(stri + "math09")) << QString("^a(?:a(\\1a))*$") << QString("a") << 0 << 1 - << QStringList( QStringList() << "" ); - QTest::newRow(qPrintable(stri + "math10")) << QString("^a(?:a(\\1a))*$") << QString("aaa") << 0 << 3 - << QStringList( QStringList() << "a" ); - - QTest::newRow(qPrintable(stri + "math13")) << QString("^(?:((?:^a)?\\2\\3)(\\3\\1|(?=a$))(\\1\\2|(" - "?=a$)))*a$") - << QString("aaa") << 0 << 3 - << QStringList( QStringList() << "a" << "a" << "" ); - QTest::newRow(qPrintable(stri + 
"math14")) << QString("^(?:((?:^a)?\\2\\3)(\\3\\1|(?=a$))(\\1\\2|(" - "?=a$)))*a$") - << QString("aaaaa") << 0 << 5 - << QStringList( QStringList() << "a" << "a" << "aa" ); - QTest::newRow(qPrintable(stri + "math17")) << QString("^(?:(a(?:(\\1\\3)(\\1\\2))*(?:\\1\\3)?)|((?" - ":(\\4(?:^a)?\\6)(\\4\\5))*(?:\\4\\6)?))$") - << QString("aaa") << 0 << 3 - << QStringList( QStringList() << "" << "" << "" << "aaa" << "a" << "aa" ); - QTest::newRow(qPrintable(stri + "math18")) << QString("^(?:(a(?:(\\1\\3)(\\1\\2))*(?:\\1\\3)?)|((?" - ":(\\4(?:^a)?\\6)(\\4\\5))*(?:\\4\\6)?))$") - << QString("aaaaa") << 0 << 5 - << QStringList( QStringList() << "aaaaa" << "a" << "aaa" << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "math19")) << QString("^(?:(a(?:(\\1\\3)(\\1\\2))*(?:\\1\\3)?)|((?" - ":(\\4(?:^a)?\\6)(\\4\\5))*(?:\\4\\6)?))$") - << QString("aaaaaaaa") << 0 << 8 - << QStringList( QStringList() << "" << "" << "" << "aaaaaaaa" << "a" << "aa" ); - QTest::newRow(qPrintable(stri + "math20")) << QString("^(?:(a(?:(\\1\\3)(\\1\\2))*(?:\\1\\3)?)|((?" 
- ":(\\4(?:^a)?\\6)(\\4\\5))*(?:\\4\\6)?))$") - << QString("aaaaaaaaa") << -1 << -1 - << QStringList( QStringList() << QString() - << QString() - << QString() - << QString() - << QString() - << QString() ); - QTest::newRow(qPrintable(stri + "math21")) << QString("^(aa+)\\1+$") << QString("aaaaaaaaaaaa") << 0 << 12 - << QStringList( QStringList() << "aa" ); - - static const char * const squareRegExp[] = { - "^a(?:(\\1aa)a)*$", - "^(\\2(\\1a))+$", - "^((\\2a)*)\\1\\2a$", - 0 - }; - - int ii = 0; - - while ( squareRegExp[ii] != 0 ) { - for ( int j = 0; j < 100; j++ ) { - const QString name = QString::asprintf( "square%.1d%.2d", ii, j ); - - QString target = ""; - target.fill( 'a', j ); - - int pos = -1; - int len = -1; - - for ( int k = 1; k * k <= j; k++ ) { - if ( k * k == j ) { - pos = 0; - len = j; - break; - } - } - - QTest::newRow( name.toLatin1() ) << QString( squareRegExp[ii] ) << target - << pos << len << QStringList( "IGNORE ME" ); - } - ii++; - } - - // miscellaneous - QTest::newRow(qPrintable(stri + "misc00")) << QString(email) - << QString("email123@example.com") << 0 << 20 - << QStringList(); - QTest::newRow(qPrintable(stri + "misc01")) << QString("[0-9]*\\.[0-9]+") << QString("pi = 3.14") << 5 << 4 - << QStringList(); - - // or operator - QTest::newRow(qPrintable(stri + "or00")) << QString("(?:|b)") << QString("xxx") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "or01")) << QString("(?:|b)") << QString("b") << 0 << 1 << QStringList(); - QTest::newRow(qPrintable(stri + "or02")) << QString("(?:b|)") << QString("") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "or03")) << QString("(?:b|)") << QString("b") << 0 << 1 << QStringList(); - QTest::newRow(qPrintable(stri + "or04")) << QString("(?:||b||)") << QString("") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "or05")) << QString("(?:||b||)") << QString("b") << 0 << 1 << QStringList(); - QTest::newRow(qPrintable(stri + "or06")) << QString("(?:a|b)") << 
QString("") << -1 << -1 << QStringList(); - QTest::newRow(qPrintable(stri + "or07")) << QString("(?:a|b)") << QString("cc") << -1 << -1 << QStringList(); - QTest::newRow(qPrintable(stri + "or08")) << QString("(?:a|b)") << QString("abc") << 0 << 1 << QStringList(); - QTest::newRow(qPrintable(stri + "or09")) << QString("(?:a|b)") << QString("cba") << 1 << 1 << QStringList(); - QTest::newRow(qPrintable(stri + "or10")) << QString("(?:ab|ba)") << QString("aba") << 0 << 2 - << QStringList(); - QTest::newRow(qPrintable(stri + "or11")) << QString("(?:ab|ba)") << QString("bab") << 0 << 2 - << QStringList(); - QTest::newRow(qPrintable(stri + "or12")) << QString("(?:ab|ba)") << QString("caba") << 1 << 2 - << QStringList(); - QTest::newRow(qPrintable(stri + "or13")) << QString("(?:ab|ba)") << QString("cbab") << 1 << 2 - << QStringList(); - - // quantifiers - QTest::newRow(qPrintable(stri + "qua00")) << QString("((([a-j])){0,0})") << QString("") << 0 << 0 - << QStringList( QStringList() << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "qua01")) << QString("((([a-j])){0,0})") << QString("a") << 0 << 0 - << QStringList( QStringList() << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "qua02")) << QString("((([a-j])){0,0})") << QString("xyz") << 0 << 0 - << QStringList( QStringList() << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "qua03")) << QString("((([a-j]))?)") << QString("") << 0 << 0 - << QStringList( QStringList() << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "qua04")) << QString("((([a-j]))?)") << QString("a") << 0 << 1 - << QStringList( QStringList() << "a" << "a" << "a" ); - QTest::newRow(qPrintable(stri + "qua05")) << QString("((([a-j]))?)") << QString("x") << 0 << 0 - << QStringList( QStringList() << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "qua06")) << QString("((([a-j]))?)") << QString("ab") << 0 << 1 - << QStringList( QStringList() << "a" << "a" << "a" ); - QTest::newRow(qPrintable(stri + "qua07")) << QString("((([a-j]))?)") << 
QString("xa") << 0 << 0 - << QStringList( QStringList() << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "qua08")) << QString("((([a-j])){0,3})") << QString("") << 0 << 0 - << QStringList( QStringList() << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "qua09")) << QString("((([a-j])){0,3})") << QString("a") << 0 << 1 - << QStringList( QStringList() << "a" << "a" << "a" ); - QTest::newRow(qPrintable(stri + "qua10")) << QString("((([a-j])){0,3})") << QString("abcd") << 0 << 3 - << QStringList( QStringList() << "abc" << "c" << "c" ); - QTest::newRow(qPrintable(stri + "qua11")) << QString("((([a-j])){0,3})") << QString("abcde") << 0 << 3 - << QStringList( QStringList() << "abc" << "c" << "c" ); - QTest::newRow(qPrintable(stri + "qua12")) << QString("((([a-j])){2,4})") << QString("a") << -1 << -1 - << QStringList( QStringList() << QString() - << QString() - << QString() ); - QTest::newRow(qPrintable(stri + "qua13")) << QString("((([a-j])){2,4})") << QString("ab") << 0 << 2 - << QStringList( QStringList() << "ab" << "b" << "b" ); - QTest::newRow(qPrintable(stri + "qua14")) << QString("((([a-j])){2,4})") << QString("abcd") << 0 << 4 - << QStringList( QStringList() << "abcd" << "d" << "d" ); - QTest::newRow(qPrintable(stri + "qua15")) << QString("((([a-j])){2,4})") << QString("abcdef") << 0 << 4 - << QStringList( QStringList() << "abcd" << "d" << "d" ); - QTest::newRow(qPrintable(stri + "qua16")) << QString("((([a-j])){2,4})") << QString("xaybcd") << 3 << 3 - << QStringList( QStringList() << "bcd" << "d" << "d" ); - QTest::newRow(qPrintable(stri + "qua17")) << QString("((([a-j])){0,})") << QString("abcdefgh") << 0 << 8 - << QStringList( QStringList() << "abcdefgh" << "h" << "h" ); - QTest::newRow(qPrintable(stri + "qua18")) << QString("((([a-j])){,0})") << QString("abcdefgh") << 0 << 0 - << QStringList( QStringList() << "" << "" << "" ); - QTest::newRow(qPrintable(stri + "qua19")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("123332333") << 0 - << 9 - << 
QStringList( QStringList() << "123332333" << "2333" - << "3" ); - QTest::newRow(qPrintable(stri + "qua20")) << QString("(1(2(3){3,4}){2,3}){1,2}") - << QString("12333323333233331233332333323333") << 0 << 32 - << QStringList( QStringList() << "1233332333323333" - << "23333" << "3" ); - QTest::newRow(qPrintable(stri + "qua21")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("") << -1 << -1 - << QStringList( QStringList() << QString() - << QString() - << QString() ); - QTest::newRow(qPrintable(stri + "qua22")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("12333") << -1 - << -1 - << QStringList( QStringList() << QString() - << QString() - << QString() ); - QTest::newRow(qPrintable(stri + "qua23")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("12333233") << -1 - << -1 - << QStringList( QStringList() << QString() - << QString() - << QString() ); - QTest::newRow(qPrintable(stri + "qua24")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("122333") << -1 - << -1 - << QStringList( QStringList() << QString() - << QString() - << QString() ); - - // star operator - QTest::newRow(qPrintable(stri + "star00")) << QString("(?:)*") << QString("") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "star01")) << QString("(?:)*") << QString("abc") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "star02")) << QString("(?:a)*") << QString("") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "star03")) << QString("(?:a)*") << QString("a") << 0 << 1 << QStringList(); - QTest::newRow(qPrintable(stri + "star04")) << QString("(?:a)*") << QString("aaa") << 0 << 3 << QStringList(); - QTest::newRow(qPrintable(stri + "star05")) << QString("(?:a)*") << QString("bbbbaaa") << 0 << 0 - << QStringList(); - QTest::newRow(qPrintable(stri + "star06")) << QString("(?:a)*") << QString("bbbbaaabbaaaaa") << 0 << 0 - << QStringList(); - QTest::newRow(qPrintable(stri + "star07")) << QString("(?:b)*(?:a)*") << QString("") << 0 << 0 - << QStringList(); - 
QTest::newRow(qPrintable(stri + "star08")) << QString("(?:b)*(?:a)*") << QString("a") << 0 << 1 - << QStringList(); - QTest::newRow(qPrintable(stri + "star09")) << QString("(?:b)*(?:a)*") << QString("aaa") << 0 << 3 - << QStringList(); - QTest::newRow(qPrintable(stri + "star10")) << QString("(?:b)*(?:a)*") << QString("bbbbaaa") << 0 << 7 - << QStringList(); - QTest::newRow(qPrintable(stri + "star11")) << QString("(?:b)*(?:a)*") << QString("bbbbaaabbaaaaa") << 0 << 7 - << QStringList(); - QTest::newRow(qPrintable(stri + "star12")) << QString("(?:a|b)*") << QString("c") << 0 << 0 << QStringList(); - QTest::newRow(qPrintable(stri + "star13")) << QString("(?:a|b)*") << QString("abac") << 0 << 3 - << QStringList(); - QTest::newRow(qPrintable(stri + "star14")) << QString("(?:a|b|)*") << QString("c") << 0 << 0 - << QStringList(); - QTest::newRow(qPrintable(stri + "star15")) << QString("(?:a|b|)*") << QString("abac") << 0 << 3 - << QStringList(); - QTest::newRow(qPrintable(stri + "star16")) << QString("(?:ab|ba|b)*") << QString("abbbababbbaaab") << 0 << 11 - << QStringList(); -} - -void tst_QRegExp::exactMatch() -{ - QRegExp rx_d( "\\d" ); - QRegExp rx_s( "\\s" ); - QRegExp rx_w( "\\w" ); - QRegExp rx_D( "\\D" ); - QRegExp rx_S( "\\S" ); - QRegExp rx_W( "\\W" ); - - for ( int i = 0; i < 65536; i++ ) { - QChar ch( i ); - bool is_d = ( ch.category() == QChar::Number_DecimalDigit ); - bool is_s = ch.isSpace(); - bool is_w = ( ch.isLetterOrNumber() - || ch.isMark() - || ch == '_' ); - - QVERIFY( rx_d.exactMatch(QString(ch)) == is_d ); - QVERIFY( rx_s.exactMatch(QString(ch)) == is_s ); - QVERIFY( rx_w.exactMatch(QString(ch)) == is_w ); - QVERIFY( rx_D.exactMatch(QString(ch)) != is_d ); - QVERIFY( rx_S.exactMatch(QString(ch)) != is_s ); - QVERIFY( rx_W.exactMatch(QString(ch)) != is_w ); - } -} - -void tst_QRegExp::capturedTexts() -{ - QRegExp rx1("a*(a*)", Qt::CaseSensitive, QRegExp::RegExp); - rx1.exactMatch("aaa"); - QCOMPARE(rx1.matchedLength(), 3); - QCOMPARE(rx1.cap(0), 
QString("aaa")); - QCOMPARE(rx1.cap(1), QString("aaa")); - - QRegExp rx2("a*(a*)", Qt::CaseSensitive, QRegExp::RegExp2); - rx2.exactMatch("aaa"); - QCOMPARE(rx2.matchedLength(), 3); - QCOMPARE(rx2.cap(0), QString("aaa")); - QCOMPARE(rx2.cap(1), QString("")); - - QRegExp rx3("(?:a|aa)(a*)", Qt::CaseSensitive, QRegExp::RegExp); - rx3.exactMatch("aaa"); - QCOMPARE(rx3.matchedLength(), 3); - QCOMPARE(rx3.cap(0), QString("aaa")); - QCOMPARE(rx3.cap(1), QString("aa")); - - QRegExp rx4("(?:a|aa)(a*)", Qt::CaseSensitive, QRegExp::RegExp2); - rx4.exactMatch("aaa"); - QCOMPARE(rx4.matchedLength(), 3); - QCOMPARE(rx4.cap(0), QString("aaa")); - QCOMPARE(rx4.cap(1), QString("a")); - - QRegExp rx5("(a)*(a*)", Qt::CaseSensitive, QRegExp::RegExp); - rx5.exactMatch("aaa"); - QCOMPARE(rx5.matchedLength(), 3); - QCOMPARE(rx5.cap(0), QString("aaa")); - QCOMPARE(rx5.cap(1), QString("a")); - QCOMPARE(rx5.cap(2), QString("aa")); - - QRegExp rx6("(a)*(a*)", Qt::CaseSensitive, QRegExp::RegExp2); - rx6.exactMatch("aaa"); - QCOMPARE(rx6.matchedLength(), 3); - QCOMPARE(rx6.cap(0), QString("aaa")); - QCOMPARE(rx6.cap(1), QString("a")); - QCOMPARE(rx6.cap(2), QString("")); - - QRegExp rx7("([A-Za-z_])([A-Za-z_0-9]*)"); - rx7.setCaseSensitivity(Qt::CaseSensitive); - rx7.setPatternSyntax(QRegExp::RegExp); - QCOMPARE(rx7.captureCount(), 2); - - int pos = rx7.indexIn("(10 + delta4) * 32"); - QCOMPARE(pos, 6); - QCOMPARE(rx7.matchedLength(), 6); - QCOMPARE(rx7.cap(0), QString("delta4")); - QCOMPARE(rx7.cap(1), QString("d")); - QCOMPARE(rx7.cap(2), QString("elta4")); -} - -void tst_QRegExp::indexIn() -{ - QFETCH( QString, regexpStr ); - QFETCH( QString, target ); - QFETCH( int, pos ); - QFETCH( int, len ); - QFETCH( QStringList, caps ); - - caps.prepend( "dummy cap(0)" ); - - { - QRegExp rx( regexpStr ); - QVERIFY( rx.isValid() ); - - int mypos = rx.indexIn( target ); - int mylen = rx.matchedLength(); - QStringList mycaps = rx.capturedTexts(); - - QCOMPARE( mypos, pos ); - QCOMPARE( mylen, len ); - 
if ( caps.size() > 1 && caps[1] != "IGNORE ME" ) { - QCOMPARE( mycaps.count(), caps.count() ); - for ( int i = 1; i < (int) mycaps.count(); i++ ) - QCOMPARE( mycaps[i], caps[i] ); - } - } - - // same as above, but with RegExp2 - { - QRegExp rx( regexpStr, Qt::CaseSensitive, QRegExp::RegExp2 ); - QVERIFY( rx.isValid() ); - - int mypos = rx.indexIn( target ); - int mylen = rx.matchedLength(); - QStringList mycaps = rx.capturedTexts(); - - QCOMPARE( mypos, pos ); - QCOMPARE( mylen, len ); - if ( caps.size() > 1 && caps[1] != "IGNORE ME" ) { - QCOMPARE( mycaps.count(), caps.count() ); - for ( int i = 1; i < (int) mycaps.count(); i++ ) - QCOMPARE( mycaps[i], caps[i] ); - } - } -} - -void tst_QRegExp::lastIndexIn() -{ - QFETCH( QString, regexpStr ); - QFETCH( QString, target ); - QFETCH( int, pos ); - QFETCH( int, len ); - QFETCH( QStringList, caps ); - - caps.prepend( "dummy" ); - - /* - The test data was really designed for indexIn(), not - lastIndexIn(), but it turns out that we can reuse much of that - for lastIndexIn(). 
- */ - - { - QRegExp rx( regexpStr ); - QVERIFY( rx.isValid() ); - - int mypos = rx.lastIndexIn( target, target.length() ); - int mylen = rx.matchedLength(); - QStringList mycaps = rx.capturedTexts(); - - if ( mypos <= pos || pos == -1 ) { - QCOMPARE( mypos, pos ); - QCOMPARE( mylen, len ); - - if (caps.size() > 1 && caps[1] != "IGNORE ME") { - QCOMPARE( mycaps.count(), caps.count() ); - for ( int i = 1; i < (int) mycaps.count(); i++ ) - QCOMPARE( mycaps[i], caps[i] ); - } - } - } - - { - QRegExp rx( regexpStr, Qt::CaseSensitive, QRegExp::RegExp2 ); - QVERIFY( rx.isValid() ); - - int mypos = rx.lastIndexIn( target, target.length() ); - int mylen = rx.matchedLength(); - QStringList mycaps = rx.capturedTexts(); - - if ( mypos <= pos || pos == -1 ) { - QCOMPARE( mypos, pos ); - QCOMPARE( mylen, len ); - - if (caps.size() > 1 && caps[1] != "IGNORE ME") { - QCOMPARE( mycaps.count(), caps.count() ); - for ( int i = 1; i < (int) mycaps.count(); i++ ) - QCOMPARE( mycaps[i], caps[i] ); - } - } - } -} - -void tst_QRegExp::matchedLength() -{ - QRegExp r1( "a+" ); - r1.exactMatch( "aaaba" ); - QCOMPARE( r1.matchedLength(), 3 ); -} - -const char email[] = - "^[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff" - "]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\x" - "ff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:" - "(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@" - ",;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"" - "]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015\"]*)*\")[\\040\\t]*(?" 
- ":\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x" - "80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*" - ")*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:\\.[\\040\\t]*" - "(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\" - "\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015(" - ")]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>" - "@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[" - "\\]\\000-\\037\\x80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"]*(?:\\\\[^\\" - "x80-\\xff][^\\\\\\x80-\\xff\\n\\015\"]*)*\")[\\040\\t]*(?:\\([^\\\\\\x" - "80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\" - "015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\" - "\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)*@[\\040\\t]*(?:\\([^\\\\\\x" - "80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\" - "015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\" - "\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[" - "\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037" - "\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]]|\\\\[^\\x80-\\xff" - "])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80" - "-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x" - "80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]" - "*)*(?:\\.[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x" - "80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\" - "\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040" - "\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\" - "040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xf" - "f\\n\\015\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-" - 
"\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015" - "()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x8" - "0-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)*|(?:[^(\\040)<>@,;:\".\\\\\\[\\" - "]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x" - "80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"]*(?:\\\\[^\\x80-\\xff][^\\\\" - "\\x80-\\xff\\n\\015\"]*)*\")[^()<>@,;:\".\\\\\\[\\]\\x80-\\xff\\000-\\" - "010\\012-\\037]*(?:(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x8" - "0-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\" - "x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)|\"[^\\\\" - "\\x80-\\xff\\n\\015\"]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015" - "\"]*)*\")[^()<>@,;:\".\\\\\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]*)*<" - "[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]" - "|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xf" - "f\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:@" - "[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]" - "|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xf" - "f\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[" - "^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:" - "\".\\\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xff\\n\\015" - "\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n" - "\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:" - "\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff" - "\\n\\015()]*)*\\)[\\040\\t]*)*(?:\\.[\\040\\t]*(?:\\([^\\\\\\x80-\\xff" - "\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(" - "?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\x" - "ff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-" - 
"\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xf" - "f])|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\" - "040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\" - "([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\" - "n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)*(?:,[" - "\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|" - "\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff" - "\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*@[\\0" - "40\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\(" - "[^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n" - "\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\" - "040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\" - "\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]" - "]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()" - "]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\" - "x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015" - "()]*)*\\)[\\040\\t]*)*(?:\\.[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015" - "()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^" - "\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\0" - "15()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x8" - "0-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?" 
- ":[^\\\\\\x80-\\xff\\n\\015\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(" - "?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\" - "x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]" - "*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)*)*:[\\040\\t]*" - "(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\" - "\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015(" - ")]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)?(?:[^(\\040)" - "<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\" - "[\\]\\000-\\037\\x80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"]*(?:\\\\[^" - "\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015\"]*)*\")[\\040\\t]*(?:\\([^\\\\" - "\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\" - "n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\" - "\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:\\.[\\040\\t]*(?:\\([^\\" - "\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff" - "\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^" - "\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\" - "\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\0" - "37\\x80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"]*(?:\\\\[^\\x80-\\xff][^" - "\\\\\\x80-\\xff\\n\\015\"]*)*\")[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n" - "\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:" - "\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff" - "\\n\\015()]*)*\\)[\\040\\t]*)*)*@[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n" - "\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:" - "\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff" - "\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\0" - "37\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff])" - 
"|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\040" - "\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^" - "\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\" - "015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:\\.[\\0" - "40\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\(" - "[^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n" - "\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\" - "040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\" - "\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]" - "]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()" - "]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\" - "x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015" - "()]*)*\\)[\\040\\t]*)*)*>)$"; - -void tst_QRegExp::wildcard_data() -{ - QTest::addColumn("rxp"); - QTest::addColumn("string"); - QTest::addColumn("foundIndex"); - - QTest::newRow( "data0" ) << QString("*.html") << QString("test.html") << 0; - QTest::newRow( "data1" ) << QString("*.html") << QString("test.htm") << -1; - QTest::newRow( "data2" ) << QString("bar*") << QString("foobarbaz") << 3; - QTest::newRow( "data3" ) << QString("*") << QString("Qt Rocks!") << 0; - QTest::newRow( "data4" ) << QString(".html") << QString("test.html") << 4; - QTest::newRow( "data5" ) << QString(".h") << QString("test.cpp") << -1; - QTest::newRow( "data6" ) << QString(".???l") << QString("test.html") << 4; - QTest::newRow( "data7" ) << QString("?") << QString("test.html") << 0; - QTest::newRow( "data8" ) << QString("?m") << QString("test.html") << 6; - QTest::newRow( "data9" ) << QString(".h[a-z]ml") << QString("test.html") << 4; - QTest::newRow( "data10" ) << QString(".h[A-Z]ml") << QString("test.html") << -1; - QTest::newRow( "data11" ) << QString(".h[A-Z]ml") << QString("test.hTml") << 4; -} - 
-void tst_QRegExp::wildcard() -{ - QFETCH( QString, rxp ); - QFETCH( QString, string ); - QFETCH( int, foundIndex ); - - QRegExp r( rxp ); - r.setPatternSyntax(QRegExp::WildcardUnix); - QCOMPARE( r.indexIn( string ), foundIndex ); -} - -void tst_QRegExp::testEscapingWildcard_data(){ - QTest::addColumn("pattern"); - QTest::addColumn("teststring"); - QTest::addColumn("isMatching"); - - QTest::newRow("[ Not escaped") << "[Qt;" << "[Qt;" << false; - QTest::newRow("[ Escaped") << "\\[Qt;" << "[Qt;" << true; - - QTest::newRow("] Not escaped") << "]Ik;" << "]Ik;" << false; - QTest::newRow("] Escaped") << "\\]Ip;" << "]Ip;" << true; - - QTest::newRow("? Not escaped valid") << "?Ou:" << ".Ou:" << true; - QTest::newRow("? Not escaped invalid") << "?Tr;" << "Tr;" << false; - QTest::newRow("? Escaped") << "\\?O;" << "?O;" << true; - - QTest::newRow("[] not escaped") << "[lL]" << "l" << true; - QTest::newRow("[] escaped") << "\\[\\]" << "[]" << true; - - QTest::newRow("case [[]") << "[[abc]" << "[" << true; - QTest::newRow("case []abc] match ]") << "[]abc]" << "]" << true; - QTest::newRow("case []abc] match a") << "[]abc]" << "a" << true; - QTest::newRow("case [abc] match a") << "[abc]" << "a" << true; - QTest::newRow("case []] don't match [") << "[]abc]" << "[" << false; - QTest::newRow("case [^]abc] match d") << "[^]abc]" << "d" << true; - QTest::newRow("case [^]abc] don't match ]") << "[^]abc]" << "]" << false; - - QTest::newRow("* Not escaped with char") << "*Te;" << "12345Te;" << true; - QTest::newRow("* Not escaped without char") << "*Ch;" << "Ch;" << true; - QTest::newRow("* Not escaped invalid") << "*Ro;" << "o;" << false; - QTest::newRow("* Escaped") << "\\[Cks;" << "[Cks;" << true; - - QTest::newRow("a true '\\' in input") << "\\Qt;" << "\\Qt;" << true; - QTest::newRow("two true '\\' in input") << "\\\\Qt;" << "\\\\Qt;" << true; - QTest::newRow("a '\\' at the end") << "\\\\Qt;\\" << "\\\\Qt;\\" << true; - - QTest::newRow("[]\\] matches ]") << "[]\\]" << "]" << true; - 
QTest::newRow("[]\\] matches \\") << "[]\\]" << "\\" << true; - QTest::newRow("[]\\] does not match [") << "[]\\]" << "[" << false; - QTest::newRow("[]\\]a matches ]a") << "[]\\]a" << "]a" << true; - QTest::newRow("[]\\]a matches \\a") << "[]\\]a" << "\\a" << true; - QTest::newRow("[]\\]a does not match [a") << "[]\\]a" << "[a" << false; -} - -void tst_QRegExp::testEscapingWildcard(){ - QFETCH(QString, pattern); - - QRegExp re(pattern); - re.setPatternSyntax(QRegExp::WildcardUnix); - - QFETCH(QString, teststring); - QFETCH(bool, isMatching); - QCOMPARE(re.exactMatch(teststring), isMatching); -} - -void tst_QRegExp::testInvalidWildcard_data(){ - QTest::addColumn("pattern"); - QTest::addColumn("isValid"); - - QTest::newRow("valid []") << "[abc]" << true; - QTest::newRow("invalid [") << "[abc" << false; - QTest::newRow("ending [") << "abc[" << false; - QTest::newRow("ending ]") << "abc]" << false; - QTest::newRow("ending [^") << "abc[^" << false; - QTest::newRow("ending [\\") << "abc[\\" << false; - QTest::newRow("ending []") << "abc[]" << false; - QTest::newRow("ending [[") << "abc[[" << false; -} - -void tst_QRegExp::testInvalidWildcard(){ - QFETCH(QString, pattern); - - QRegExp re(pattern); - re.setPatternSyntax(QRegExp::Wildcard); - - QFETCH(bool, isValid); - QCOMPARE(re.isValid(), isValid); -} - -void tst_QRegExp::caretAnchoredOptimization() -{ - QString s = "---babnana----"; - s = QRegExp("^-*|(-*)$").replaceIn(s, "" ); - QCOMPARE(s, QLatin1String("babnana")); - - s = "---babnana----"; - s = QRegExp("^-*|(-{0,})$").replaceIn(s, "" ); - QCOMPARE(s, QLatin1String("babnana")); - - s = "---babnana----"; - s = QRegExp("^-*|(-{1,})$").replaceIn(s, "" ); - QCOMPARE(s, QLatin1String("babnana")); - - s = "---babnana----"; - s = QRegExp("^-*|(-+)$").replaceIn(s, "" ); - QCOMPARE(s, QLatin1String("babnana")); -} - -void tst_QRegExp::isEmpty() -{ - QRegExp rx1; - QVERIFY(rx1.isEmpty()); - - QRegExp rx2 = rx1; - QVERIFY(rx2.isEmpty()); - - rx2.setPattern(""); - 
QVERIFY(rx2.isEmpty()); - - rx2.setPattern("foo"); - QVERIFY(!rx2.isEmpty()); - - rx2.setPattern(")("); - QVERIFY(!rx2.isEmpty()); - - rx2.setPattern(""); - QVERIFY(rx2.isEmpty()); - - rx2.setPatternSyntax(QRegExp::Wildcard); - rx2.setPattern(""); - QVERIFY(rx2.isEmpty()); -} - -static QRegExp re("foo.*bar"); - -void tst_QRegExp::staticRegExp() -{ - QVERIFY(re.exactMatch("fooHARRYbar")); - // the actual test is that a static regexp should not crash -} - -void tst_QRegExp::rainersSlowRegExpCopyBug() -{ - // this test should take an extreme amount of time if QRegExp is broken - QRegExp original(email); - for (int i = 0; i < 100000; ++i) { - QRegExp copy = original; - (void)copy.exactMatch("~"); - QRegExp copy2 = original; - } -} - -void tst_QRegExp::nonExistingBackReferenceBug() -{ - { - QRegExp rx("<\\5>"); - QVERIFY(rx.isValid()); - QCOMPARE(rx.indexIn("<>"), 0); - QCOMPARE(rx.capturedTexts(), QStringList("<>")); - } - - { - QRegExp rx("<\\1>"); - QVERIFY(rx.isValid()); - QCOMPARE(rx.indexIn("<>"), 0); - QCOMPARE(rx.capturedTexts(), QStringList("<>")); - } - - { - QRegExp rx("(?:<\\1>)\\1\\5\\4"); - QVERIFY(rx.isValid()); - QCOMPARE(rx.indexIn("<>"), 0); - QCOMPARE(rx.capturedTexts(), QStringList("<>")); - } -} - -class Thread : public QThread -{ -public: - Thread(const QRegExp &rx) : rx(rx) {} - - void run(); - - QRegExp rx; -}; - -void Thread::run() -{ - QString str = "abc"; - for (int i = 0; i < 10; ++i) - str += str; - str += "abbbdekcz"; - int x; - - for (int j = 0; j < 10000; ++j) - x = rx.indexIn(str); - - QCOMPARE(x, 3072); -} - -void tst_QRegExp::reentrancy() -{ - QRegExp rx("(ab{2,}d?e?f?[g-z]?)c"); - Thread *threads[10]; - - for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i) { - threads[i] = new Thread(rx); - threads[i]->start(); - } - - for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i) - threads[i]->wait(); - - for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i) - delete threads[i]; -} - -class Thread2 
: public QThread -{ -public: - void run(); -}; - -void Thread2::run() -{ - QRegExp rx("(ab{2,}d?e?f?[g-z]?)c"); - QString str = "abc"; - for (int i = 0; i < 10; ++i) - str += str; - str += "abbbdekcz"; - int x; - - for (int j = 0; j < 10000; ++j) - x = rx.indexIn(str); - - QCOMPARE(x, 3072); -} - -// Test that multiple threads can construct equal QRegExps. -// (In the current QRegExp design each engine instatance will share -// the same cache key, so the threads will race for the cache entry -// in the global cache.) -void tst_QRegExp::threadsafeEngineCache() -{ - Thread2 *threads[10]; - - for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i) { - threads[i] = new Thread2(); - threads[i]->start(); - } - - for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i) - threads[i]->wait(); - - for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i) - delete threads[i]; -} - - -void tst_QRegExp::prepareEngineOptimization() -{ - QRegExp rx0("(f?)(?:(o?)(o?))?"); - - QRegExp rx1(rx0); - - QCOMPARE(rx1.capturedTexts(), QStringList() << "" << "" << "" << ""); - QCOMPARE(rx1.matchedLength(), -1); - QCOMPARE(rx1.matchedLength(), -1); - QCOMPARE(rx1.captureCount(), 3); - - QCOMPARE(rx1.exactMatch("foo"), true); - QCOMPARE(rx1.matchedLength(), 3); - QCOMPARE(rx1.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o"); - QCOMPARE(rx1.captureCount(), 3); - QCOMPARE(rx1.matchedLength(), 3); - QCOMPARE(rx1.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o"); - QCOMPARE(rx1.pos(3), 2); - - QCOMPARE(rx1.exactMatch("foo"), true); - QCOMPARE(rx1.captureCount(), 3); - QCOMPARE(rx1.matchedLength(), 3); - QCOMPARE(rx1.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o"); - QCOMPARE(rx1.pos(3), 2); - - QRegExp rx2 = rx1; - - QCOMPARE(rx1.captureCount(), 3); - QCOMPARE(rx1.matchedLength(), 3); - QCOMPARE(rx1.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o"); - QCOMPARE(rx1.pos(3), 2); - - QCOMPARE(rx2.captureCount(), 3); 
- QCOMPARE(rx2.matchedLength(), 3); - QCOMPARE(rx2.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o"); - QCOMPARE(rx2.pos(3), 2); - - QCOMPARE(rx1.exactMatch("fo"), true); - QCOMPARE(rx1.captureCount(), 3); - QCOMPARE(rx1.matchedLength(), 2); - QCOMPARE(rx1.capturedTexts(), QStringList() << "fo" << "f" << "o" << ""); - QCOMPARE(rx1.pos(2), 1); - - QRegExp rx3; - QVERIFY(rx3.isValid()); - - QRegExp rx4("foo", Qt::CaseInsensitive, QRegExp::RegExp); - QVERIFY(rx4.isValid()); - - QRegExp rx5("foo", Qt::CaseInsensitive, QRegExp::RegExp2); - QVERIFY(rx5.isValid()); - - QRegExp rx6("foo", Qt::CaseInsensitive, QRegExp::FixedString); - QVERIFY(rx6.isValid()); - - QRegExp rx7("foo", Qt::CaseInsensitive, QRegExp::Wildcard); - QVERIFY(rx7.isValid()); - - QRegExp rx8("][", Qt::CaseInsensitive, QRegExp::RegExp); - QVERIFY(!rx8.isValid()); - - QRegExp rx9("][", Qt::CaseInsensitive, QRegExp::RegExp2); - QVERIFY(!rx9.isValid()); - - QRegExp rx10("][", Qt::CaseInsensitive, QRegExp::Wildcard); - QVERIFY(!rx10.isValid()); - - QRegExp rx11("][", Qt::CaseInsensitive, QRegExp::FixedString); - QVERIFY(rx11.isValid()); - QVERIFY(rx11.exactMatch("][")); - QCOMPARE(rx11.matchedLength(), 2); - - rx11.setPatternSyntax(QRegExp::Wildcard); - QVERIFY(!rx11.isValid()); - QCOMPARE(rx11.captureCount(), 0); - QCOMPARE(rx11.matchedLength(), -1); - - rx11.setPatternSyntax(QRegExp::RegExp); - QVERIFY(!rx11.isValid()); - QCOMPARE(rx11.captureCount(), 0); - QCOMPARE(rx11.matchedLength(), -1); - - rx11.setPattern("(foo)"); - QVERIFY(rx11.isValid()); - QCOMPARE(rx11.captureCount(), 1); - QCOMPARE(rx11.matchedLength(), -1); - - QCOMPARE(rx11.indexIn("ofoo"), 1); - QCOMPARE(rx11.captureCount(), 1); - QCOMPARE(rx11.matchedLength(), 3); - - rx11.setPatternSyntax(QRegExp::RegExp); - QCOMPARE(rx11.captureCount(), 1); - QCOMPARE(rx11.matchedLength(), 3); - - /* - This behavior isn't entirely consistent with setPatter(), - setPatternSyntax(), and setCaseSensitivity(), but I'm testing - it here to ensure 
that it doesn't change subtly in future - releases. - */ - rx11.setMinimal(true); - QCOMPARE(rx11.matchedLength(), 3); - rx11.setMinimal(false); - QCOMPARE(rx11.matchedLength(), 3); - - rx11.setPatternSyntax(QRegExp::Wildcard); - QCOMPARE(rx11.captureCount(), 0); - QCOMPARE(rx11.matchedLength(), -1); - - rx11.setPatternSyntax(QRegExp::RegExp); - QCOMPARE(rx11.captureCount(), 1); - QCOMPARE(rx11.matchedLength(), -1); -} - -void tst_QRegExp::swap() -{ - QRegExp r1(QLatin1String(".*")), r2(QLatin1String("a*")); - r1.swap(r2); - QCOMPARE(r1.pattern(),QLatin1String("a*")); - QCOMPARE(r2.pattern(),QLatin1String(".*")); -} - -void tst_QRegExp::operator_eq() -{ - const int I = 2; - const int J = 4; - const int K = 2; - const int ELL = 2; - QRegExp rxtable[I * J * K * ELL]; - int n; - - n = 0; - for (int i = 0; i < I; ++i) { - for (int j = 0; j < J; ++j) { - for (int k = 0; k < K; ++k) { - for (int ell = 0; ell < ELL; ++ell) { - Qt::CaseSensitivity cs = i == 0 ? Qt::CaseSensitive : Qt::CaseInsensitive; - QRegExp::PatternSyntax syntax = QRegExp::PatternSyntax(j); - bool minimal = k == 0; - - if (ell == 0) { - QRegExp rx("foo", cs, syntax); - rx.setMinimal(minimal); - rxtable[n++] = rx; - } else { - QRegExp rx; - rx.setPattern("bar"); - rx.setMinimal(true); - rx.exactMatch("bar"); - rx.setCaseSensitivity(cs); - rx.setMinimal(minimal); - rx.setPattern("foo"); - rx.setPatternSyntax(syntax); - rx.exactMatch("foo"); - rxtable[n++] = rx; - } - } - } - } - } - - for (int i = 0; i < I * J * K * ELL; ++i) { - for (int j = 0; j < I * J * K * ELL; ++j) { - QCOMPARE(rxtable[i] == rxtable[j], i / ELL == j / ELL); - QCOMPARE(rxtable[i] != rxtable[j], i / ELL != j / ELL); - // this just happens to have no hash collisions. If at some point - // we get collisions, restrict the test to only equal elements: - QCOMPARE(qHash(rxtable[i]) == qHash(rxtable[j]), i / ELL == j / ELL); - } - } -} - -// This test aims to ensure that the values returned by pos() and cap() -// are consistent. 
-void tst_QRegExp::posAndCapConsistency_data() -{ - QTest::addColumn("reStr"); - QTest::addColumn("text"); - QTest::addColumn("matchIndex"); - - QTest::addColumn("pos0"); - QTest::addColumn("pos1"); - QTest::addColumn("pos2"); - - QTest::addColumn("cap0"); - QTest::addColumn("cap1"); - QTest::addColumn("cap2"); - - QTest::newRow("no match") - << QString("(a) (b)") << QString("b a") << -1 - << -1 << -1 << -1 << QString() << QString() << QString(); - - QTest::newRow("both captures match") - << QString("(a) (b)") << QString("a b") << 0 - << 0 << 0 << 2 << QString("a b") << QString("a") << QString("b"); - - QTest::newRow("first capture matches @0") - << QString("(a*)|(b*)") << QString("axx") << 0 - << 0 << 0 << -1 << QString("a") << QString("a") << QString(); - QTest::newRow("second capture matches @0") - << QString("(a*)|(b*)") << QString("bxx") << 0 - << 0 << -1 << 0 << QString("b") << QString() << QString("b"); - QTest::newRow("first capture empty match @0") - << QString("(a*)|(b*)") << QString("xx") << 0 - << 0 << -1 << -1 << QString("") << QString() << QString(); - QTest::newRow("second capture empty match @0") - << QString("(a)|(b*)") << QString("xx") << 0 - << 0 << -1 << -1 << QString("") << QString() << QString(); - - QTest::newRow("first capture matches @1") - << QString("x(?:(a*)|(b*))") << QString("-xa") << 1 - << 1 << 2 << -1 << QString("xa") << QString("a") << QString(); - QTest::newRow("second capture matches @1") - << QString("x(?:(a*)|(b*))") << QString("-xb") << 1 - << 1 << -1 << 2 << QString("xb") << QString() << QString("b"); - QTest::newRow("first capture empty match @1") - << QString("x(?:(a*)|(b*))") << QString("-xx") << 1 - << 1 << -1 << -1 << QString("x") << QString() << QString(); - QTest::newRow("second capture empty match @1") - << QString("x(?:(a)|(b*))") << QString("-xx") << 1 - << 1 << -1 << -1 << QString("x") << QString() << QString(); - - QTest::newRow("first capture matches @2") - << QString("(a)|(b)") << QString("xxa") << 2 - << 2 << 2 
<< -1 << QString("a") << QString("a") << QString(); - QTest::newRow("second capture matches @2") - << QString("(a)|(b)") << QString("xxb") << 2 - << 2 << -1 << 2 << QString("b") << QString() << QString("b"); - QTest::newRow("no match - with options") - << QString("(a)|(b)") << QString("xx") << -1 - << -1 << -1 << -1 << QString() << QString() << QString(); - -} - -void tst_QRegExp::posAndCapConsistency() -{ - QFETCH( QString, reStr ); - QFETCH( QString, text ); - QFETCH( int, matchIndex ); - QFETCH( int, pos0 ); - QFETCH( int, pos1 ); - QFETCH( int, pos2 ); - QFETCH( QString, cap0 ); - QFETCH( QString, cap1 ); - QFETCH( QString, cap2 ); - - QRegExp re(reStr); - QCOMPARE(re.captureCount(), 2); - QCOMPARE(re.capturedTexts().size(), 3); - - QCOMPARE(re.indexIn(text), matchIndex); - - QCOMPARE( re.pos(0), pos0 ); - QCOMPARE( re.pos(1), pos1 ); - QCOMPARE( re.pos(2), pos2 ); - - QCOMPARE( re.cap(0).isNull(), cap0.isNull() ); - QCOMPARE( re.cap(0), cap0 ); - QCOMPARE( re.cap(1).isNull(), cap1.isNull() ); - QCOMPARE( re.cap(1), cap1 ); - QCOMPARE( re.cap(2).isNull(), cap2.isNull() ); - QCOMPARE( re.cap(2), cap2 ); -} - -void tst_QRegExp::interval() -{ - { - QRegExp exp("a{0,1}"); - QVERIFY(exp.isValid()); - } - { - QRegExp exp("a{1,1}"); - QVERIFY(exp.isValid()); - } - { - QRegExp exp("a{1,0}"); - QVERIFY(!exp.isValid()); - } -} - -void tst_QRegExp::validityCheck_data() -{ - QTest::addColumn("pattern"); - QTest::addColumn("validity"); - QTest::newRow("validity01") << QString() << true; - QTest::newRow("validity02") << QString("abc.*abc") << true; - QTest::newRow("validity03") << QString("[a-z") << false; - QTest::newRow("validity04") << QString("a(b") << false; -} - -void tst_QRegExp::validityCheck() -{ - QFETCH(QString, pattern); - - QRegExp rx(pattern); - QTEST(rx.isValid(), "validity"); - QCOMPARE(rx.matchedLength(), -1); - QCOMPARE(rx.pos(), -1); - QCOMPARE(rx.cap(), QString("")); - - QRegExp rx2(rx); - QTEST(rx2.isValid(), "validity"); - QCOMPARE(rx2.matchedLength(), 
-1); - QCOMPARE(rx2.pos(), -1); - QCOMPARE(rx2.cap(), QString("")); -} - -void tst_QRegExp::escapeSequences() -{ - QString perlSyntaxSpecialChars("0123456789afnrtvbBdDwWsSx\\|[]{}()^$?+*"); - QString w3cXmlSchema11SyntaxSpecialChars("cCiIpP"); // as well as the perl ones - QString pattern = QLatin1String("\\?"); - for (int i = ' '; i <= 127; ++i) { - QLatin1Char c(i); - if (perlSyntaxSpecialChars.indexOf(c) == -1) { - pattern[1] = c; - QRegExp rx(pattern, Qt::CaseSensitive, QRegExp::RegExp); - // we'll never have c == 'a' since it's a special character - const QString s = QLatin1String("aaa") + c + QLatin1String("aaa"); - QCOMPARE(rx.indexIn(s), 3); - - rx.setPatternSyntax(QRegExp::RegExp2); - QCOMPARE(rx.indexIn(s), 3); - - if (w3cXmlSchema11SyntaxSpecialChars.indexOf(c) == -1) { - rx.setPatternSyntax(QRegExp::W3CXmlSchema11); - QCOMPARE(rx.indexIn(s), 3); - } - } - } -} - -void tst_QRegExp::splitString_data() -{ - QTest::addColumn("string"); - QTest::addColumn("pattern"); - QTest::addColumn("result"); - - QTest::newRow("data01") << "Some text\n\twith strange whitespace." - << "\\s+" - << (QStringList() << "Some" << "text" << "with" << "strange" << "whitespace." ); - - QTest::newRow("data02") << "This time, a normal English sentence." - << "\\W+" - << (QStringList() << "This" << "time" << "a" << "normal" << "English" << "sentence" << ""); - - QTest::newRow("data03") << "Now: this sentence fragment." 
- << "\\b" - << (QStringList() << "" << "Now" << ": " << "this" << " " << "sentence" << " " << "fragment" << "."); -} - -void tst_QRegExp::splitString() -{ - QFETCH(QString, string); - QFETCH(QString, pattern); - QFETCH(QStringList, result); - QStringList list = QRegExp(pattern).splitString(string); - QVERIFY(list == result); - - QVERIFY(list == result); - - result.removeAll(QString()); - - list = QRegExp(pattern).splitString(string, Qt::SkipEmptyParts); - QVERIFY(list == result); -} - -void tst_QRegExp::countIn() -{ - QString a; - a="ABCDEFGHIEfGEFG"; // 15 chars - QCOMPARE(QRegExp("[FG][HI]").countIn(a),1); - QCOMPARE(QRegExp("[G][HE]").countIn(a),2); -} - - -void tst_QRegExp::containedIn() -{ - QString a; - a="ABCDEFGHIEfGEFG"; // 15 chars - QVERIFY(QRegExp("[FG][HI]").containedIn(a)); - QVERIFY(QRegExp("[G][HE]").containedIn(a)); -} - -void tst_QRegExp::replaceIn_data() -{ - QTest::addColumn("string" ); - QTest::addColumn("regexp" ); - QTest::addColumn("after" ); - QTest::addColumn("result" ); - - QTest::newRow( "rem00" ) << QString("alpha") << QString("a+") << QString("") << QString("lph"); - QTest::newRow( "rem01" ) << QString("banana") << QString("^.a") << QString("") << QString("nana"); - QTest::newRow( "rem02" ) << QString("") << QString("^.a") << QString("") << QString(""); - QTest::newRow( "rem03" ) << QString("") << QString("^.a") << QString() << QString(""); - QTest::newRow( "rem04" ) << QString() << QString("^.a") << QString("") << QString(); - QTest::newRow( "rem05" ) << QString() << QString("^.a") << QString() << QString(); - - QTest::newRow( "rep00" ) << QString("A bon mot.") << QString("([^<]*)") << QString("\\emph{\\1}") << QString("A \\emph{bon mot}."); - QTest::newRow( "rep01" ) << QString("banana") << QString("^.a()") << QString("\\1") << QString("nana"); - QTest::newRow( "rep02" ) << QString("banana") << QString("(ba)") << QString("\\1X\\1") << QString("baXbanana"); - QTest::newRow( "rep03" ) << QString("banana") << QString("(ba)(na)na") << 
QString("\\2X\\1") << QString("naXba"); - - QTest::newRow("backref00") << QString("\\1\\2\\3\\4\\5\\6\\7\\8\\9\\A\\10\\11") << QString("\\\\[34]") - << QString("X") << QString("\\1\\2XX\\5\\6\\7\\8\\9\\A\\10\\11"); - QTest::newRow("backref01") << QString("foo") << QString("[fo]") << QString("\\1") << QString("\\1\\1\\1"); - QTest::newRow("backref02") << QString("foo") << QString("([fo])") << QString("(\\1)") << QString("(f)(o)(o)"); - QTest::newRow("backref03") << QString("foo") << QString("([fo])") << QString("\\2") << QString("\\2\\2\\2"); - QTest::newRow("backref04") << QString("foo") << QString("([fo])") << QString("\\10") << QString("f0o0o0"); - QTest::newRow("backref05") << QString("foo") << QString("([fo])") << QString("\\11") << QString("f1o1o1"); - QTest::newRow("backref06") << QString("foo") << QString("([fo])") << QString("\\19") << QString("f9o9o9"); - QTest::newRow("backref07") << QString("foo") << QString("(f)(o+)") - << QString("\\2\\1\\10\\20\\11\\22\\19\\29\\3") - << QString("ooff0oo0f1oo2f9oo9\\3"); - QTest::newRow("backref08") << QString("abc") << QString("(((((((((((((([abc]))))))))))))))") - << QString("{\\14}") << QString("{a}{b}{c}"); - QTest::newRow("backref09") << QString("abcdefghijklmn") - << QString("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)(n)") - << QString("\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10" - "\\9\\90\\8\\80\\7\\70\\6\\60\\5\\50\\4\\40\\3\\30\\2\\20\\1") - << QString("a9a8a7a6a5nmlkjii0hh0gg0ff0ee0dd0cc0bb0a"); - QTest::newRow("backref10") << QString("abc") << QString("((((((((((((((abc))))))))))))))") - << QString("\\0\\01\\011") << QString("\\0\\01\\011"); - QTest::newRow("invalid") << QString("") << QString("invalid regex\\") << QString("") << QString(""); -} - -void tst_QRegExp::replaceIn() -{ - QFETCH( QString, string ); - QFETCH( QString, regexp ); - QFETCH( QString, after ); - - QString s2 = string; - s2 = QRegExp(regexp).replaceIn(s2, after); - QTEST( s2, "result" ); - s2 = string; -} - -void tst_QRegExp::removeIn_data() 
-{ - replaceIn_data(); -} - -void tst_QRegExp::removeIn() -{ - QFETCH( QString, string ); - QFETCH( QString, regexp ); - QFETCH( QString, after ); - - if ( after.length() == 0 ) { - QString s2 = string; - s2 = QRegExp(regexp).removeIn(s2); - QTEST( s2, "result" ); - } else { - QCOMPARE( 0, 0 ); // shut Qt Test - } -} - -void tst_QRegExp::filterList() -{ - QStringList list3, list4; - list3 << "Bill Gates" << "Joe Blow" << "Bill Clinton"; - list3 = QRegExp("[i]ll") .filterList(list3); - list4 << "Bill Gates" << "Bill Clinton"; - QCOMPARE( list3, list4 ); -} - -void tst_QRegExp::replaceInList() -{ - QStringList list3, list4; - list3 << "alpha" << "beta" << "gamma" << "epsilon"; - list3 = QRegExp("^a").replaceIn(list3, "o"); - list4 << "olpha" << "beta" << "gamma" << "epsilon"; - QCOMPARE( list3, list4 ); - - QStringList list5, list6; - list5 << "Bill Clinton" << "Gates, Bill"; - list6 << "Bill Clinton" << "Bill Gates"; - list5 = QRegExp("^(.*), (.*)$").replaceIn(list5, "\\2 \\1"); - QCOMPARE( list5, list6 ); -} - -static QRegExp QRegExpData(int index) -{ - switch (index) { - case 0: return QRegExp(); - case 1: return QRegExp(""); - case 2: return QRegExp("A", Qt::CaseInsensitive); - case 3: return QRegExp("ABCDE FGHI", Qt::CaseSensitive, QRegExp::Wildcard); - case 4: return QRegExp("This is a long string", Qt::CaseInsensitive, QRegExp::FixedString); - case 5: return QRegExp("And again a string with a \nCRLF", Qt::CaseInsensitive, QRegExp::RegExp); - case 6: - { - QRegExp rx("abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRESTUVWXYZ 1234567890 ~`!@#$%^&*()_-+={[}]|\\:;\"'<,>.?/"); - rx.setMinimal(true); - return rx; - } - } - return QRegExp("foo"); -} -#define MAX_QREGEXP_DATA 7 - -void tst_QRegExp::datastream_data() -{ - QTest::addColumn("device"); - QTest::addColumn("byteOrder"); - - const char * const devices[] = { - "file", - "bytearray", - "buffer", - 0 - }; - for (int d=0; devices[d] != 0; d++) { - QString device = devices[d]; - for (int b=0; b<2; b++) { - QString 
byte_order = b == 0 ? "BigEndian" : "LittleEndian"; - - QString tag = device + QLatin1Char('_') + byte_order; - for (int e = 0; e < MAX_QREGEXP_DATA; e++) { - QTest::newRow(qPrintable(tag + QLatin1Char('_') + QString::number(e))) << device << byte_order; - } - } - } -} - -static int dataIndex(const QString &tag) -{ - int pos = tag.lastIndexOf(QLatin1Char('_')); - if (pos >= 0) { - int ret = 0; - QString count = tag.mid(pos + 1); - bool ok; - ret = count.toInt(&ok); - if (ok) - return ret; - } - return -1; -} - -void tst_QRegExp::datastream() -{ - QFETCH(QString, device); \ - - qRegisterMetaTypeStreamOperators("QRegExp"); - - if (device == "bytearray") { \ - QByteArray ba; \ - QDataStream sout(&ba, QIODevice::WriteOnly); \ - writeQRegExp(&sout); \ - QDataStream sin(&ba, QIODevice::ReadOnly); \ - readQRegExp(&sin); \ - } else if (device == "file") { \ - QString fileName = "qdatastream.out"; \ - QFile fOut(fileName); \ - QVERIFY(fOut.open(QIODevice::WriteOnly)); \ - QDataStream sout(&fOut); \ - writeQRegExp(&sout); \ - fOut.close(); \ - QFile fIn(fileName); \ - QVERIFY(fIn.open(QIODevice::ReadOnly)); \ - QDataStream sin(&fIn); \ - readQRegExp(&sin); \ - fIn.close(); \ - } else if (device == "buffer") { \ - QByteArray ba(10000, '\0'); \ - QBuffer bOut(&ba); \ - bOut.open(QIODevice::WriteOnly); \ - QDataStream sout(&bOut); \ - writeQRegExp(&sout); \ - bOut.close(); \ - QBuffer bIn(&ba); \ - bIn.open(QIODevice::ReadOnly); \ - QDataStream sin(&bIn); \ - readQRegExp(&sin); \ - bIn.close(); \ - } -} - -static void saveQVariantFromDataStream(const QString &fileName, QDataStream::Version version) -{ - - QFile file(fileName); - QVERIFY(file.open(QIODevice::ReadOnly)); - QDataStream dataFileStream(&file); - - QString typeName; - dataFileStream >> typeName; - QByteArray data = file.readAll(); - const int id = QMetaType::type(typeName.toLatin1()); - - QBuffer buffer; - buffer.open(QIODevice::ReadWrite); - QDataStream stream(&buffer); - stream.setVersion(version); - - QVariant 
constructedVariant(static_cast(id)); - QCOMPARE(constructedVariant.userType(), id); - stream << constructedVariant; - - // We are testing QVariant there is no point in testing full array. - QCOMPARE(buffer.data().left(5), data.left(5)); - - buffer.seek(0); - QVariant recunstructedVariant; - stream >> recunstructedVariant; - QCOMPARE(recunstructedVariant.userType(), constructedVariant.userType()); -} - -void tst_QRegExp::datastream2() -{ - saveQVariantFromDataStream(QLatin1String(":/data/qdatastream_4.9.bin"), QDataStream::Qt_4_9); - saveQVariantFromDataStream(QLatin1String(":/data/qdatastream_5.0.bin"), QDataStream::Qt_5_0); -} - -void tst_QRegExp::writeQRegExp(QDataStream* s) -{ - QRegExp test(QRegExpData(dataIndex(QTest::currentDataTag()))); - *s << test; - *s << QString("Her er det noe tekst"); - *s << test; - *s << QString("nonempty"); - *s << test; - *s << QVariant(test); -} - -void tst_QRegExp::readQRegExp(QDataStream *s) -{ - QRegExp R; - QString S; - QVariant V; - QRegExp test(QRegExpData(dataIndex(QTest::currentDataTag()))); - - *s >> R; - QCOMPARE(R, test); - *s >> S; - QCOMPARE(S, QString("Her er det noe tekst")); - *s >> R; - QCOMPARE(R, test); - *s >> S; - QCOMPARE(S, QString("nonempty")); - *s >> R; - QCOMPARE(R, test); - *s >> V; - QCOMPARE(V.userType(), qMetaTypeId()); - QCOMPARE(qvariant_cast(V), test); -} - - -QTEST_APPLESS_MAIN(tst_QRegExp) -#include "tst_qregexp.moc" diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index 376a4cb5dc..e0d5180b78 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -1631,7 +1631,7 @@ void tst_QString::lastIndexOf() QCOMPARE(haystack.lastIndexOf(needle.toLatin1().data(), from, cs), expected); if (from >= -1 && from < haystack.size()) { - // unfortunately, QString and QRegExp don't have the same out of bound semantics + // unfortunately, QString and QRegularExpression don't have the same out 
of bound semantics // I think QString is wrong -- See file log for contact information. { QRegularExpression::PatternOptions options = QRegularExpression::NoPatternOption; diff --git a/tests/auto/corelib/text/text.pro b/tests/auto/corelib/text/text.pro index 272bcb1013..0e89ea0850 100644 --- a/tests/auto/corelib/text/text.pro +++ b/tests/auto/corelib/text/text.pro @@ -11,7 +11,6 @@ SUBDIRS = \ qcollator \ qlatin1string \ qlocale \ - qregexp \ qregularexpression \ qstring \ qstring_no_cast_from_bytearray \ diff --git a/tests/benchmarks/corelib/text/qregexp/CMakeLists.txt b/tests/benchmarks/corelib/text/qregexp/CMakeLists.txt deleted file mode 100644 index 1646f4778a..0000000000 --- a/tests/benchmarks/corelib/text/qregexp/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -# Generated from qregexp.pro. - -##################################################################### -## tst_bench_qregexp Binary: -##################################################################### - -qt_add_benchmark(tst_bench_qregexp - EXCEPTIONS - SOURCES - main.cpp - PUBLIC_LIBRARIES - Qt::Test -) - -# Resources: -set(qregexp_resource_files - "main.cpp" -) - -qt_add_resource(tst_bench_qregexp "qregexp" - PREFIX - "/" - FILES - ${qregexp_resource_files} -) - - -#### Keys ignored in scope 1:.:.:qregexp.pro:: -# TEMPLATE = "app" - -## Scopes: -##################################################################### - -qt_extend_target(tst_bench_qregexp CONDITION TARGET Qt::Script AND NOT pcre - DEFINES - HAVE_JSC - PUBLIC_LIBRARIES - Qt::Script -) - -qt_extend_target(tst_bench_qregexp CONDITION (NOT QNX) AND (EXISTS _ss_[QT_SYSROOT]/usr/include/boost/regex.hpp) - DEFINES - HAVE_BOOST - PUBLIC_LIBRARIES - boost_regex -) diff --git a/tests/benchmarks/corelib/text/qregexp/main.cpp b/tests/benchmarks/corelib/text/qregexp/main.cpp deleted file mode 100644 index 798b23f2b0..0000000000 --- a/tests/benchmarks/corelib/text/qregexp/main.cpp +++ /dev/null @@ -1,615 +0,0 @@ 
-/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the test suite of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include -#include -#include -#include - -#include -#ifdef HAVE_BOOST -#include -#endif - -#ifdef HAVE_JSC -#include -#include "pcre/pcre.h" -#endif -#define ZLIB_VERSION "1.2.3.4" - -class tst_qregexp : public QObject -{ - Q_OBJECT -public: - tst_qregexp(); -private slots: - void escape_old(); - void escape_old_data() { escape_data(); } - void escape_new1(); - void escape_new1_data() { escape_data(); } - void escape_new2(); - void escape_new2_data() { escape_data(); } - void escape_new3(); - void escape_new3_data() { escape_data(); } - void escape_new4(); - void escape_new4_data() { escape_data(); } -/* - JSC outperforms everything. - Boost is less impressive then expected. 
- */ - void simpleFind1(); - void rangeReplace1(); - void matchReplace1(); - - void simpleFind2(); - void rangeReplace2(); - void matchReplace2(); - - void simpleFindJSC(); - void rangeReplaceJSC(); - void matchReplaceJSC(); - - void simpleFindBoost(); - void rangeReplaceBoost(); - void matchReplaceBoost(); - -/* those apply an (incorrect) regexp on entire source - (this main.cpp). JSC appears to handle this - (ab)use case best. QRegExp performs extremly bad. - */ - void horribleWrongReplace1(); - void horribleReplace1(); - void horribleReplace2(); - void horribleWrongReplace2(); - void horribleWrongReplaceJSC(); - void horribleReplaceJSC(); - void horribleWrongReplaceBoost(); - void horribleReplaceBoost(); -private: - QString str1; - QString str2; - void escape_data(); -}; - -tst_qregexp::tst_qregexp() - :QObject() - ,str1("We are all happy monkeys") -{ - QFile f(":/main.cpp"); - f.open(QFile::ReadOnly); - str2=f.readAll(); -} - -static void verify(const QString "ed, const QString &expected) -{ - if (quoted != expected) - qDebug() << "ERROR:" << quoted << expected; -} - -void tst_qregexp::escape_data() -{ - QTest::addColumn("pattern"); - QTest::addColumn("expected"); - - QTest::newRow("escape 0") << "Hello world" << "Hello world"; - QTest::newRow("escape 1") << "(Hello world)" << "\\(Hello world\\)"; - { - QString s; - for (int i = 0; i < 10; ++i) - s += "(escape)"; - QTest::newRow("escape 10") << s << QRegExp::escape(s); - } - { - QString s; - for (int i = 0; i < 100; ++i) - s += "(escape)"; - QTest::newRow("escape 100") << s << QRegExp::escape(s); - } -} - -void tst_qregexp::escape_old() -{ - QFETCH(QString, pattern); - QFETCH(QString, expected); - - QBENCHMARK { - static const char meta[] = "$()*+.?[\\]^{|}"; - QString quoted = pattern; - int i = 0; - - while (i < quoted.length()) { - if (strchr(meta, quoted.at(i).toLatin1()) != 0) - quoted.insert(i++, QLatin1Char('\\')); - ++i; - } - - verify(quoted, expected); - } -} - -void tst_qregexp::escape_new1() -{ - 
QFETCH(QString, pattern); - QFETCH(QString, expected); - - QBENCHMARK { - QString quoted; - const int count = pattern.count(); - quoted.reserve(count * 2); - const QLatin1Char backslash('\\'); - for (int i = 0; i < count; i++) { - switch (pattern.at(i).toLatin1()) { - case '$': - case '(': - case ')': - case '*': - case '+': - case '.': - case '?': - case '[': - case '\\': - case ']': - case '^': - case '{': - case '|': - case '}': - quoted.append(backslash); - } - quoted.append(pattern.at(i)); - } - verify(quoted, expected); - } -} - -void tst_qregexp::escape_new2() -{ - QFETCH(QString, pattern); - QFETCH(QString, expected); - - QBENCHMARK { - int count = pattern.count(); - const QLatin1Char backslash('\\'); - QString quoted(count * 2, backslash); - const QChar *patternData = pattern.data(); - QChar *quotedData = quoted.data(); - int escaped = 0; - for ( ; --count >= 0; ++patternData) { - const QChar c = *patternData; - switch (c.unicode()) { - case '$': - case '(': - case ')': - case '*': - case '+': - case '.': - case '?': - case '[': - case '\\': - case ']': - case '^': - case '{': - case '|': - case '}': - ++escaped; - ++quotedData; - } - *quotedData = c; - ++quotedData; - } - quoted.resize(pattern.size() + escaped); - - verify(quoted, expected); - } -} - -void tst_qregexp::escape_new3() -{ - QFETCH(QString, pattern); - QFETCH(QString, expected); - - QBENCHMARK { - QString quoted; - const int count = pattern.count(); - quoted.reserve(count * 2); - const QLatin1Char backslash('\\'); - for (int i = 0; i < count; i++) { - switch (pattern.at(i).toLatin1()) { - case '$': - case '(': - case ')': - case '*': - case '+': - case '.': - case '?': - case '[': - case '\\': - case ']': - case '^': - case '{': - case '|': - case '}': - quoted += backslash; - } - quoted += pattern.at(i); - } - - verify(quoted, expected); - } -} - - -static inline bool needsEscaping(int c) -{ - switch (c) { - case '$': - case '(': - case ')': - case '*': - case '+': - case '.': - case '?': - 
case '[': - case '\\': - case ']': - case '^': - case '{': - case '|': - case '}': - return true; - } - return false; -} - -void tst_qregexp::escape_new4() -{ - QFETCH(QString, pattern); - QFETCH(QString, expected); - - QBENCHMARK { - const int n = pattern.size(); - const QChar *patternData = pattern.data(); - // try to prevent copy if no escape is needed - int i = 0; - for (int i = 0; i != n; ++i) { - const QChar c = patternData[i]; - if (needsEscaping(c.unicode())) - break; - } - if (i == n) { - verify(pattern, expected); - // no escaping needed, "return pattern" should be done here. - return; - } - const QLatin1Char backslash('\\'); - QString quoted(n * 2, backslash); - QChar *quotedData = quoted.data(); - for (int j = 0; j != i; ++j) - *quotedData++ = *patternData++; - int escaped = 0; - for (; i != n; ++i) { - const QChar c = *patternData; - if (needsEscaping(c.unicode())) { - ++escaped; - ++quotedData; - } - *quotedData = c; - ++quotedData; - ++patternData; - } - quoted.resize(n + escaped); - verify(quoted, expected); - // "return quoted" - } -} - - -void tst_qregexp::simpleFind1() -{ - int roff; - QRegExp rx("happy"); - rx.setPatternSyntax(QRegExp::RegExp); - QBENCHMARK{ - roff = rx.indexIn(str1); - } - QCOMPARE(roff, 11); -} - -void tst_qregexp::rangeReplace1() -{ - QString r; - QRegExp rx("[a-f]"); - rx.setPatternSyntax(QRegExp::RegExp); - QBENCHMARK{ - r = QString(str1).replace(rx, "-"); - } - QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys")); -} - -void tst_qregexp::matchReplace1() -{ - QString r; - QRegExp rx("[^a-f]*([a-f]+)[^a-f]*"); - rx.setPatternSyntax(QRegExp::RegExp); - QBENCHMARK{ - r = QString(str1).replace(rx, "\\1"); - } - QCOMPARE(r, QString("eaeaae")); -} - -void tst_qregexp::horribleWrongReplace1() -{ - QString r; - QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*"); - rx.setPatternSyntax(QRegExp::RegExp); - QBENCHMARK{ - r = QString(str2).replace(rx, "\\1.\\2.\\3"); - } - QCOMPARE(r, str2); -} - -void 
tst_qregexp::horribleReplace1() -{ - QString r; - QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*"); - rx.setPatternSyntax(QRegExp::RegExp); - QBENCHMARK{ - r = QString(str2).replace(rx, "\\1.\\2.\\3"); - } - QCOMPARE(r, QString("1.2.3")); -} - - -void tst_qregexp::simpleFind2() -{ - int roff; - QRegExp rx("happy"); - rx.setPatternSyntax(QRegExp::RegExp2); - QBENCHMARK{ - roff = rx.indexIn(str1); - } - QCOMPARE(roff, 11); -} - -void tst_qregexp::rangeReplace2() -{ - QString r; - QRegExp rx("[a-f]"); - rx.setPatternSyntax(QRegExp::RegExp2); - QBENCHMARK{ - r = QString(str1).replace(rx, "-"); - } - QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys")); -} - -void tst_qregexp::matchReplace2() -{ - QString r; - QRegExp rx("[^a-f]*([a-f]+)[^a-f]*"); - rx.setPatternSyntax(QRegExp::RegExp2); - QBENCHMARK{ - r = QString(str1).replace(rx, "\\1"); - } - QCOMPARE(r, QString("eaeaae")); -} - -void tst_qregexp::horribleWrongReplace2() -{ - QString r; - QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*"); - rx.setPatternSyntax(QRegExp::RegExp2); - QBENCHMARK{ - r = QString(str2).replace(rx, "\\1.\\2.\\3"); - } - QCOMPARE(r, str2); -} - -void tst_qregexp::horribleReplace2() -{ - QString r; - QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*"); - rx.setPatternSyntax(QRegExp::RegExp2); - QBENCHMARK{ - r = QString(str2).replace(rx, "\\1.\\2.\\3"); - } - QCOMPARE(r, QString("1.2.3")); -} -void tst_qregexp::simpleFindJSC() -{ -#ifdef HAVE_JSC - int numr; - const char * errmsg=" "; - QString rxs("happy"); - JSRegExp *rx = jsRegExpCompile(rxs.utf16(), rxs.length(), JSRegExpDoNotIgnoreCase, JSRegExpSingleLine, 0, &errmsg); - QVERIFY(rx != 0); - QString s(str1); - int offsetVector[3]; - QBENCHMARK{ - numr = jsRegExpExecute(rx, s.utf16(), s.length(), 0, offsetVector, 3); - } - jsRegExpFree(rx); - QCOMPARE(numr, 1); - QCOMPARE(offsetVector[0], 11); -#else - QSKIP("JSC is not enabled for this platform"); -#endif -} - -void 
tst_qregexp::rangeReplaceJSC() -{ -#ifdef HAVE_JSC - QScriptValue r; - QScriptEngine engine; - engine.globalObject().setProperty("s", str1); - QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[a-f]/g, '-') } )"); - QVERIFY(replaceFunc.isFunction()); - QBENCHMARK{ - r = replaceFunc.call(QScriptValue()); - } - QCOMPARE(r.toString(), QString("W- -r- -ll h-ppy monk-ys")); -#else - QSKIP("JSC is not enabled for this platform"); -#endif -} - -void tst_qregexp::matchReplaceJSC() -{ -#ifdef HAVE_JSC - QScriptValue r; - QScriptEngine engine; - engine.globalObject().setProperty("s", str1); - QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[^a-f]*([a-f]+)[^a-f]*/g, '$1') } )"); - QVERIFY(replaceFunc.isFunction()); - QBENCHMARK{ - r = replaceFunc.call(QScriptValue()); - } - QCOMPARE(r.toString(), QString("eaeaae")); -#else - QSKIP("JSC is not enabled for this platform"); -#endif -} - -void tst_qregexp::horribleWrongReplaceJSC() -{ -#ifdef HAVE_JSC - QScriptValue r; - QScriptEngine engine; - engine.globalObject().setProperty("s", str2); - QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*/gm, '$1.$2.$3') } )"); - QVERIFY(replaceFunc.isFunction()); - QBENCHMARK{ - r = replaceFunc.call(QScriptValue()); - } - QCOMPARE(r.toString(), str2); -#else - QSKIP("JSC is not enabled for this platform"); -#endif -} - -void tst_qregexp::horribleReplaceJSC() -{ -#ifdef HAVE_JSC - QScriptValue r; - QScriptEngine engine; - // the m flag doesn't actually work here; dunno - engine.globalObject().setProperty("s", str2.replace('\n', ' ')); - QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*/gm, '$1.$2.$3') } )"); - QVERIFY(replaceFunc.isFunction()); - QBENCHMARK{ - r = replaceFunc.call(QScriptValue()); - } - QCOMPARE(r.toString(), QString("1.2.3")); -#else - QSKIP("JSC is not 
enabled for this platform"); -#endif -} - -void tst_qregexp::simpleFindBoost() -{ -#ifdef HAVE_BOOST - int roff; - boost::regex rx ("happy", boost::regex_constants::perl); - std::string s = str1.toStdString(); - std::string::const_iterator start, end; - start = s.begin(); - end = s.end(); - boost::match_flag_type flags = boost::match_default; - QBENCHMARK{ - boost::match_results what; - regex_search(start, end, what, rx, flags); - roff = (what[0].first)-start; - } - QCOMPARE(roff, 11); -#else - QSKIP("Boost is not enabled for this platform"); -#endif - -} - -void tst_qregexp::rangeReplaceBoost() -{ -#ifdef HAVE_BOOST - boost::regex pattern ("[a-f]", boost::regex_constants::perl); - std::string s = str1.toStdString(); - std::string r; - QBENCHMARK{ - r = boost::regex_replace (s, pattern, "-"); - } - QCOMPARE(r, std::string("W- -r- -ll h-ppy monk-ys")); -#else - QSKIP("Boost is not enabled for this platform"); -#endif -} - -void tst_qregexp::matchReplaceBoost() -{ -#ifdef HAVE_BOOST - boost::regex pattern ("[^a-f]*([a-f]+)[^a-f]*",boost::regex_constants::perl); - std::string s = str1.toStdString(); - std::string r; - QBENCHMARK{ - r = boost::regex_replace (s, pattern, "$1"); - } - QCOMPARE(r, std::string("eaeaae")); -#else - QSKIP("Boost is not enabled for this platform"); -#endif -} - -void tst_qregexp::horribleWrongReplaceBoost() -{ -#ifdef HAVE_BOOST - boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*", boost::regex_constants::perl); - std::string s = str2.toStdString(); - std::string r; - QBENCHMARK{ - r = boost::regex_replace (s, pattern, "$1.$2.$3"); - } - QCOMPARE(r, s); -#else - QSKIP("Boost is not enabled for this platform"); -#endif -} - -void tst_qregexp::horribleReplaceBoost() -{ -#ifdef HAVE_BOOST - boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*", boost::regex_constants::perl); - std::string s = str2.toStdString(); - std::string r; - QBENCHMARK{ - r = boost::regex_replace (s, pattern, 
"$1.$2.$3"); - } - QCOMPARE(r, std::string("1.2.3")); -#else - QSKIP("Boost is not enabled for this platform"); -#endif -} - -QTEST_MAIN(tst_qregexp) - -#include "main.moc" diff --git a/tests/benchmarks/corelib/text/qregexp/qregexp.pro b/tests/benchmarks/corelib/text/qregexp/qregexp.pro deleted file mode 100644 index c04c13060b..0000000000 --- a/tests/benchmarks/corelib/text/qregexp/qregexp.pro +++ /dev/null @@ -1,20 +0,0 @@ -TEMPLATE = app -CONFIG += benchmark -CONFIG += exceptions -QT = core testlib - -TARGET = tst_bench_qregexp -SOURCES += main.cpp -RESOURCES += qregexp.qrc - -qtHaveModule(script):!pcre { - DEFINES += HAVE_JSC - QT += script -} - -!qnx { - exists($$[QT_SYSROOT]/usr/include/boost/regex.hpp) { - DEFINES += HAVE_BOOST - LIBS += -lboost_regex - } -} diff --git a/tests/benchmarks/corelib/text/qregexp/qregexp.qrc b/tests/benchmarks/corelib/text/qregexp/qregexp.qrc deleted file mode 100644 index a7fe13c035..0000000000 --- a/tests/benchmarks/corelib/text/qregexp/qregexp.qrc +++ /dev/null @@ -1,6 +0,0 @@ - - - main.cpp - - -