diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp index 802909df00..95cd8638f4 100644 --- a/src/corelib/serialization/qxmlstream.cpp +++ b/src/corelib/serialization/qxmlstream.cpp @@ -185,7 +185,7 @@ WRAP(indexOf, QLatin1StringView) addData() or by waiting for it to arrive on the device(). \value UnexpectedElementError The parser encountered an element - that was different to those it expected. + or token that was different to those it expected. */ @@ -322,13 +322,34 @@ QXmlStreamEntityResolver *QXmlStreamReader::entityResolver() const QXmlStreamReader is a well-formed XML 1.0 parser that does \e not include external parsed entities. As long as no error occurs, the - application code can thus be assured that the data provided by the - stream reader satisfies the W3C's criteria for well-formed XML. For - example, you can be certain that all tags are indeed nested and - closed properly, that references to internal entities have been - replaced with the correct replacement text, and that attributes have - been normalized or added according to the internal subset of the - DTD. + application code can thus be assured that + \list + \li the data provided by the stream reader satisfies the W3C's + criteria for well-formed XML, + \li tokens are provided in a valid order. + \endlist + + Unless QXmlStreamReader raises an error, it guarantees the following: + \list + \li All tags are nested and closed properly. + \li References to internal entities have been replaced with the + correct replacement text. + \li Attributes have been normalized or added according to the + internal subset of the \l DTD. + \li Tokens of type \l StartDocument happen before all others, + aside from comments and processing instructions. + \li At most one DOCTYPE element (a token of type \l DTD) is present. + \li If present, the DOCTYPE appears before all other elements, + aside from StartDocument, comments and processing instructions. 
+ \endlist + + In particular, once any token of type \l StartElement, \l EndElement, + \l Characters, \l EntityReference or \l EndDocument is seen, no + tokens of type StartDocument or DTD will be seen. If one is present in + the input stream, out of order, an error is raised. + + \note The token types \l Comment and \l ProcessingInstruction may appear + anywhere in the stream. If an error occurs while parsing, atEnd() and hasError() return true, and error() returns the error that occurred. The functions @@ -659,6 +680,7 @@ QXmlStreamReader::TokenType QXmlStreamReader::readNext() d->token = -1; return readNext(); } + d->checkToken(); return d->type; } @@ -743,6 +765,11 @@ static constexpr auto QXmlStreamReader_tokenTypeString = qOffsetStringArray( "ProcessingInstruction" ); +static constexpr auto QXmlStreamReader_XmlContextString = qOffsetStringArray( + "Prolog", + "Body" +); + /*! \property QXmlStreamReader::namespaceProcessing \brief the namespace-processing flag of the stream reader. @@ -777,6 +804,15 @@ QString QXmlStreamReader::tokenString() const return QLatin1StringView(QXmlStreamReader_tokenTypeString.at(d->type)); } +/*! + \internal + \return \a ctxt (Prolog/Body) as a string. 
+ */ +static constexpr QLatin1StringView contextString(QXmlStreamReaderPrivate::XmlContext ctxt) +{ + return QLatin1StringView(QXmlStreamReader_XmlContextString.at(static_cast<int>(ctxt))); +} + #endif // feature xmlstreamreader QXmlStreamPrivateTagStack::QXmlStreamPrivateTagStack() @@ -864,6 +900,8 @@ void QXmlStreamReaderPrivate::init() type = QXmlStreamReader::NoToken; error = QXmlStreamReader::NoError; + currentContext = XmlContext::Prolog; + foundDTD = false; } /* @@ -3814,6 +3852,101 @@ void QXmlStreamWriter::writeCurrentToken(const QXmlStreamReader &reader) } } +/*! + \internal + \return \c true if a token of \a type may appear in context \a loc (prolog/body). + */ +static constexpr bool isTokenAllowedInContext(QXmlStreamReader::TokenType type, + QXmlStreamReaderPrivate::XmlContext loc) +{ + switch (type) { + case QXmlStreamReader::StartDocument: + case QXmlStreamReader::DTD: + return loc == QXmlStreamReaderPrivate::XmlContext::Prolog; + + case QXmlStreamReader::StartElement: + case QXmlStreamReader::EndElement: + case QXmlStreamReader::Characters: + case QXmlStreamReader::EntityReference: + case QXmlStreamReader::EndDocument: + return loc == QXmlStreamReaderPrivate::XmlContext::Body; + + case QXmlStreamReader::Comment: + case QXmlStreamReader::ProcessingInstruction: + return true; + + case QXmlStreamReader::NoToken: + case QXmlStreamReader::Invalid: + return false; + } + + // GCC 8.x does not treat __builtin_unreachable() as constexpr +#if !defined(Q_CC_GNU_ONLY) || (Q_CC_GNU >= 900) + Q_UNREACHABLE_RETURN(false); +#else + return false; +#endif +} + +/*! + \internal + \brief QXmlStreamReaderPrivate::isValidToken + \return \c true if \a type is a valid token type. + \return \c false if \a type is an unexpected token, + which indicates a non-well-formed or invalid XML stream. 
+ */ +bool QXmlStreamReaderPrivate::isValidToken(QXmlStreamReader::TokenType type) +{ + // Invalid and NoToken are rejected in every context, without touching currentContext + if (type == QXmlStreamReader::Invalid || type == QXmlStreamReader::NoToken) + return false; + + // A token accepted here, or rejected while already in the body, is final + const bool result = isTokenAllowedInContext(type, currentContext); + if (result || currentContext == XmlContext::Body) + return result; + + // First non-Prolog token observed => switch context to body and check again. + currentContext = XmlContext::Body; + return isTokenAllowedInContext(type, currentContext); +} + +/*! + \internal + Checks the current token type and raises UnexpectedElementError if it is invalid + in the current context (prolog/body) or if it is a second DTD token. + */ +void QXmlStreamReaderPrivate::checkToken() +{ + Q_Q(QXmlStreamReader); + + // Remember the context for the error message: isValidToken() may advance currentContext to Body. + const XmlContext context = currentContext; + const bool ok = isValidToken(type); + + // Do nothing if an error has been raised already (going along with an unexpected token) + if (error != QXmlStreamReader::Error::NoError) + return; + + if (!ok) { + raiseError(QXmlStreamReader::UnexpectedElementError, + QObject::tr("Unexpected token type %1 in %2.") + .arg(q->tokenString(), contextString(context))); + return; + } + + if (type != QXmlStreamReader::DTD) + return; + + // Raise error on multiple DTD tokens + if (foundDTD) { + raiseError(QXmlStreamReader::UnexpectedElementError, + QObject::tr("Found second DTD token in %1.").arg(contextString(context))); + } else { + foundDTD = true; + } +} + /*! 
\fn bool QXmlStreamAttributes::hasAttribute(QAnyStringView qualifiedName) const diff --git a/src/corelib/serialization/qxmlstream_p.h b/src/corelib/serialization/qxmlstream_p.h index cb3d1975b1..a29ee656e9 100644 --- a/src/corelib/serialization/qxmlstream_p.h +++ b/src/corelib/serialization/qxmlstream_p.h @@ -297,6 +297,17 @@ public: QStringDecoder decoder; bool atEnd; + enum class XmlContext /* position in the document, used by checkToken() to validate token order */ + { + Prolog, /* StartDocument and DTD tokens are accepted only in this context */ + Body, /* entered by isValidToken() on the first token that is not allowed in the prolog */ + }; + + XmlContext currentContext = XmlContext::Prolog; /* reset to Prolog by init() */ + bool foundDTD = false; /* set by checkToken() once a DTD token has been accepted; cleared by init() */ + bool isValidToken(QXmlStreamReader::TokenType type); + void checkToken(); + /*! \sa setType() */ diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml new file mode 100644 index 0000000000..1c3ca4e271 --- /dev/null +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml @@ -0,0 +1,20 @@ + + + + + + + + +]> + + + tst_QXmlStream + + + + + ]> + diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml new file mode 100644 index 0000000000..cd398c0f9f --- /dev/null +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml @@ -0,0 +1,20 @@ + + + + + + + + +]> + + + +]> + + + tst_QXmlStream + + diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml new file mode 100644 index 0000000000..1b61a3f062 --- /dev/null +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml @@ -0,0 +1,15 @@ + + + + + + + + +]> + + + tst_QXmlStream + + diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp index 3a1b4511e8..75edda97e0 100644 --- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp +++ 
b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp @@ -591,6 +591,9 @@ private slots: void entityExpansionLimit() const; + void tokenErrorHandling_data() const; + void tokenErrorHandling() const; + private: static QByteArray readFile(const QString &filename); @@ -1867,5 +1870,41 @@ void tst_QXmlStream::test_fastScanName() const QCOMPARE(reader.error(), errorType); } +void tst_QXmlStream::tokenErrorHandling_data() const +{ + QTest::addColumn<QString>("fileName"); + QTest::addColumn<QXmlStreamReader::Error>("expectedError"); + QTest::addColumn<QString>("errorKeyWord"); + + constexpr auto invalid = QXmlStreamReader::Error::UnexpectedElementError; + constexpr auto valid = QXmlStreamReader::Error::NoError; + QTest::newRow("DtdInBody") << "dtdInBody.xml" << invalid << "DTD"; + QTest::newRow("multipleDTD") << "multipleDtd.xml" << invalid << "second DTD"; + QTest::newRow("wellFormed") << "wellFormed.xml" << valid << ""; +} + +void tst_QXmlStream::tokenErrorHandling() const +{ + QFETCH(const QString, fileName); + QFETCH(const QXmlStreamReader::Error, expectedError); + QFETCH(const QString, errorKeyWord); + + const QDir dir(QFINDTESTDATA("tokenError")); + QFile file(dir.absoluteFilePath(fileName)); + + // Cross-compiling: File will be on host only + if (!file.exists()) + QSKIP("Testfile not found."); + + QVERIFY(file.open(QIODevice::ReadOnly)); /* fail here rather than feed an unopened device to the reader */ + QXmlStreamReader reader(&file); + while (!reader.atEnd()) + reader.readNext(); + + QCOMPARE(reader.error(), expectedError); + if (expectedError != QXmlStreamReader::Error::NoError) + QVERIFY(reader.errorString().contains(errorKeyWord)); +} + #include "tst_qxmlstream.moc" // vim: et:ts=4:sw=4:sts=4