QDomDocument: Add a way to preserve spacing-only text nodes

Added a parse option that can be passed to setContent(), to specify that
spacing-only text nodes must be preserved.

[ChangeLog][QtXml][QDomDocument] Spacing-only text nodes can now
be preserved by passing the ParseOption::PreserveSpacingOnlyNodes option
to setContent().

Fixes: QTBUG-104130
Fixes: QTBUG-89690
Task-number: QTBUG-90003
Change-Id: Id43730ce5b79a856c4b434d1f1d4dd7c49c25f31
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Sona Kurazyan 2022-08-03 10:26:58 +02:00
parent 7ceba9c472
commit 26a73e1b31
6 changed files with 87 additions and 8 deletions

View File

@ -73,6 +73,15 @@
If you use QDomDocument and rely on any of these, you must update
your code and XML documents accordingly.
\section3 Spacing-only text nodes
By default, text nodes containing only spacing characters are stripped
and won't appear in the QDomDocument. In Qt 5, the way to change this behavior
was to use the QDomDocument::setContent() overload that allowed a \c QXmlReader
to be supplied. That overload was removed in Qt 6.0, but since Qt 6.5,
you can pass QDomDocument::ParseOption::PreserveSpacingOnlyNodes as a parse
option, to specify that spacing-only text nodes must be preserved.
\section2 Qt Core5 compatibility library
If your application or library cannot be ported right now, the \l

View File

@ -6221,6 +6221,8 @@ bool QDomDocument::setContent(QXmlStreamReader *reader, bool namespaceProcessing
\value Default No parse options are set.
\value UseNamespaceProcessing Namespace processing is enabled.
\value PreserveSpacingOnlyNodes Text nodes containing only spacing
characters are preserved.
\sa setContent()
*/
@ -6307,7 +6309,9 @@ bool QDomDocument::setContent(QXmlStreamReader *reader, bool namespaceProcessing
string if the element or attribute has no prefix.
Text nodes consisting only of whitespace are stripped and won't
appear in the QDomDocument.
appear in the QDomDocument. Since Qt 6.5, one can pass
QDomDocument::ParseOption::PreserveSpacingOnlyNodes as a parse
option, to specify that spacing-only text nodes must be preserved.
\include qdom.cpp entity-refs

View File

@ -268,6 +268,7 @@ public:
// Options accepted by setContent(). Every enumerator other than Default is a
// distinct bit, so values can be combined through the ParseOptions flags type.
enum class ParseOption
{
    // No parse options are set.
    Default = 0,
    // Namespace processing is enabled.
    UseNamespaceProcessing = 1 << 0,
    // Text nodes containing only spacing characters are preserved.
    PreserveSpacingOnlyNodes = 1 << 1,
};
Q_DECLARE_FLAGS(ParseOptions, ParseOption)

View File

@ -349,13 +349,14 @@ bool QDomParser::parseBody()
}
break;
case QXmlStreamReader::Characters:
if (!reader->isWhitespace()) { // Skip the content consisting of only whitespaces
if (reader->isCDATA() || !reader->text().trimmed().isEmpty()) {
if (!domBuilder.characters(reader->text().toString(), reader->isCDATA())) {
domBuilder.fatalError(QDomParser::tr(
"Error occurred while processing the element content"));
return false;
}
// Skip the content if it contains only spacing characters,
// unless it's CDATA or PreserveSpacingOnlyNodes was specified.
if (reader->isCDATA() || domBuilder.preserveSpacingOnlyNodes()
|| !(reader->isWhitespace() || reader->text().trimmed().isEmpty())) {
if (!domBuilder.characters(reader->text().toString(), reader->isCDATA())) {
domBuilder.fatalError(
QDomParser::tr("Error occurred while processing the element content"));
return false;
}
}
break;

View File

@ -56,6 +56,9 @@ public:
void fatalError(const QString &message);
QDomDocument::ParseResult result() const { return parseResult; }
// Reports whether the PreserveSpacingOnlyNodes bit is set in parseOptions.
bool preserveSpacingOnlyNodes() const
{
    constexpr auto option = QDomDocument::ParseOption::PreserveSpacingOnlyNodes;
    return parseOptions & option;
}
private:
QString dtdInternalSubset(const QString &dtd);

View File

@ -32,6 +32,8 @@ private slots:
void setContent();
void setContentOverloads();
void parseOptions();
void spacingOnlyNodes_data() const;
void spacingOnlyNodes() const;
void parseResult();
void toString_01_data();
void toString_01();
@ -292,6 +294,65 @@ void tst_QDom::parseOptions()
}
}
void tst_QDom::spacingOnlyNodes_data() const
{
QTest::addColumn<QString>("input");
QTest::addColumn<QString>("expected");
QTest::addColumn<QDomDocument::ParseOption>("options");
QTest::newRow("spacing-only-remove")
<< u"<a> \t \n \r</a>"_s
<< u"<a/>"_s
<< QDomDocument::ParseOption::Default;
// \r is translated to \n, see https://www.w3.org/TR/xml11/#sec-line-ends
QTest::newRow("spacing-only-preserve")
<< u"<a> \t \n \r</a>"_s
<< u"<a> \t \n \n</a>"_s
<< QDomDocument::ParseOption::PreserveSpacingOnlyNodes;
QTest::newRow("mixed-text-remove")
<< u"<a> abc \t \n \r</a>"_s
<< u"<a> abc \t \n \n</a>"_s
<< QDomDocument::ParseOption::Default;
QTest::newRow("mixed-text-preserve")
<< u"<a> abc \t \n \r</a>"_s
<< u"<a> abc \t \n \n</a>"_s
<< QDomDocument::ParseOption::PreserveSpacingOnlyNodes;
// QDomDocument treats all chacarcters below as spaces (see QTBUG-105348)
static constexpr char16_t spaces[] = {
QChar::Space, QChar::Tabulation, QChar::LineFeed,
QChar::CarriageReturn, QChar::Nbsp,
0x2002, // EN SPACE
0x2003, // EM SPACE
0x2009 // THIN SPACE
};
for (char16_t space : spaces) {
QTest::addRow("spacing-remove-u%04x", space)
<< u"<a>"_s + space + u"</a>"_s
<< u"<a/>"_s
<< QDomDocument::ParseOption::Default;
// \r is translated to \n, see https://www.w3.org/TR/xml11/#sec-line-ends
char16_t expected = (space == QChar::CarriageReturn) ? char16_t(QChar::LineFeed) : space;
QTest::addRow("spacing-preserve-u%04x", space)
<< u"<a>"_s + space + u"</a>"_s
<< u"<a>"_s + expected + u"</a>"_s
<< QDomDocument::ParseOption::PreserveSpacingOnlyNodes;
}
}
// Data-driven check (rows are supplied by spacingOnlyNodes_data()): parses the
// fetched input with the fetched parse option and compares the document's
// serialization against the expected string.
void tst_QDom::spacingOnlyNodes() const
{
// Pull the current row's columns into local variables.
QFETCH(QString, input);
QFETCH(QString, expected);
QFETCH(QDomDocument::ParseOption, options);
QDomDocument doc;
// Parsing has to succeed for every row.
QVERIFY(doc.setContent(input, options));
// NOTE(review): indent -1 presumably stops toString() from adding formatting
// whitespace of its own - confirm against the QDomDocument::toString() docs.
QCOMPARE(doc.toString(-1), expected);
}
void tst_QDom::parseResult()
{
QString input = u"<doc xmlns:b='http://example.com/'>"