Always write XML documents as UTF-8
Remove support for setting a codec different from UTF-8 for writing XML files. All XML readers today can handle UTF-8, and there is no reason anymore to write a file in a different encoding. Change-Id: If89fb2d2474a2b55644d9bed7473c11ad91033eb Reviewed-by: Simon Hausmann <hausmann@gmail.com>
This commit is contained in:
parent
99632c2217
commit
2c7f9565ed
@ -48,6 +48,7 @@
|
||||
#if QT_CONFIG(textcodec)
|
||||
#include <qtextcodec.h>
|
||||
#endif
|
||||
#include <qstringconverter.h>
|
||||
#include <qstack.h>
|
||||
#include <qbuffer.h>
|
||||
#include <qscopeguard.h>
|
||||
@ -3009,8 +3010,7 @@ QStringRef QXmlStreamReader::documentEncoding() const
|
||||
writeProcessingInstruction(), and writeDTD(). Chaining of XML
|
||||
streams is supported with writeCurrentToken().
|
||||
|
||||
By default, QXmlStreamWriter encodes XML in UTF-8. Different
|
||||
encodings can be enforced using setCodec().
|
||||
QXmlStreamWriter always encodes XML in UTF-8.
|
||||
|
||||
If an error occurs while writing to the underlying device, hasError()
|
||||
starts returning true and subsequent writes are ignored.
|
||||
@ -3031,9 +3031,6 @@ public:
|
||||
~QXmlStreamWriterPrivate() {
|
||||
if (deleteDevice)
|
||||
delete device;
|
||||
#if QT_CONFIG(textcodec)
|
||||
delete encoder;
|
||||
#endif
|
||||
}
|
||||
|
||||
void write(const QStringRef &);
|
||||
@ -3053,16 +3050,10 @@ public:
|
||||
uint hasIoError :1;
|
||||
uint hasEncodingError :1;
|
||||
uint autoFormatting :1;
|
||||
uint isCodecASCIICompatible :1;
|
||||
QByteArray autoFormattingIndent;
|
||||
NamespaceDeclaration emptyNamespace;
|
||||
qsizetype lastNamespaceDeclaration;
|
||||
|
||||
#if QT_CONFIG(textcodec)
|
||||
QTextCodec *codec;
|
||||
QTextEncoder *encoder;
|
||||
#endif
|
||||
void checkIfASCIICompatibleCodec();
|
||||
QStringEncoder toUtf8;
|
||||
|
||||
NamespaceDeclaration &findNamespace(const QString &namespaceUri, bool writeDeclaration = false, bool noDefault = false);
|
||||
void writeNamespaceDeclaration(const NamespaceDeclaration &namespaceDeclaration);
|
||||
@ -3074,17 +3065,13 @@ public:
|
||||
|
||||
|
||||
QXmlStreamWriterPrivate::QXmlStreamWriterPrivate(QXmlStreamWriter *q)
|
||||
:autoFormattingIndent(4, ' ')
|
||||
: autoFormattingIndent(4, ' '),
|
||||
toUtf8(QStringEncoder::Utf8, QStringEncoder::Flag::Stateless)
|
||||
{
|
||||
q_ptr = q;
|
||||
device = nullptr;
|
||||
stringDevice = nullptr;
|
||||
deleteDevice = false;
|
||||
#if QT_CONFIG(textcodec)
|
||||
codec = QTextCodec::codecForMib(106); // utf8
|
||||
encoder = codec->makeEncoder(QTextCodec::IgnoreHeader); // no byte order mark for utf8
|
||||
#endif
|
||||
checkIfASCIICompatibleCodec();
|
||||
inStartElement = inEmptyElement = false;
|
||||
wroteSomething = false;
|
||||
hasIoError = false;
|
||||
@ -3095,37 +3082,16 @@ QXmlStreamWriterPrivate::QXmlStreamWriterPrivate(QXmlStreamWriter *q)
|
||||
namespacePrefixCount = 0;
|
||||
}
|
||||
|
||||
void QXmlStreamWriterPrivate::checkIfASCIICompatibleCodec()
|
||||
{
|
||||
#if QT_CONFIG(textcodec)
|
||||
Q_ASSERT(encoder);
|
||||
// test ASCII-compatibility using the letter 'a'
|
||||
QChar letterA = QLatin1Char('a');
|
||||
const QByteArray bytesA = encoder->fromUnicode(&letterA, 1);
|
||||
const bool isCodecASCIICompatibleA = (bytesA.count() == 1) && (bytesA[0] == 0x61) ;
|
||||
QChar letterLess = QLatin1Char('<');
|
||||
const QByteArray bytesLess = encoder->fromUnicode(&letterLess, 1);
|
||||
const bool isCodecASCIICompatibleLess = (bytesLess.count() == 1) && (bytesLess[0] == 0x3C) ;
|
||||
isCodecASCIICompatible = isCodecASCIICompatibleA && isCodecASCIICompatibleLess ;
|
||||
#else
|
||||
isCodecASCIICompatible = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
void QXmlStreamWriterPrivate::write(const QStringRef &s)
|
||||
{
|
||||
if (device) {
|
||||
if (hasIoError)
|
||||
return;
|
||||
#if !QT_CONFIG(textcodec)
|
||||
QByteArray bytes = s.toLatin1();
|
||||
#else
|
||||
QByteArray bytes = encoder->fromUnicode(s.constData(), s.size());
|
||||
if (encoder->hasFailure()) {
|
||||
QByteArray bytes = toUtf8(s);
|
||||
if (toUtf8.hasError()) {
|
||||
hasEncodingError = true;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (device->write(bytes) != bytes.size())
|
||||
hasIoError = true;
|
||||
}
|
||||
@ -3140,15 +3106,11 @@ void QXmlStreamWriterPrivate::write(const QString &s)
|
||||
if (device) {
|
||||
if (hasIoError)
|
||||
return;
|
||||
#if !QT_CONFIG(textcodec)
|
||||
QByteArray bytes = s.toLatin1();
|
||||
#else
|
||||
QByteArray bytes = encoder->fromUnicode(s);
|
||||
if (encoder->hasFailure()) {
|
||||
QByteArray bytes = toUtf8(s);
|
||||
if (toUtf8.hasError()) {
|
||||
hasEncodingError = true;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if (device->write(bytes) != bytes.size())
|
||||
hasIoError = true;
|
||||
}
|
||||
@ -3210,20 +3172,18 @@ void QXmlStreamWriterPrivate::writeEscaped(const QString &s, bool escapeWhitespa
|
||||
write(escaped);
|
||||
}
|
||||
|
||||
// Converts from ASCII to output encoding
|
||||
// Writes utf8
|
||||
void QXmlStreamWriterPrivate::write(const char *s, int len)
|
||||
{
|
||||
if (device) {
|
||||
if (hasIoError)
|
||||
return;
|
||||
if (isCodecASCIICompatible) {
|
||||
if (device->write(s, len) != len)
|
||||
hasIoError = true;
|
||||
return;
|
||||
}
|
||||
if (device->write(s, len) != len)
|
||||
hasIoError = true;
|
||||
return;
|
||||
}
|
||||
|
||||
write(QString::fromLatin1(s, len));
|
||||
write(QString::fromUtf8(s, len));
|
||||
}
|
||||
|
||||
void QXmlStreamWriterPrivate::writeNamespaceDeclaration(const NamespaceDeclaration &namespaceDeclaration) {
|
||||
@ -3338,8 +3298,6 @@ QXmlStreamWriter::QXmlStreamWriter(QByteArray *array)
|
||||
|
||||
/*! Constructs a stream writer that writes into \a string.
|
||||
*
|
||||
* Note that when writing to QString, QXmlStreamWriter ignores the codec set
|
||||
* with setCodec(). See that function for more information.
|
||||
*/
|
||||
QXmlStreamWriter::QXmlStreamWriter(QString *string)
|
||||
: d_ptr(new QXmlStreamWriterPrivate(this))
|
||||
@ -3387,67 +3345,6 @@ QIODevice *QXmlStreamWriter::device() const
|
||||
return d->device;
|
||||
}
|
||||
|
||||
|
||||
#if QT_CONFIG(textcodec)
|
||||
/*!
|
||||
Sets the codec for this stream to \a codec. The codec is used for
|
||||
encoding any data that is written. By default, QXmlStreamWriter
|
||||
uses UTF-8.
|
||||
|
||||
The encoding information is stored in the initial xml tag which
|
||||
gets written when you call writeStartDocument(). Call this
|
||||
function before calling writeStartDocument().
|
||||
|
||||
\note When writing the XML to a QString, the codec information is ignored
|
||||
and the XML header will not include any encoding information, since all
|
||||
QStrings are UTF-16. If you later convert the QString to an 8-bit format,
|
||||
you must arrange for the encoding information to be transmitted
|
||||
out-of-band.
|
||||
|
||||
\sa codec()
|
||||
*/
|
||||
void QXmlStreamWriter::setCodec(QTextCodec *codec)
|
||||
{
|
||||
Q_D(QXmlStreamWriter);
|
||||
if (codec) {
|
||||
d->codec = codec;
|
||||
delete d->encoder;
|
||||
d->encoder = codec->makeEncoder(QTextCodec::IgnoreHeader); // no byte order mark for utf8
|
||||
d->checkIfASCIICompatibleCodec();
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
Sets the codec for this stream to the QTextCodec for the encoding
|
||||
specified by \a codecName. Common values for \c codecName include
|
||||
"ISO 8859-1", "UTF-8", and "UTF-16". If the encoding isn't
|
||||
recognized, nothing happens.
|
||||
|
||||
\note When writing the XML to a QString, the codec information is ignored
|
||||
and the XML header will not include any encoding information, since all
|
||||
QStrings are UTF-16. If you later convert the QString to an 8-bit format,
|
||||
you must arrange for the encoding information to be transmitted
|
||||
out-of-band.
|
||||
|
||||
\sa QTextCodec::codecForName()
|
||||
*/
|
||||
void QXmlStreamWriter::setCodec(const char *codecName)
|
||||
{
|
||||
setCodec(QTextCodec::codecForName(codecName));
|
||||
}
|
||||
|
||||
/*!
|
||||
Returns the codec that is currently assigned to the stream.
|
||||
|
||||
\sa setCodec()
|
||||
*/
|
||||
QTextCodec *QXmlStreamWriter::codec() const
|
||||
{
|
||||
Q_D(const QXmlStreamWriter);
|
||||
return d->codec;
|
||||
}
|
||||
#endif // textcodec
|
||||
|
||||
/*!
|
||||
\property QXmlStreamWriter::autoFormatting
|
||||
\since 4.4
|
||||
@ -3886,10 +3783,9 @@ void QXmlStreamWriter::writeProcessingInstruction(const QString &target, const Q
|
||||
|
||||
/*!\overload
|
||||
|
||||
Writes a document start with XML version number "1.0". This also
|
||||
writes the encoding information.
|
||||
Writes a document start with XML version number "1.0".
|
||||
|
||||
\sa writeEndDocument(), setCodec()
|
||||
\sa writeEndDocument()
|
||||
\since 4.5
|
||||
*/
|
||||
void QXmlStreamWriter::writeStartDocument()
|
||||
@ -3909,15 +3805,8 @@ void QXmlStreamWriter::writeStartDocument(const QString &version)
|
||||
d->finishStartElement(false);
|
||||
d->write("<?xml version=\"");
|
||||
d->write(version);
|
||||
if (d->device) { // stringDevice does not get any encoding
|
||||
d->write("\" encoding=\"");
|
||||
#if !QT_CONFIG(textcodec)
|
||||
d->write("iso-8859-1");
|
||||
#else
|
||||
const QByteArray name = d->codec->name();
|
||||
d->write(name.constData(), name.length());
|
||||
#endif
|
||||
}
|
||||
if (d->device) // stringDevice does not get any encoding
|
||||
d->write("\" encoding=\"UTF-8");
|
||||
d->write("\"?>");
|
||||
}
|
||||
|
||||
@ -3933,15 +3822,8 @@ void QXmlStreamWriter::writeStartDocument(const QString &version, bool standalon
|
||||
d->finishStartElement(false);
|
||||
d->write("<?xml version=\"");
|
||||
d->write(version);
|
||||
if (d->device) { // stringDevice does not get any encoding
|
||||
d->write("\" encoding=\"");
|
||||
#if !QT_CONFIG(textcodec)
|
||||
d->write("iso-8859-1");
|
||||
#else
|
||||
const QByteArray name = d->codec->name();
|
||||
d->write(name.constData(), name.length());
|
||||
#endif
|
||||
}
|
||||
if (d->device) // stringDevice does not get any encoding
|
||||
d->write("\" encoding=\"UTF-8");
|
||||
if (standalone)
|
||||
d->write("\" standalone=\"yes\"?>");
|
||||
else
|
||||
|
@ -474,12 +474,6 @@ public:
|
||||
void setDevice(QIODevice *device);
|
||||
QIODevice *device() const;
|
||||
|
||||
#if QT_CONFIG(textcodec)
|
||||
void setCodec(QTextCodec *codec);
|
||||
void setCodec(const char *codecName);
|
||||
QTextCodec *codec() const;
|
||||
#endif
|
||||
|
||||
void setAutoFormatting(bool);
|
||||
bool autoFormatting() const;
|
||||
|
||||
|
@ -560,9 +560,7 @@ private slots:
|
||||
void crashInUTF16Codec() const;
|
||||
void hasAttributeSignature() const;
|
||||
void hasAttribute() const;
|
||||
void writeWithCodec() const;
|
||||
void writeWithUtf8Codec() const;
|
||||
void writeWithUtf16Codec() const;
|
||||
void writeWithStandalone() const;
|
||||
void entitiesAndWhitespace_1() const;
|
||||
void entitiesAndWhitespace_2() const;
|
||||
@ -573,7 +571,6 @@ private slots:
|
||||
void checkCommentIndentation() const;
|
||||
void checkCommentIndentation_data() const;
|
||||
void crashInXmlStreamReader() const;
|
||||
void write8bitCodec() const;
|
||||
void invalidStringCharacters_data() const;
|
||||
void invalidStringCharacters() const;
|
||||
void hasError() const;
|
||||
@ -1258,66 +1255,16 @@ void tst_QXmlStream::hasAttribute() const
|
||||
QVERIFY(!reader.hasError());
|
||||
}
|
||||
|
||||
|
||||
void tst_QXmlStream::writeWithCodec() const
|
||||
{
|
||||
QByteArray outarray;
|
||||
QXmlStreamWriter writer(&outarray);
|
||||
writer.setAutoFormatting(true);
|
||||
|
||||
QTextCodec *codec = QTextCodec::codecForName("ISO 8859-15");
|
||||
QVERIFY(codec);
|
||||
writer.setCodec(codec);
|
||||
|
||||
const char *latin2 = "h\xe9 h\xe9";
|
||||
const QString string = codec->toUnicode(latin2);
|
||||
|
||||
|
||||
writer.writeStartDocument("1.0");
|
||||
|
||||
writer.writeTextElement("foo", string);
|
||||
writer.writeEndElement();
|
||||
writer.writeEndDocument();
|
||||
|
||||
QVERIFY(outarray.contains(latin2));
|
||||
QVERIFY(outarray.contains(codec->name()));
|
||||
}
|
||||
|
||||
void tst_QXmlStream::writeWithUtf8Codec() const
|
||||
{
|
||||
QByteArray outarray;
|
||||
QXmlStreamWriter writer(&outarray);
|
||||
|
||||
QTextCodec *codec = QTextCodec::codecForMib(106); // utf-8
|
||||
QVERIFY(codec);
|
||||
writer.setCodec(codec);
|
||||
|
||||
writer.writeStartDocument("1.0");
|
||||
static const char begin[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
||||
QVERIFY(outarray.startsWith(begin));
|
||||
}
|
||||
|
||||
void tst_QXmlStream::writeWithUtf16Codec() const
|
||||
{
|
||||
QByteArray outarray;
|
||||
QXmlStreamWriter writer(&outarray);
|
||||
|
||||
QTextCodec *codec = QTextCodec::codecForMib(1014); // utf-16LE
|
||||
QVERIFY(codec);
|
||||
writer.setCodec(codec);
|
||||
|
||||
writer.writeStartDocument("1.0");
|
||||
static const char begin[] = "<?xml version=\"1.0\" encoding=\"UTF-16"; // skip potential "LE" suffix
|
||||
const int count = sizeof(begin) - 1; // don't include 0 terminator
|
||||
QByteArray begin_UTF16;
|
||||
begin_UTF16.reserve(2*(count));
|
||||
for (int i = 0; i < count; ++i) {
|
||||
begin_UTF16.append(begin[i]);
|
||||
begin_UTF16.append((char)'\0');
|
||||
}
|
||||
QVERIFY(outarray.startsWith(begin_UTF16));
|
||||
}
|
||||
|
||||
void tst_QXmlStream::writeWithStandalone() const
|
||||
{
|
||||
{
|
||||
@ -1413,7 +1360,6 @@ void tst_QXmlStream::garbageInXMLPrologUTF8Explicitly() const
|
||||
QVERIFY(out.open(QIODevice::ReadWrite));
|
||||
|
||||
QXmlStreamWriter writer (&out);
|
||||
writer.setCodec("UTF-8");
|
||||
writer.writeStartDocument();
|
||||
writer.writeEmptyElement("Foo");
|
||||
writer.writeEndDocument();
|
||||
@ -1602,43 +1548,6 @@ void tst_QXmlStream::hasError() const
|
||||
|
||||
}
|
||||
|
||||
void tst_QXmlStream::write8bitCodec() const
|
||||
{
|
||||
QBuffer outBuffer;
|
||||
QVERIFY(outBuffer.open(QIODevice::WriteOnly));
|
||||
QXmlStreamWriter writer(&outBuffer);
|
||||
writer.setAutoFormatting(false);
|
||||
|
||||
QTextCodec *codec = QTextCodec::codecForName("IBM500");
|
||||
if (!codec) {
|
||||
QSKIP("Encoding IBM500 not available.");
|
||||
}
|
||||
writer.setCodec(codec);
|
||||
|
||||
writer.writeStartDocument();
|
||||
writer.writeStartElement("root");
|
||||
writer.writeAttribute("attrib", "1");
|
||||
writer.writeEndElement();
|
||||
writer.writeEndDocument();
|
||||
outBuffer.close();
|
||||
|
||||
// test 8 bit encoding
|
||||
QByteArray values = outBuffer.data();
|
||||
QVERIFY(values.size() > 1);
|
||||
// check '<'
|
||||
QCOMPARE(values[0] & 0x00FF, 0x4c);
|
||||
// check '?'
|
||||
QCOMPARE(values[1] & 0x00FF, 0x6F);
|
||||
|
||||
// convert the start of the XML
|
||||
const QString expected = ("<?xml version=\"1.0\" encoding=\"IBM500\"?>");
|
||||
QTextDecoder *decoder = codec->makeDecoder();
|
||||
QVERIFY(decoder);
|
||||
QString decodedText = decoder->toUnicode(values);
|
||||
delete decoder;
|
||||
QVERIFY(decodedText.startsWith(expected));
|
||||
}
|
||||
|
||||
void tst_QXmlStream::invalidStringCharacters() const
|
||||
{
|
||||
// test scan in attributes
|
||||
|
Loading…
Reference in New Issue
Block a user