Add tests for decoding too-short UTF-8 sequences
We were handling these properly, but not testing them. I guess we weren't testing because the condition is a valid intermediate state, so hasFailure() is correct in returning false. Testing inspired by the bug reported in https://github.com/intel/tinycbor/issues/137 Change-Id: Ib47c56818178458a88b4fffd1554ecfdd0af637e Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
parent
4d40f09a45
commit
7e1a0c0739
@ -1,6 +1,7 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2016 The Qt Company Ltd.
|
||||
** Copyright (C) 2018 The Qt Company Ltd.
|
||||
** Copyright (C) 2018 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
@ -1236,6 +1237,19 @@ bool QTextDecoder::hasFailure() const
|
||||
return state.invalidChars != 0;
|
||||
}
|
||||
|
||||
/*!
|
||||
\internal
|
||||
\since 5.12
|
||||
|
||||
Determines whether the decoder needs more bytes to continue decoding. That
|
||||
is, this signifies that the input string ended in the middle of a
|
||||
multi-byte sequence. Note that it's possible some codecs do not report this.
|
||||
*/
|
||||
bool QTextDecoder::needsMoreData() const
|
||||
{
|
||||
return state.remainingChars;
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_NO_TEXTCODEC
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2016 The Qt Company Ltd.
|
||||
** Copyright (C) 2018 The Qt Company Ltd.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
@ -162,6 +162,7 @@ public:
|
||||
QString toUnicode(const QByteArray &ba);
|
||||
void toUnicode(QString *target, const char *chars, int len);
|
||||
bool hasFailure() const;
|
||||
bool needsMoreData() const;
|
||||
private:
|
||||
const QTextCodec *c;
|
||||
QTextCodec::ConverterState state;
|
||||
|
@ -1,7 +1,7 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2016 The Qt Company Ltd.
|
||||
** Copyright (C) 2016 Intel Corporation.
|
||||
** Copyright (C) 2018 The Qt Company Ltd.
|
||||
** Copyright (C) 2018 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
|
@ -1,7 +1,7 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2016 The Qt Company Ltd.
|
||||
** Copyright (C) 2016 Intel Corporation.
|
||||
** Copyright (C) 2018 The Qt Company Ltd.
|
||||
** Copyright (C) 2018 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the test suite of the Qt Toolkit.
|
||||
@ -71,7 +71,7 @@ void tst_Utf8::initTestCase()
|
||||
// is the locale UTF-8?
|
||||
if (QString(QChar(QChar::ReplacementCharacter)).toLocal8Bit() == "\xEF\xBF\xBD") {
|
||||
QTest::newRow("localecodec") << true;
|
||||
qDebug() << "locale is utf8";
|
||||
qInfo() << "locale is utf8";
|
||||
}
|
||||
}
|
||||
|
||||
@ -226,6 +226,15 @@ void tst_Utf8::invalidUtf8()
|
||||
// The system's UTF-8 codec is sometimes buggy
|
||||
// GNU libc's iconv is known to accept U+FFFF and U+FFFE encoded as UTF-8
|
||||
// OS X's iconv is known to accept those, plus surrogates and codepoints above U+10FFFF
|
||||
if (!useLocale)
|
||||
QVERIFY(decoder->hasFailure() || decoder->needsMoreData());
|
||||
else if (!decoder->hasFailure() && !decoder->needsMoreData())
|
||||
qWarning("System codec does not report failure when it should. Should report bug upstream.");
|
||||
|
||||
// add a continuation character and test that we don't accidentally use it
|
||||
// (buffer overrun)
|
||||
utf8 += char(0x80 | 0x3f);
|
||||
decoder->toUnicode(utf8.constData(), utf8.size() - 1);
|
||||
if (!useLocale)
|
||||
QVERIFY(decoder->hasFailure());
|
||||
else if (!decoder->hasFailure())
|
||||
|
@ -1,6 +1,7 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2016 The Qt Company Ltd.
|
||||
** Copyright (C) 2018 The Qt Company Ltd.
|
||||
** Copyright (C) 2018 Intel Corporation.
|
||||
** Contact: https://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the test suite of the Qt Toolkit.
|
||||
@ -29,15 +30,24 @@
|
||||
|
||||
void loadInvalidUtf8Rows()
|
||||
{
|
||||
QTest::newRow("1char") << QByteArray("\x80");
|
||||
QTest::newRow("2chars-1") << QByteArray("\xC2\xC0");
|
||||
QTest::newRow("2chars-2") << QByteArray("\xC3\xDF");
|
||||
QTest::newRow("2chars-3") << QByteArray("\xC7\xF0");
|
||||
QTest::newRow("3chars-1") << QByteArray("\xE0\xA0\xC0");
|
||||
QTest::newRow("3chars-2") << QByteArray("\xE0\xC0\xA0");
|
||||
QTest::newRow("4chars-1") << QByteArray("\xF0\x90\x80\xC0");
|
||||
QTest::newRow("4chars-2") << QByteArray("\xF0\x90\xC0\x80");
|
||||
QTest::newRow("4chars-3") << QByteArray("\xF0\xC0\x80\x80");
|
||||
// Wrong continuations
|
||||
QTest::newRow("bad-continuation-1char") << QByteArray("\x80");
|
||||
QTest::newRow("bad-continuation-2chars-1") << QByteArray("\xC2\xC0");
|
||||
QTest::newRow("bad-continuation-2chars-2") << QByteArray("\xC3\xDF");
|
||||
QTest::newRow("bad-continuation-2chars-3") << QByteArray("\xC7\xF0");
|
||||
QTest::newRow("bad-continuation-3chars-1") << QByteArray("\xE0\xA0\xC0");
|
||||
QTest::newRow("bad-continuation-3chars-2") << QByteArray("\xE0\xC0\xA0");
|
||||
QTest::newRow("bad-continuation-4chars-1") << QByteArray("\xF0\x90\x80\xC0");
|
||||
QTest::newRow("bad-continuation-4chars-2") << QByteArray("\xF0\x90\xC0\x80");
|
||||
QTest::newRow("bad-continuation-4chars-3") << QByteArray("\xF0\xC0\x80\x80");
|
||||
|
||||
// Too short
|
||||
QTest::newRow("too-short-2chars") << QByteArray("\xC2");
|
||||
QTest::newRow("too-short-3chars-1") << QByteArray("\xE0");
|
||||
QTest::newRow("too-short-3chars-2") << QByteArray("\xE0\xA0");
|
||||
QTest::newRow("too-short-4chars-1") << QByteArray("\xF0");
|
||||
QTest::newRow("too-short-4chars-2") << QByteArray("\xF0\x90");
|
||||
QTest::newRow("too-short-4chars-3") << QByteArray("\xF0\x90\x80");
|
||||
|
||||
// Surrogate pairs must not be present either
|
||||
// U+D800: 1101 10 0000 00 0000
|
||||
|
Loading…
Reference in New Issue
Block a user