Remove custom text codec for C strings.

This setting is extremely harmful, as code cannot know whether or not to expect
it. It also made the behaviour of QString::fromAscii and ::toAscii unintuitive,
and caused a lot of people to make mistakes with it.

Change-Id: I2f429fa7ef93bd75bb93a7f64c56db15b7283388
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
Robin Burchell 2012-01-30 14:23:22 +02:00 committed by Qt by Nokia
parent 9d173c9218
commit 186692f81f
13 changed files with 61 additions and 216 deletions

4
dist/changes-5.0.0 vendored
View File

@ -295,6 +295,10 @@ QtCore
* The QDate::addDays() and QDateTime::addDays() methods now take a qint64
* The QDate::daysTo() and QDateTime::daysTo() methods now return a qint64
* QTextCodec::codecForCStrings() and QTextCodec::setCodecForCStrings() have both
been removed. This was removed due to issues with breaking other code from
libraries, creating uncertainty/bugs in using QString easily, and (to a lesser
extent) performance issues.
* QIntValidator and QDoubleValidator no longer fall back to using the C locale if
the requested locale fails to validate the input.

View File

@ -1495,35 +1495,7 @@ QString QTextDecoder::toUnicode(const QByteArray &ba)
files to be loaded. For details of internationalization, see
\l{Internationalization with Qt}.
\sa codecForTr(), setCodecForCStrings()
*/
/*!
\fn QTextCodec* QTextCodec::codecForCStrings()
Returns the codec used by QString to convert to and from \c{const
char *} and QByteArrays. If this function returns 0 (the default),
QString assumes Latin-1.
\sa setCodecForCStrings()
*/
/*!
\fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
\nonreentrant
Sets the codec used by QString to convert to and from \c{const
char *} and QByteArrays. If the \a codec is 0 (the default),
QString assumes Latin-1.
\warning Some codecs do not preserve the characters in the ASCII
range (0x00 to 0x7F). For example, the Japanese Shift-JIS
encoding maps the backslash character (0x5A) to the Yen
character. To avoid undesirable side-effects, we recommend
avoiding such codecs with setCodecsForCString().
\sa codecForCStrings(), setCodecForTr()
\sa codecForTr()
*/
/*!

View File

@ -75,9 +75,6 @@ public:
static QTextCodec* codecForTr();
static void setCodecForTr(QTextCodec *c);
static QTextCodec* codecForCStrings();
static void setCodecForCStrings(QTextCodec *c);
static QTextCodec *codecForHtml(const QByteArray &ba);
static QTextCodec *codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec);
@ -139,8 +136,6 @@ Q_DECLARE_OPERATORS_FOR_FLAGS(QTextCodec::ConversionFlags)
inline QTextCodec* QTextCodec::codecForTr() { return validCodecs() ? cftr : 0; }
inline void QTextCodec::setCodecForTr(QTextCodec *c) { cftr = c; }
inline QTextCodec* QTextCodec::codecForCStrings() { return validCodecs() ? QString::codecForCStrings : 0; }
inline void QTextCodec::setCodecForCStrings(QTextCodec *c) { QString::codecForCStrings = c; }
class Q_CORE_EXPORT QTextEncoder {
Q_DISABLE_COPY(QTextEncoder)

View File

@ -1162,8 +1162,8 @@ QVariant::QVariant(QDataStream &s)
\fn QVariant::QVariant(const char *val)
Constructs a new variant with a string value of \a val.
The variant creates a deep copy of \a val, using the encoding
set by QTextCodec::setCodecForCStrings().
The variant creates a deep copy of \a val into a QString assuming
UTF-8 encoding on the input \a val.
Note that \a val is converted to a QString for storing in the
variant and QVariant::type() will return QMetaType::QString for
@ -1171,8 +1171,6 @@ QVariant::QVariant(QDataStream &s)
You can disable this operator by defining \c
QT_NO_CAST_FROM_ASCII when you compile your applications.
\sa QTextCodec::setCodecForCStrings()
*/
#ifndef QT_NO_CAST_FROM_ASCII

View File

@ -413,33 +413,16 @@ QT_BEGIN_NAMESPACE
*/
/*!
\fn QChar::QChar(char ch)
Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
*/
QChar::QChar(char ch)
{
#ifndef QT_NO_CODEC_FOR_C_STRINGS
if (QTextCodec::codecForCStrings())
// #####
ucs = QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode();
else
#endif
ucs = uchar(ch);
}
/*!
\fn QChar::QChar(uchar ch)
Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
*/
QChar::QChar(uchar ch)
{
#ifndef QT_NO_CODEC_FOR_C_STRINGS
if (QTextCodec::codecForCStrings()) {
// #####
char c = char(ch);
ucs = QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
} else
#endif
ucs = ch;
}
/*!
\fn QChar::QChar(uchar cell, uchar row)
@ -1256,49 +1239,35 @@ ushort QChar::toCaseFolded(ushort ucs2)
Returns the Latin-1 character equivalent to the QChar, or 0. This
is mainly useful for non-internationalized software.
\sa toAscii(), unicode(), QTextCodec::codecForCStrings()
\sa toAscii(), unicode()
*/
/*!
Returns the character value of the QChar obtained using the current
codec used to read C strings, or 0 if the character is not representable
using this codec. The default codec handles Latin-1 encoded text,
but this can be changed to assist developers writing source code using
other encodings.
\fn char QChar::toAscii() const
Returns the Latin-1 character value of the QChar, or 0 if the character is not
representable.
The main purpose of this function is to preserve ASCII characters used
in C strings. This is mainly useful for developers of non-internationalized
software.
\sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
\note It is not possible to distinguish a non-Latin 1 character from an ASCII 0
(NUL) character. Prefer to use unicode(), which does not have this ambiguity.
\sa toLatin1(), unicode()
*/
char QChar::toAscii() const
{
#ifndef QT_NO_CODEC_FOR_C_STRINGS
if (QTextCodec::codecForCStrings())
// #####
return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
#endif
return ucs > 0xff ? 0 : char(ucs);
}
/*!
\fn QChar QChar::fromAscii(char)
Converts the ASCII character \a c to it's equivalent QChar. This
is mainly useful for non-internationalized software.
An alternative is to use QLatin1Char.
\sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
\sa fromLatin1(), unicode()
*/
QChar QChar::fromAscii(char c)
{
#ifndef QT_NO_CODEC_FOR_C_STRINGS
if (QTextCodec::codecForCStrings())
// #####
return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
#endif
return QChar(ushort((uchar)c));
}
#ifndef QT_NO_DATASTREAM
/*!

View File

@ -86,9 +86,8 @@ public:
Q_DECL_CONSTEXPR QChar(QLatin1Char ch) : ucs(ch.unicode()) {} // implicit
#ifndef QT_NO_CAST_FROM_ASCII
// these two constructors are NOT inline const_expr!
QT_ASCII_CAST_WARN_CONSTRUCTOR explicit QChar(char c);
QT_ASCII_CAST_WARN_CONSTRUCTOR explicit QChar(uchar c);
QT_ASCII_CAST_WARN_CONSTRUCTOR Q_DECL_CONSTEXPR explicit QChar(char c) : ucs(uchar(c)) { }
QT_ASCII_CAST_WARN_CONSTRUCTOR Q_DECL_CONSTEXPR explicit QChar(uchar c) : ucs(c) { }
#endif
// Unicode information
@ -222,13 +221,13 @@ public:
UnicodeVersion unicodeVersion() const;
char toAscii() const;
inline char toAscii() const;
inline char toLatin1() const;
inline ushort unicode() const { return ucs; }
inline ushort &unicode() { return ucs; }
static QChar fromAscii(char c);
static QChar fromLatin1(char c);
static inline QChar fromAscii(char c);
static inline QChar fromLatin1(char c);
inline bool isNull() const { return ucs == 0; }
bool isPrint() const;
@ -344,8 +343,10 @@ private:
Q_DECLARE_TYPEINFO(QChar, Q_MOVABLE_TYPE);
inline char QChar::toAscii() const { return ucs > 0xff ? 0 : char(ucs); }
inline char QChar::toLatin1() const { return ucs > 0xff ? '\0' : char(ucs); }
inline QChar QChar::fromLatin1(char c) { return QChar(ushort(uchar(c))); }
inline QChar QChar::fromAscii(char c) { return QChar(ushort(uchar(c))); }
inline void QChar::setCell(uchar acell)
{ ucs = ushort((ucs & 0xff00) + acell); }

View File

@ -96,10 +96,6 @@
QT_BEGIN_NAMESPACE
#ifndef QT_NO_TEXTCODEC
QTextCodec *QString::codecForCStrings;
#endif
#ifdef QT_USE_ICU
// qlocale_icu.cpp
extern bool qt_ucol_strcoll(const QChar *source, int sourceLength, const QChar *target, int targetLength, int *result);
@ -471,9 +467,8 @@ const QString::Null QString::null = { };
\snippet doc/src/snippets/qstring/main.cpp 0
QString converts the \c{const char *} data into Unicode using the
fromAscii() function. By default, fromAscii() treats character
above 128 as Latin-1 characters, but this can be changed by
calling QTextCodec::setCodecForCStrings().
fromAscii() function. fromAscii() treats ordinals above 128 as Latin-1
characters.
In all of the QString functions that take \c{const char *}
parameters, the \c{const char *} is interpreted as a classic
@ -611,9 +606,7 @@ const QString::Null QString::null = { };
toLatin1(), toUtf8(), and toLocal8Bit().
\list
\o toAscii() returns an 8-bit string encoded using the codec
specified by QTextCodec::codecForCStrings (by default, that is
Latin 1).
\o toAscii() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
\o toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
\o toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
superset of US-ASCII (ANSI X3.4-1986) that supports the entire
@ -721,11 +714,11 @@ const QString::Null QString::null = { };
\section1 More Efficient String Construction
Many strings are known at compile time. But the trivial
constructor QString("Hello"), will convert the string literal
to a QString using the codecForCStrings(). To avoid this one
can use the QStringLiteral macro to directly create the required
data at compile time. Constructing a QString out of the literal
does then not cause any overhead at runtime.
constructor QString("Hello"), will copy the contents of the string,
treating the contents as Latin-1. To avoid this one can use the
QStringLiteral macro to directly create the required data at compile
time. Constructing a QString out of the literal does then not cause
any overhead at runtime.
A slightly less efficient way is to use QLatin1String. This class wraps
a C string literal, precalculates it length at compile time and can
@ -3652,9 +3645,7 @@ QByteArray QString::toLatin1() const
/*!
Returns an 8-bit representation of the string as a QByteArray.
If a codec has been set using QTextCodec::setCodecForCStrings(),
it is used to convert Unicode to 8-bit char; otherwise this
function does the same as toLatin1().
This function does the same as toLatin1().
Note that, despite the name, this function does not necessarily return an US-ASCII
(ANSI X3.4-1986) string and its result may not be US-ASCII compatible.
@ -3663,10 +3654,6 @@ QByteArray QString::toLatin1() const
*/
QByteArray QString::toAscii() const
{
#ifndef QT_NO_TEXTCODEC
if (codecForCStrings)
return codecForCStrings->fromUnicode(*this);
#endif // QT_NO_TEXTCODEC
return toLatin1();
}
@ -3800,23 +3787,6 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
QString::Data *QString::fromAscii_helper(const char *str, int size)
{
#ifndef QT_NO_TEXTCODEC
if (codecForCStrings) {
Data *d;
if (!str) {
d = const_cast<Data *>(&shared_null.str);
} else if (size == 0 || (!*str && size < 0)) {
d = const_cast<Data *>(&shared_empty.str);
} else {
if (size < 0)
size = qstrlen(str);
QString s = codecForCStrings->toUnicode(str, size);
d = s.d;
d->ref.ref();
}
return d;
}
#endif
return fromLatin1_helper(str, size);
}
@ -3865,11 +3835,7 @@ QString QString::fromLocal8Bit_helper(const char *str, int size)
If \a size is -1 (default), it is taken to be strlen(\a
str).
Note that, despite the name, this function actually uses the codec
defined by QTextCodec::setCodecForCStrings() to convert \a str to
Unicode. Depending on the codec, it may not accept valid US-ASCII (ANSI
X3.4-1986) input. If no codec has been set, this function does the same
as fromLatin1().
This function does the same as fromLatin1().
\sa toAscii(), fromLatin1(), fromUtf8(), fromLocal8Bit()
*/
@ -5138,19 +5104,8 @@ QString &QString::vsprintf(const char* cformat, va_list ap)
const char *c = cformat;
for (;;) {
// Copy non-escape chars to result
#ifndef QT_NO_TEXTCODEC
int i = 0;
while (*(c + i) != '\0' && *(c + i) != '%')
++i;
if (codecForCStrings)
result.append(codecForCStrings->toUnicode(c, i));
else
result.append(fromLatin1(c, i));
c += i;
#else
while (*c != '\0' && *c != '%')
result.append(QLatin1Char(*c++));
#endif
if (*c == '\0')
break;
@ -7062,8 +7017,7 @@ bool QString::isRightToLeft() const
This operator is mostly useful to pass a QString to a function
that accepts a std::string object.
If the QString contains Unicode characters that the
QTextCodec::codecForCStrings() codec cannot handle, using this operator
If the QString contains non-Latin1 Unicode characters, using this
can lead to loss of information.
This operator is only available if Qt is configured with STL
@ -8736,9 +8690,7 @@ QByteArray QStringRef::toLatin1() const
Returns an 8-bit representation of the string as a QByteArray.
If a codec has been set using QTextCodec::setCodecForCStrings(),
it is used to convert Unicode to 8-bit char; otherwise this
function does the same as toLatin1().
This function does the same as toLatin1().
Note that, despite the name, this function does not necessarily return an US-ASCII
(ANSI X3.4-1986) string and its result may not be US-ASCII compatible.
@ -8747,10 +8699,6 @@ QByteArray QStringRef::toLatin1() const
*/
QByteArray QStringRef::toAscii() const
{
#ifndef QT_NO_TEXTCODEC
if (QString::codecForCStrings)
return QString::codecForCStrings->fromUnicode(unicode(), length());
#endif // QT_NO_TEXTCODEC
return toLatin1();
}

View File

@ -605,9 +605,6 @@ private:
Data *d;
inline QString(Data *dd, int /*dummy*/) : d(dd) {}
#ifndef QT_NO_TEXTCODEC
static QTextCodec *codecForCStrings;
#endif
static int grow(int);
static void free(Data *);
void realloc();
@ -927,9 +924,6 @@ inline bool operator!=(const QString &s, QString::Null) { return !s.isNull(); }
#ifndef QT_NO_CAST_FROM_ASCII
inline bool qStringComparisonHelper(const QString &s1, const char *s2)
{
# ifndef QT_NO_TEXTCODEC
if (QString::codecForCStrings) return (s1 == QString::fromAscii(s2, s2 ? int(strlen(s2)) : -1));
# endif
return (s1 == QLatin1String(s2));
}
inline bool QString::operator==(const char *s) const
@ -1216,9 +1210,6 @@ inline bool operator>=(const QStringRef &s1, const QStringRef &s2)
inline bool qStringComparisonHelper(const QStringRef &s1, const char *s2)
{
# ifndef QT_NO_TEXTCODEC
if (QString::codecForCStrings) return (s1 == QString::fromAscii(s2, s2 ? int(strlen(s2)) : -1));
# endif
return (s1 == QLatin1String(s2));
}

View File

@ -105,14 +105,6 @@ QT_BEGIN_NAMESPACE
*/
void QAbstractConcatenable::convertFromAscii(const char *a, int len, QChar *&out)
{
#ifndef QT_NO_TEXTCODEC
if (QString::codecForCStrings && len) {
QString tmp = QString::fromAscii(a, len > 0 ? len : -1);
memcpy(out, reinterpret_cast<const char *>(tmp.constData()), sizeof(QChar) * tmp.size());
out += tmp.length();
return;
}
#endif
if (len == -1) {
if (!a)
return;
@ -127,14 +119,6 @@ void QAbstractConcatenable::convertFromAscii(const char *a, int len, QChar *&out
/*! \internal */
void QAbstractConcatenable::convertToAscii(const QChar* a, int len, char*& out)
{
#ifndef QT_NO_TEXTCODEC
if (QString::codecForCStrings) {
QByteArray tmp = QString::codecForCStrings->fromUnicode(a, len);
memcpy(out, tmp.constData(), tmp.size());
out += tmp.length();
return;
}
#endif
if (len == -1) {
while (a->unicode())
convertToLatin1(*a++, out);

View File

@ -65,22 +65,12 @@ protected:
static void convertToAscii(const QChar *a, int len, char *&out);
static inline void convertFromAscii(char a, QChar *&out)
{
#ifndef QT_NO_TEXTCODEC
if (QString::codecForCStrings)
*out++ = QChar::fromAscii(a);
else
#endif
*out++ = QLatin1Char(a);
*out++ = QLatin1Char(a);
}
static inline void convertToAscii(QChar a, char *&out)
{
#ifndef QT_NO_TEXTCODEC
if (QString::codecForCStrings)
*out++ = a.toAscii(); //###
else
#endif
convertToLatin1(a, out);
convertToLatin1(a, out);
}
static inline void convertToLatin1(QChar a, char *&out)

View File

@ -808,10 +808,7 @@ void tst_QString::constructorQByteArray()
QCOMPARE(str1.length(), expected.length());
QCOMPARE( str1, expected );
QTextCodec::setCodecForCStrings( QTextCodec::codecForMib(4) ); // Latin 1
QString strBA(src);
QTextCodec::setCodecForCStrings( 0 );
QCOMPARE( strBA, expected );
}
@ -928,12 +925,7 @@ void tst_QString::sprintf()
QCOMPARE(a.sprintf("%-5.5s", "Hello" ),(QString)"Hello");
// Check utf8 conversion for %s
QCOMPARE(a.sprintf("%s", "\303\266\303\244\303\274\303\226\303\204\303\234\303\270\303\246\303\245\303\230\303\206\303\205"), QString("\366\344\374\326\304\334\370\346\345\330\306\305"));
// Check codecForCStrings is used to read non-modifier sequences in the format string
QTextCodec::setCodecForCStrings(QTextCodec::codecForName("UTF-8"));
QCOMPARE(a.sprintf("\303\251\303\250\303\240 %s", "\303\251\303\250\303\240"), QString("\303\251\303\250\303\240 \303\251\303\250\303\240"));
QTextCodec::setCodecForCStrings(0);
QCOMPARE(a.sprintf("%s", "\303\266\303\244\303\274\303\226\303\204\303\234\303\270\303\246\303\245\303\230\303\206\303\205"), QString::fromLatin1("\366\344\374\326\304\334\370\346\345\330\306\305"));
int n1;
a.sprintf("%s%n%s", "hello", &n1, "goodbye");
@ -1871,9 +1863,7 @@ void tst_QString::append_bytearray()
QFETCH( QString, str );
QFETCH( QByteArray, ba );
QTextCodec::setCodecForCStrings( QTextCodec::codecForMib(4) ); // Latin 1
str.append( ba );
QTextCodec::setCodecForCStrings( 0 );
QTEST( str, "res" );
}
@ -1898,9 +1888,7 @@ void tst_QString::operator_pluseq_bytearray()
QFETCH( QString, str );
QFETCH( QByteArray, ba );
QTextCodec::setCodecForCStrings( QTextCodec::codecForMib(4) ); // Latin 1
str += ba;
QTextCodec::setCodecForCStrings( 0 );
QTEST( str, "res" );
}
@ -1960,9 +1948,7 @@ void tst_QString::prepend_bytearray()
QFETCH( QString, str );
QFETCH( QByteArray, ba );
QTextCodec::setCodecForCStrings( QTextCodec::codecForMib(4) ); // Latin 1
str.prepend( ba );
QTextCodec::setCodecForCStrings( 0 );
QTEST( str, "res" );
}
@ -3211,7 +3197,7 @@ void tst_QString::utf8()
QFETCH( QByteArray, utf8 );
QFETCH( QString, res );
QCOMPARE( utf8, QByteArray(res.toUtf8()) );
QCOMPARE(res.toUtf8(), utf8);
}
void tst_QString::stringRef_utf8_data()

View File

@ -66,10 +66,8 @@
void runScenario()
{
// set codec for C strings to 0, enforcing Latin1
QTextCodec::setCodecForCStrings(0);
QVERIFY(!QTextCodec::codecForCStrings());
// this code is latin1. TODO: replace it with the utf8 block below, once
// strings default to utf8.
QLatin1Literal l1literal(LITERAL);
QLatin1String l1string(LITERAL);
QString string(l1string);
@ -130,7 +128,10 @@ void runScenario()
r = string P ba;
QCOMPARE(r, r2);
#if 0
// now test with codec for C strings set
// TODO: to be re-enabled once strings default to utf8, in place of the
// latin1 code above.
QTextCodec::setCodecForCStrings(QTextCodec::codecForName("UTF-8"));
QVERIFY(QTextCodec::codecForCStrings());
QCOMPARE(QTextCodec::codecForCStrings()->name(), QByteArray("UTF-8"));
@ -153,6 +154,7 @@ void runScenario()
QCOMPARE(r, r3);
r = string P ba;
QCOMPARE(r, r3);
#endif
ba = QByteArray(); // empty
r = ba P string;
@ -212,8 +214,11 @@ void runScenario()
str += QLatin1String(LITERAL) P str;
QCOMPARE(str, QString::fromUtf8(UTF8_LITERAL LITERAL UTF8_LITERAL));
#ifndef QT_NO_CAST_FROM_ASCII
#if 0
// TODO: this relies on strings defaulting to utf8, so disable this for now.
str = (QString::fromUtf8(UTF8_LITERAL) += QLatin1String(LITERAL) P UTF8_LITERAL);
QCOMPARE(str, QString::fromUtf8(UTF8_LITERAL LITERAL UTF8_LITERAL));
#endif
#endif
}
@ -229,10 +234,13 @@ void runScenario()
ba2 += ba2 P withZero;
QCOMPARE(ba2, QByteArray(withZero + withZero + withZero));
#ifndef QT_NO_CAST_TO_ASCII
#if 0
// TODO: this relies on strings defaulting to utf8, so disable this for now.
ba = UTF8_LITERAL;
ba2 = (ba += QLatin1String(LITERAL) + QString::fromUtf8(UTF8_LITERAL));
QCOMPARE(ba2, ba);
QCOMPARE(ba, QByteArray(UTF8_LITERAL LITERAL UTF8_LITERAL));
#endif
#endif
}

View File

@ -2124,11 +2124,10 @@ void tst_Collections::qstring()
QVERIFY(s.toAscii().isNull());
s = "ascii";
s += (uchar) 0xb0;
s += QChar((uchar) 0xb0);
QVERIFY(s.toUtf8() != s.toLatin1());
QString sa = s.toLatin1().constData();
QVERIFY(sa[sa.length()-1] == (ushort) 0xb0);
QVERIFY(sa.left(sa.length()-1) == "ascii");
QCOMPARE(s[s.length()-1].unicode(), (ushort)0xb0);
QVERIFY(s.left(s.length()-1) == "ascii");
QVERIFY(s == QString::fromUtf8(s.toUtf8().constData()));