QByteArray: fix isUpper/isLower

577d698b8e changed the behavior of QString::isUpper() /
isLower() to match Unicode semantics: a string is uppercase (resp.
lowercase) if it's identical to its own toUpper() (resp. toLower())
folding. These semantics come from Unicode, so they're not up for debate.

That commit however left QByteArray untouched. Generally speaking, we
want to move away from QByteArray as "text storage" -- this has
partially happened between Qt 5 and Qt 6, where QByteArray went from
Latin-1 semantics to ASCII semantics. Still, QByteArray offers
toUpper/toLower and isUpper/isLower and all this family of functions
should be consistent in behavior.

Apply the same fix that was applied to QString.

[ChangeLog][Important Behavior Changes] The semantics of
QByteArray::isLower() and QByteArray::isUpper() have been changed. Now
lowercase (resp. uppercase) byte arrays are allowed to contain any
character; a byte array is considered lowercase (resp. uppercase) if
it's equal to its own toLower() (resp. toUpper()) folding. For instance,
the "abc123" byte array is now considered to be lowercase.
Previously, the isLower() (resp. isUpper()) functions checked whether
the byte array only contained ASCII lowercase (resp. uppercase)
characters, and was at least 1 character long. This had the side effect
that byte arrays containing ASCII non-letters (e.g. numbers, symbols,
etc.) were neither lowercase nor uppercase.

[ChangeLog][QtCore][QByteArray] QByteArray::isLower() and
QByteArray::isUpper() now work correctly with empty byte arrays. The
semantics of these functions have been changed.

Pick-to: 6.3 6.2
Fixes: QTBUG-100107
Change-Id: Id56a42f01b2d1af5387bf0e6ccff0f824f757155
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Giuseppe D'Angelo 2022-01-24 00:38:27 +01:00
parent 32caaacee3
commit 2d95b75345
2 changed files with 44 additions and 48 deletions

View File

@ -64,6 +64,8 @@
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include <algorithm>
#define IS_RAW_DATA(d) ((d)->flags() & QArrayData::RawDataType) #define IS_RAW_DATA(d) ((d)->flags() & QArrayData::RawDataType)
QT_BEGIN_NAMESPACE QT_BEGIN_NAMESPACE
@ -2743,28 +2745,6 @@ static constexpr inline bool isUpperCaseAscii(char c)
return c >= 'A' && c <= 'Z'; return c >= 'A' && c <= 'Z';
} }
/*!
Returns \c true if this byte array contains only ASCII uppercase letters,
otherwise returns \c false.
\since 5.12
\sa isLower(), toUpper()
*/
bool QByteArray::isUpper() const
{
if (isEmpty())
return false;
const char *d = data();
for (qsizetype i = 0, max = size(); i < max; ++i) {
if (!isUpperCaseAscii(d[i]))
return false;
}
return true;
}
/* /*
Returns true if \a c is a lowercase ASCII letter. Returns true if \a c is a lowercase ASCII letter.
*/ */
@ -2774,25 +2754,35 @@ static constexpr inline bool isLowerCaseAscii(char c)
} }
/*! /*!
Returns \c true if this byte array contains only lowercase ASCII letters, Returns \c true if this byte array is uppercase, that is, if
otherwise returns \c false. it's identical to its toUpper() folding.
Note that this does \e not mean that the byte array only contains
uppercase letters; only that it contains no ASCII lowercase letters.
\since 5.12
\sa isLower(), toUpper()
*/
bool QByteArray::isUpper() const
{
return std::none_of(begin(), end(), isLowerCaseAscii);
}
/*!
Returns \c true if this byte array is lowercase, that is, if
it's identical to its toLower() folding.
Note that this does \e not mean that the byte array only contains
lowercase letters; only that it contains no ASCII uppercase letters.
\since 5.12 \since 5.12
\sa isUpper(), toLower() \sa isUpper(), toLower()
*/ */
bool QByteArray::isLower() const bool QByteArray::isLower() const
{ {
if (isEmpty()) return std::none_of(begin(), end(), isUpperCaseAscii);
return false;
const char *d = data();
for (qsizetype i = 0, max = size(); i < max; ++i) {
if (!isLowerCaseAscii(d[i]))
return false;
}
return true;
} }
/*! /*!

View File

@ -2234,10 +2234,16 @@ void tst_QByteArray::toUpperLower()
QFETCH(QByteArray, input); QFETCH(QByteArray, input);
QFETCH(QByteArray, upper); QFETCH(QByteArray, upper);
QFETCH(QByteArray, lower); QFETCH(QByteArray, lower);
QVERIFY(upper.isUpper());
QVERIFY(lower.isLower());
QCOMPARE(lower.toLower(), lower); QCOMPARE(lower.toLower(), lower);
QVERIFY(lower.toLower().isLower());
QCOMPARE(upper.toUpper(), upper); QCOMPARE(upper.toUpper(), upper);
QVERIFY(upper.toUpper().isUpper());
QCOMPARE(input.toUpper(), upper); QCOMPARE(input.toUpper(), upper);
QVERIFY(input.toUpper().isUpper());
QCOMPARE(input.toLower(), lower); QCOMPARE(input.toLower(), lower);
QVERIFY(input.toLower().isLower());
QByteArray copy = input; QByteArray copy = input;
QCOMPARE(std::move(copy).toUpper(), upper); QCOMPARE(std::move(copy).toUpper(), upper);
@ -2266,12 +2272,12 @@ void tst_QByteArray::toUpperLower()
void tst_QByteArray::isUpper() void tst_QByteArray::isUpper()
{ {
QVERIFY(!QByteArray().isUpper()); QVERIFY(QByteArray().isUpper());
QVERIFY(!QByteArray("").isUpper()); QVERIFY(QByteArray("").isUpper());
QVERIFY(QByteArray("TEXT").isUpper()); QVERIFY(QByteArray("TEXT").isUpper());
QVERIFY(!QByteArray("\xD0\xDE").isUpper()); // non-ASCII is neither upper nor lower QVERIFY(QByteArray("\xD0\xDE").isUpper());
QVERIFY(!QByteArray("\xD7").isUpper()); QVERIFY(QByteArray("\xD7").isUpper());
QVERIFY(!QByteArray("\xDF").isUpper()); QVERIFY(QByteArray("\xDF").isUpper());
QVERIFY(!QByteArray("text").isUpper()); QVERIFY(!QByteArray("text").isUpper());
QVERIFY(!QByteArray("Text").isUpper()); QVERIFY(!QByteArray("Text").isUpper());
QVERIFY(!QByteArray("tExt").isUpper()); QVERIFY(!QByteArray("tExt").isUpper());
@ -2281,19 +2287,19 @@ void tst_QByteArray::isUpper()
QVERIFY(!QByteArray("teXT").isUpper()); QVERIFY(!QByteArray("teXT").isUpper());
QVERIFY(!QByteArray("tEXt").isUpper()); QVERIFY(!QByteArray("tEXt").isUpper());
QVERIFY(!QByteArray("tExT").isUpper()); QVERIFY(!QByteArray("tExT").isUpper());
QVERIFY(!QByteArray("@ABYZ[").isUpper()); QVERIFY(QByteArray("@ABYZ[").isUpper());
QVERIFY(!QByteArray("@abyz[").isUpper()); QVERIFY(!QByteArray("@abyz[").isUpper());
QVERIFY(!QByteArray("`ABYZ{").isUpper()); QVERIFY(QByteArray("`ABYZ{").isUpper());
QVERIFY(!QByteArray("`abyz{").isUpper()); QVERIFY(!QByteArray("`abyz{").isUpper());
} }
void tst_QByteArray::isLower() void tst_QByteArray::isLower()
{ {
QVERIFY(!QByteArray().isLower()); QVERIFY(QByteArray().isLower());
QVERIFY(!QByteArray("").isLower()); QVERIFY(QByteArray("").isLower());
QVERIFY(QByteArray("text").isLower()); QVERIFY(QByteArray("text").isLower());
QVERIFY(!QByteArray("\xE0\xFF").isLower()); // non-ASCII is neither upper nor lower QVERIFY(QByteArray("\xE0\xFF").isLower());
QVERIFY(!QByteArray("\xF7").isLower()); QVERIFY(QByteArray("\xF7").isLower());
QVERIFY(!QByteArray("Text").isLower()); QVERIFY(!QByteArray("Text").isLower());
QVERIFY(!QByteArray("tExt").isLower()); QVERIFY(!QByteArray("tExt").isLower());
QVERIFY(!QByteArray("teXt").isLower()); QVERIFY(!QByteArray("teXt").isLower());
@ -2304,9 +2310,9 @@ void tst_QByteArray::isLower()
QVERIFY(!QByteArray("tExT").isLower()); QVERIFY(!QByteArray("tExT").isLower());
QVERIFY(!QByteArray("TEXT").isLower()); QVERIFY(!QByteArray("TEXT").isLower());
QVERIFY(!QByteArray("@ABYZ[").isLower()); QVERIFY(!QByteArray("@ABYZ[").isLower());
QVERIFY(!QByteArray("@abyz[").isLower()); QVERIFY(QByteArray("@abyz[").isLower());
QVERIFY(!QByteArray("`ABYZ{").isLower()); QVERIFY(!QByteArray("`ABYZ{").isLower());
QVERIFY(!QByteArray("`abyz{").isLower()); QVERIFY(QByteArray("`abyz{").isLower());
} }
void tst_QByteArray::macTypes() void tst_QByteArray::macTypes()