QChar: add fromUcs{2,4}()

The fromUcs2() named ctor is designed to replace all the non-char
integral-type constructors of QChar which make it very hard to control
the implicit QChar conversions, which have caused a few bugs in Qt
itself. As a classical named constructor, it simply returns QChar.

The fromUcs4() named "ctor", however, needs to expand surrogate pairs,
and thus can't just return QChar. Instead, it returns a small struct
that contains one or two char16_t's, can be iterated over and be
implicitly converted to QStringView.  To avoid bikeshedding the name
(FromUcs4Result, of course :), it's defined inline and thus can't be
named outside the function. This function replaces most uses of
QChar::requiresSurrogates() in QtBase.

[ChangeLog][QtCore][QChar] Added fromUcs2(), fromUcs4().

Change-Id: I803708c14001040f75cb599e33c24a3fb8d2579c
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Marc Mutz 2020-04-29 11:36:19 +02:00
parent 19e7c0d2b5
commit 928d57d8da
4 changed files with 104 additions and 32 deletions

View File

@ -710,6 +710,46 @@ QT_BEGIN_NAMESPACE
Constructs a QChar for the character with Unicode code point \a code. Constructs a QChar for the character with Unicode code point \a code.
*/ */
/*!
\fn static QChar QChar::fromUcs2(char16_t c)
\since 6.0
Constructs a QChar from UTF-16 character \a c.
\sa fromUcs4()
*/
/*!
\fn static auto QChar::fromUcs4(char32_t c)
\since 6.0
Returns an anonymous struct that
\list
\li contains a \c{char16_t chars[2]} array,
\li can be implicitly converted to a QStringView, and
\li can be iterated over with a C++11 range-based for loop.
\endlist
If \a c requires surrogates, \c{chars[0]} contains the high surrogate
and \c{chars[1]} the low surrogate, and the QStringView has size 2.
Otherwise, \c{chars[0]} contains \a c and \c{chars[1]} is
\l{QChar::isNull}{null}, and the QStringView has size 1.
This allows easy use of the result:
\code
QString s;
s += QChar::fromUcs4(ch);
\endcode
\code
for (char16_t c16 : QChar::fromUcs4(ch))
use(c16);
\endcode
\sa fromUcs2(), requiresSurrogates()
*/
/*! /*!
\fn bool QChar::isNull() const \fn bool QChar::isNull() const
@ -1564,12 +1604,10 @@ static FullConvertCaseResult fullConvertCase(char32_t uc, QUnicodeTables::Case w
auto length = *specialCase++; auto length = *specialCase++;
while (length--) while (length--)
*pp++ = *specialCase++; *pp++ = *specialCase++;
} else if (Q_UNLIKELY(QChar::requiresSurrogates(uc))) {
// so far, case convertion never changes planes (guaranteed by the qunicodetables generator)
*pp++ = QChar::highSurrogate(uc);
*pp++ = QChar::lowSurrogate(uc + caseDiff);
} else { } else {
*pp++ = uc + caseDiff; // so far, case convertion never changes planes (guaranteed by the qunicodetables generator)
for (char16_t c : QChar::fromUcs4(uc + caseDiff))
*pp++ = c;
} }
return result; return result;
} }
@ -2002,14 +2040,10 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
stcode = ligature; stcode = ligature;
QChar *d = s.data(); QChar *d = s.data();
// ligatureHelper() never changes planes // ligatureHelper() never changes planes
if (QChar::requiresSurrogates(ligature)) { int j = 0;
d[starter] = QChar(QChar::highSurrogate(ligature)); for (QChar ch : QChar::fromUcs4(ligature))
d[starter + 1] = QChar(QChar::lowSurrogate(ligature)); d[starter + j++] = ch;
s.remove(i, 2); s.remove(i, j);
} else {
d[starter] = QChar(ligature);
s.remove(i, 1);
}
continue; continue;
} }
} }
@ -2079,18 +2113,10 @@ static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, in
QChar *uc = s.data(); QChar *uc = s.data();
int p = pos; int p = pos;
// exchange characters // exchange characters
if (!QChar::requiresSurrogates(u2)) { for (QChar ch : QChar::fromUcs4(u2))
uc[p++] = QChar(u2); uc[p++] = ch;
} else { for (QChar ch : QChar::fromUcs4(u1))
uc[p++] = QChar(QChar::highSurrogate(u2)); uc[p++] = ch;
uc[p++] = QChar(QChar::lowSurrogate(u2));
}
if (!QChar::requiresSurrogates(u1)) {
uc[p++] = QChar(u1);
} else {
uc[p++] = QChar(QChar::highSurrogate(u1));
uc[p++] = QChar(QChar::lowSurrogate(u1));
}
if (pos > 0) if (pos > 0)
--pos; --pos;
if (pos > 0 && s.at(pos).isLowSurrogate()) if (pos > 0 && s.at(pos).isLowSurrogate())

View File

@ -120,6 +120,10 @@ public:
QT_ASCII_CAST_WARN Q_DECL_CONSTEXPR QChar(uchar c) noexcept : ucs(c) { } QT_ASCII_CAST_WARN Q_DECL_CONSTEXPR QChar(uchar c) noexcept : ucs(c) { }
#endif #endif
#endif #endif
// Named constructor: wraps the single UTF-16 code unit c in a QChar.
static constexpr QChar fromUcs2(char16_t c) noexcept { return QChar{c}; }
// Declaration only: the return type is deduced from the out-of-class
// definition, which is pulled in by the qstringview.h include at the end
// of this header.
static constexpr inline auto fromUcs4(char32_t c) noexcept;
// Unicode information // Unicode information
enum Category enum Category
@ -680,3 +684,5 @@ struct hash<QT_PREPEND_NAMESPACE(QChar)>
} // namespace std } // namespace std
#endif // QCHAR_H #endif // QCHAR_H
#include <QtCore/qstringview.h> // for QChar::fromUcs4() definition

View File

@ -351,6 +351,22 @@ template <typename QStringLike, typename std::enable_if<
inline QStringView qToStringViewIgnoringNull(const QStringLike &s) noexcept inline QStringView qToStringViewIgnoringNull(const QStringLike &s) noexcept
{ return QStringView(s.data(), s.size()); } { return QStringView(s.data(), s.size()); }
// QChar inline functions:
/*
    Encodes the UCS-4 code point c as one or two UTF-16 code units.

    The result is a function-local aggregate whose chars[] array holds
    either {c, 0} when c fits into a single code unit, or
    {high surrogate, low surrogate} when requiresSurrogates(c) is true.
    It exposes begin()/end()/size() and converts implicitly to QStringView.
*/
Q_REQUIRED_RESULT constexpr auto QChar::fromUcs4(char32_t c) noexcept
{
    struct R {
        char16_t chars[2];
        Q_REQUIRED_RESULT constexpr const char16_t *begin() const noexcept { return chars; }
        // A zero second code unit marks the single-code-unit case.
        Q_REQUIRED_RESULT constexpr qsizetype size() const noexcept { return chars[1] != u'\0' ? 2 : 1; }
        Q_REQUIRED_RESULT constexpr const char16_t *end() const noexcept { return begin() + size(); }
        Q_REQUIRED_RESULT constexpr operator QStringView() const noexcept { return {begin(), end()}; }
    };

    if (!requiresSurrogates(c))
        return R{{char16_t(c), u'\0'}};

    return R{{QChar::highSurrogate(c), QChar::lowSurrogate(c)}};
}
QT_END_NAMESPACE QT_END_NAMESPACE
#endif /* QSTRINGVIEW_H */ #endif /* QSTRINGVIEW_H */

View File

@ -37,6 +37,8 @@ class tst_QChar : public QObject
Q_OBJECT Q_OBJECT
private slots: private slots:
void fromChar16_t(); void fromChar16_t();
void fromUcs4_data();
void fromUcs4();
void fromWchar_t(); void fromWchar_t();
void operator_eqeq_null(); void operator_eqeq_null();
void operators_data(); void operators_data();
@ -89,6 +91,34 @@ void tst_QChar::fromChar16_t()
#endif #endif
} }
/*
    Supplies the code points for fromUcs4().

    The rows cover both sides of the 0xFFFF / 0x10000 boundary, so that the
    single-code-unit branch and the surrogate-pair branch of the test are
    both executed.
*/
void tst_QChar::fromUcs4_data()
{
    QTest::addColumn<uint>("ucs4");

    // The row name is derived from the code point, so list each value once.
    auto row = [](uint ucs4) {
        QTest::addRow("0x%08X", ucs4) << ucs4;
    };

    // Fit into a single char16_t (no surrogates needed):
    row(0x0000);
    row(0x0041);
    row(0xFFFF);

    // Need a surrogate pair:
    row(0x10000);
    row(0x1D157);
    row(0x2f868);
    row(0x10FFFF);
}
/*
    Checks the code units and the QStringView length produced by
    QChar::fromUcs4() for the fetched code point.
*/
void tst_QChar::fromUcs4()
{
    QFETCH(const uint, ucs4);

    const auto result = QChar::fromUcs4(ucs4);

    if (!QChar::requiresSurrogates(ucs4)) {
        // Single code unit: the value itself, followed by a null.
        QCOMPARE(result.chars[0], ucs4);
        QCOMPARE(result.chars[1], 0u);
        QCOMPARE(QStringView{result}.size(), 1);
        return;
    }

    // Surrogate pair: high surrogate first, then the low surrogate.
    QCOMPARE(result.chars[0], QChar::highSurrogate(ucs4));
    QCOMPARE(result.chars[1], QChar::lowSurrogate(ucs4));
    QCOMPARE(QStringView{result}.size(), 2);
}
void tst_QChar::fromWchar_t() void tst_QChar::fromWchar_t()
{ {
#if defined(Q_OS_WIN) #if defined(Q_OS_WIN)
@ -835,13 +865,7 @@ void tst_QChar::normalization_data()
for (int j = 0; j < c.size(); ++j) { for (int j = 0; j < c.size(); ++j) {
bool ok; bool ok;
uint uc = c.at(j).toInt(&ok, 16); uint uc = c.at(j).toInt(&ok, 16);
if (!QChar::requiresSurrogates(uc)) { columns[i].append(QChar::fromUcs4(uc));
columns[i].append(QChar(uc));
} else {
// convert to utf16
columns[i].append(QChar(QChar::highSurrogate(uc)));
columns[i].append(QChar(QChar::lowSurrogate(uc)));
}
} }
} }