QChar: add isSurrogate() and isNonCharacter() to the public API
+ QChar::LastValidCodePoint enum value that supercede the UNICODE_LAST_CODEPOINT macro replace uses of hardcoded values with the new API; remove leftovers Change-Id: I1395c9840b85fcb6b08e241b131794a98773c952 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
e8199b599f
commit
ba300f42bd
@ -43,7 +43,6 @@
|
||||
#include "qlist.h"
|
||||
#include "qendian.h"
|
||||
#include "qchar.h"
|
||||
#include <private/qunicodetables_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
@ -108,7 +107,7 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
|
||||
*cursor++ = 0xc0 | ((uchar) (u >> 6));
|
||||
} else {
|
||||
// is it one of the Unicode non-characters?
|
||||
if (QUnicodeTables::isNonCharacter(u)) {
|
||||
if (QChar::isNonCharacter(u)) {
|
||||
*cursor++ = replacement;
|
||||
++ch;
|
||||
++invalid;
|
||||
@ -184,12 +183,12 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
|
||||
bool nonCharacter;
|
||||
if (!headerdone && uc == 0xfeff) {
|
||||
// don't do anything, just skip the BOM
|
||||
} else if (!(nonCharacter = QUnicodeTables::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
|
||||
} else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
||||
// surrogate pair
|
||||
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
||||
*qch++ = QChar::highSurrogate(uc);
|
||||
*qch++ = QChar::lowSurrogate(uc);
|
||||
} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) {
|
||||
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) {
|
||||
// error: overlong sequence, UTF16 surrogate or non-character
|
||||
*qch++ = replacement;
|
||||
++invalid;
|
||||
|
@ -285,19 +285,6 @@ static void ensureDetached(QString &result, ushort *&output, const ushort *begin
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool isUnicodeNonCharacter(uint ucs4)
|
||||
{
|
||||
// Unicode has a couple of "non-characters" that one can use internally,
|
||||
// but are not allowed to be used for text interchange.
|
||||
//
|
||||
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
||||
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
||||
// U+FDEF (inclusive)
|
||||
|
||||
return (ucs4 & 0xfffe) == 0xfffe
|
||||
|| (ucs4 - 0xfdd0U) < 16;
|
||||
}
|
||||
|
||||
// returns true if we performed an UTF-8 decoding
|
||||
static bool encodedUtf8ToUtf16(QString &result, ushort *&output, const ushort *begin, const ushort *&input,
|
||||
const ushort *end, ushort decoded)
|
||||
@ -370,7 +357,7 @@ static bool encodedUtf8ToUtf16(QString &result, ushort *&output, const ushort *b
|
||||
// we've decoded something; safety-check it
|
||||
if (uc < min_uc)
|
||||
return false;
|
||||
if (isUnicodeNonCharacter(uc) || (uc >= 0xD800 && uc <= 0xDFFF) || uc >= 0x110000)
|
||||
if (QChar::isSurrogate(uc) || QChar::isNonCharacter(uc) || uc > QChar::LastValidCodePoint)
|
||||
return false;
|
||||
|
||||
if (!QChar::requiresSurrogates(uc)) {
|
||||
|
@ -45,7 +45,6 @@
|
||||
#include <qdebug.h>
|
||||
#include "qjsonparser_p.h"
|
||||
#include "qjson_p.h"
|
||||
#include <private/qunicodetables_p.h>
|
||||
|
||||
//#define PARSER_DEBUG
|
||||
#ifdef PARSER_DEBUG
|
||||
@ -769,8 +768,8 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result
|
||||
uc = (uc << 6) | (ch & 0x3f);
|
||||
}
|
||||
|
||||
if (uc < min_uc || QUnicodeTables::isNonCharacter(uc) ||
|
||||
(uc >= 0xd800 && uc <= 0xdfff) || uc >= 0x110000) {
|
||||
if (uc < min_uc || QChar::isNonCharacter(uc) ||
|
||||
QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,6 @@
|
||||
|
||||
#include "qjsonwriter_p.h"
|
||||
#include "qjson_p.h"
|
||||
#include <private/qunicodetables_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
@ -140,7 +139,7 @@ static QByteArray escapedString(const QString &s)
|
||||
*cursor++ = 0xc0 | ((uchar) (u >> 6));
|
||||
} else {
|
||||
// is it one of the Unicode non-characters?
|
||||
if (QUnicodeTables::isNonCharacter(u)) {
|
||||
if (QChar::isNonCharacter(u)) {
|
||||
*cursor++ = replacement;
|
||||
++ch;
|
||||
continue;
|
||||
|
@ -377,6 +377,7 @@ QT_BEGIN_NAMESPACE
|
||||
\value ByteOrderSwapped
|
||||
\value ParagraphSeparator
|
||||
\value LineSeparator
|
||||
\value LastValidCodePoint
|
||||
*/
|
||||
|
||||
/*!
|
||||
@ -499,7 +500,7 @@ QT_BEGIN_NAMESPACE
|
||||
*/
|
||||
bool QChar::isPrint(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
const int test = FLAG(Other_Control) |
|
||||
FLAG(Other_Format) |
|
||||
@ -532,7 +533,7 @@ bool QChar::isPrint(uint ucs4)
|
||||
*/
|
||||
bool QT_FASTCALL QChar::isSpace_helper(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
const int test = FLAG(Separator_Space) |
|
||||
FLAG(Separator_Line) |
|
||||
@ -558,7 +559,7 @@ bool QT_FASTCALL QChar::isSpace_helper(uint ucs4)
|
||||
*/
|
||||
bool QChar::isMark(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
const int test = FLAG(Mark_NonSpacing) |
|
||||
FLAG(Mark_SpacingCombining) |
|
||||
@ -582,7 +583,7 @@ bool QChar::isMark(uint ucs4)
|
||||
*/
|
||||
bool QChar::isPunct(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
const int test = FLAG(Punctuation_Connector) |
|
||||
FLAG(Punctuation_Dash) |
|
||||
@ -610,7 +611,7 @@ bool QChar::isPunct(uint ucs4)
|
||||
*/
|
||||
bool QChar::isSymbol(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
const int test = FLAG(Symbol_Math) |
|
||||
FLAG(Symbol_Currency) |
|
||||
@ -640,7 +641,7 @@ bool QChar::isSymbol(uint ucs4)
|
||||
*/
|
||||
bool QT_FASTCALL QChar::isLetter_helper(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
const int test = FLAG(Letter_Uppercase) |
|
||||
FLAG(Letter_Lowercase) |
|
||||
@ -675,7 +676,7 @@ bool QT_FASTCALL QChar::isLetter_helper(uint ucs4)
|
||||
*/
|
||||
bool QT_FASTCALL QChar::isNumber_helper(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
const int test = FLAG(Number_DecimalDigit) |
|
||||
FLAG(Number_Letter) |
|
||||
@ -704,7 +705,7 @@ bool QT_FASTCALL QChar::isNumber_helper(uint ucs4)
|
||||
*/
|
||||
bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
const int test = FLAG(Letter_Uppercase) |
|
||||
FLAG(Letter_Lowercase) |
|
||||
@ -737,18 +738,55 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4)
|
||||
\sa isNumber()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn bool QChar::isNonCharacter() const
|
||||
\since 5.0
|
||||
|
||||
Returns true if the QChar is a non-character; false otherwise.
|
||||
|
||||
Unicode has a certain number of code points that are classified
|
||||
as "non-characters:" that is, they can be used for internal purposes
|
||||
in applications but cannot be used for text interchange.
|
||||
Those are the last two entries each Unicode Plane ([0xfffe..0xffff],
|
||||
[0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef].
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn bool QChar::isHighSurrogate() const
|
||||
|
||||
Returns true if the QChar is the high part of a UTF16 surrogate
|
||||
(i.e. if it's code point in range [0xd800..0xdbff]).
|
||||
(i.e. if its code point is in range [0xd800..0xdbff]); false otherwise.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn bool QChar::isLowSurrogate() const
|
||||
|
||||
Returns true if the QChar is the low part of a UTF16 surrogate
|
||||
(i.e. if it's code point in range [0xdc00..0xdfff]).
|
||||
(i.e. if its code point is in range [0xdc00..0xdfff]); false otherwise.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn bool QChar::isSurrogate() const
|
||||
\since 5.0
|
||||
|
||||
Returns true if the QChar contains a code point that is in either
|
||||
the high or the low part of the UTF-16 surrogate range
|
||||
(i.e. if its code point is in range [0xd800..0xdfff]); false otherwise.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn static bool isNonCharacter(uint ucs4)
|
||||
\overload
|
||||
\since 5.0
|
||||
|
||||
Returns true if the UCS-4-encoded character specified by \a ucs4
|
||||
is a non-character; false otherwise.
|
||||
|
||||
Unicode has a certain number of code points that are classified
|
||||
as "non-characters:" that is, they can be used for internal purposes
|
||||
in applications but cannot be used for text interchange.
|
||||
Those are the last two entries each Unicode Plane ([0xfffe..0xffff],
|
||||
[0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef].
|
||||
*/
|
||||
|
||||
/*!
|
||||
@ -757,7 +795,7 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4)
|
||||
|
||||
Returns true if the UCS-4-encoded character specified by \a ucs4
|
||||
is the high part of a UTF16 surrogate
|
||||
(i.e. if it's code point in range [0xd800..0xdbff]).
|
||||
(i.e. if its code point is in range [0xd800..0xdbff]); false otherwise.
|
||||
*/
|
||||
|
||||
/*!
|
||||
@ -766,7 +804,18 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4)
|
||||
|
||||
Returns true if the UCS-4-encoded character specified by \a ucs4
|
||||
is the low part of a UTF16 surrogate
|
||||
(i.e. if it's code point in range [0xdc00..0xdfff]).
|
||||
(i.e. if its code point is in range [0xdc00..0xdfff]); false otherwise.
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn static bool QChar::isSurrogate(uint ucs4)
|
||||
\overload
|
||||
\since 5.0
|
||||
|
||||
Returns true if the UCS-4-encoded character specified by \a ucs4
|
||||
contains a code point that is in either the high or the low part of the
|
||||
UTF-16 surrogate range (i.e. if its code point is in range [0xd800..0xdfff]);
|
||||
false otherwise.
|
||||
*/
|
||||
|
||||
/*!
|
||||
@ -774,7 +823,8 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4)
|
||||
|
||||
Returns true if the UCS-4-encoded character specified by \a ucs4
|
||||
can be split into the high and low parts of a UTF16 surrogate
|
||||
(i.e. if it's code point is greater than or equals to 0x10000).
|
||||
(i.e. if its code point is greater than or equals to 0x10000);
|
||||
false otherwise.
|
||||
*/
|
||||
|
||||
/*!
|
||||
@ -818,7 +868,7 @@ bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4)
|
||||
*/
|
||||
int QChar::digitValue(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return -1;
|
||||
return qGetProp(ucs4)->digitValue;
|
||||
}
|
||||
@ -835,7 +885,7 @@ int QChar::digitValue(uint ucs4)
|
||||
*/
|
||||
QChar::Category QChar::category(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return QChar::Other_NotAssigned;
|
||||
return (QChar::Category) qGetProp(ucs4)->category;
|
||||
}
|
||||
@ -852,7 +902,7 @@ QChar::Category QChar::category(uint ucs4)
|
||||
*/
|
||||
QChar::Direction QChar::direction(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return QChar::DirL;
|
||||
return (QChar::Direction) qGetProp(ucs4)->direction;
|
||||
}
|
||||
@ -871,7 +921,7 @@ QChar::Direction QChar::direction(uint ucs4)
|
||||
*/
|
||||
QChar::Joining QChar::joining(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return QChar::OtherJoining;
|
||||
return (QChar::Joining) qGetProp(ucs4)->joining;
|
||||
}
|
||||
@ -900,7 +950,7 @@ QChar::Joining QChar::joining(uint ucs4)
|
||||
*/
|
||||
bool QChar::hasMirrored(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return false;
|
||||
return qGetProp(ucs4)->mirrorDiff != 0;
|
||||
}
|
||||
@ -950,7 +1000,7 @@ bool QChar::hasMirrored(uint ucs4)
|
||||
*/
|
||||
uint QChar::mirroredChar(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return ucs4;
|
||||
return ucs4 + qGetProp(ucs4)->mirrorDiff;
|
||||
}
|
||||
@ -1060,7 +1110,7 @@ QChar::Decomposition QChar::decompositionTag(uint ucs4)
|
||||
*/
|
||||
unsigned char QChar::combiningClass(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return 0;
|
||||
return (unsigned char) qGetProp(ucs4)->combiningClass;
|
||||
}
|
||||
@ -1078,7 +1128,7 @@ unsigned char QChar::combiningClass(uint ucs4)
|
||||
*/
|
||||
QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return QChar::Unicode_Unassigned;
|
||||
return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
|
||||
}
|
||||
@ -1155,7 +1205,7 @@ static inline T toCaseFolded_helper(T uc)
|
||||
*/
|
||||
uint QChar::toLower(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return ucs4;
|
||||
return toLowerCase_helper<uint>(ucs4);
|
||||
}
|
||||
@ -1175,7 +1225,7 @@ uint QChar::toLower(uint ucs4)
|
||||
*/
|
||||
uint QChar::toUpper(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return ucs4;
|
||||
return toUpperCase_helper<uint>(ucs4);
|
||||
}
|
||||
@ -1195,7 +1245,7 @@ uint QChar::toUpper(uint ucs4)
|
||||
*/
|
||||
uint QChar::toTitleCase(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return ucs4;
|
||||
return toTitleCase_helper<uint>(ucs4);
|
||||
}
|
||||
@ -1236,7 +1286,7 @@ static inline ushort foldCase(ushort ch)
|
||||
*/
|
||||
uint QChar::toCaseFolded(uint ucs4)
|
||||
{
|
||||
if (ucs4 > UNICODE_LAST_CODEPOINT)
|
||||
if (ucs4 > LastValidCodePoint)
|
||||
return ucs4;
|
||||
return toCaseFolded_helper<uint>(ucs4);
|
||||
}
|
||||
|
@ -73,7 +73,8 @@ public:
|
||||
ByteOrderMark = 0xfeff,
|
||||
ByteOrderSwapped = 0xfffe,
|
||||
ParagraphSeparator = 0x2029,
|
||||
LineSeparator = 0x2028
|
||||
LineSeparator = 0x2028,
|
||||
LastValidCodePoint = 0x10ffff
|
||||
};
|
||||
|
||||
Q_DECL_CONSTEXPR QChar() : ucs(0) {}
|
||||
@ -245,24 +246,28 @@ public:
|
||||
inline bool isUpper() const { return QChar::isUpper(ucs); }
|
||||
inline bool isTitleCase() const { return QChar::isTitleCase(ucs); }
|
||||
|
||||
inline bool isHighSurrogate() const {
|
||||
return ((ucs & 0xfc00) == 0xd800);
|
||||
}
|
||||
inline bool isLowSurrogate() const {
|
||||
return ((ucs & 0xfc00) == 0xdc00);
|
||||
}
|
||||
inline bool isNonCharacter() const { return QChar::isNonCharacter(ucs); }
|
||||
inline bool isHighSurrogate() const { return QChar::isHighSurrogate(ucs); }
|
||||
inline bool isLowSurrogate() const { return QChar::isLowSurrogate(ucs); }
|
||||
inline bool isSurrogate() const { return QChar::isSurrogate(ucs); }
|
||||
|
||||
inline uchar cell() const { return uchar(ucs & 0xff); }
|
||||
inline uchar row() const { return uchar((ucs>>8)&0xff); }
|
||||
inline void setCell(uchar cell);
|
||||
inline void setRow(uchar row);
|
||||
|
||||
static inline bool isNonCharacter(uint ucs4) {
|
||||
return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);
|
||||
}
|
||||
static inline bool isHighSurrogate(uint ucs4) {
|
||||
return ((ucs4 & 0xfffffc00) == 0xd800);
|
||||
}
|
||||
static inline bool isLowSurrogate(uint ucs4) {
|
||||
return ((ucs4 & 0xfffffc00) == 0xdc00);
|
||||
}
|
||||
static inline bool isSurrogate(uint ucs4) {
|
||||
return (ucs4 - 0xd800u < 2048u);
|
||||
}
|
||||
static inline bool requiresSurrogates(uint ucs4) {
|
||||
return (ucs4 >= 0x10000);
|
||||
}
|
||||
|
@ -61,8 +61,6 @@ QT_BEGIN_NAMESPACE
|
||||
|
||||
#define UNICODE_DATA_VERSION QChar::Unicode_5_0
|
||||
|
||||
#define UNICODE_LAST_CODEPOINT 0x10ffff
|
||||
|
||||
namespace QUnicodeTables {
|
||||
|
||||
struct Properties {
|
||||
@ -237,18 +235,6 @@ namespace QUnicodeTables {
|
||||
inline int script(QChar ch)
|
||||
{ return script(ch.unicode()); }
|
||||
|
||||
|
||||
inline bool isNonCharacter(uint ucs4)
|
||||
{
|
||||
// Noncharacter_Code_Point:
|
||||
// Unicode has a couple of "non-characters" that one can use internally,
|
||||
// but are not allowed to be used for text interchange.
|
||||
// Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF,
|
||||
// U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF
|
||||
|
||||
return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);
|
||||
}
|
||||
|
||||
} // namespace QUnicodeTables
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -986,7 +986,7 @@ bool QXmlStreamReaderPrivate::scanUntil(const char *str, short tokenToInject)
|
||||
textBuffer += QChar(c);
|
||||
continue;
|
||||
default:
|
||||
if(c < 0x20 || (c > 0xFFFD && c < 0x10000) || c > 0x10FFFF ) {
|
||||
if (c < 0x20 || (c > 0xFFFD && c < 0x10000) || c > QChar::LastValidCodePoint ) {
|
||||
raiseWellFormedError(QXmlStream::tr("Invalid XML character."));
|
||||
lineNumber = oldLineNumber;
|
||||
return false;
|
||||
@ -1718,7 +1718,7 @@ uint QXmlStreamReaderPrivate::resolveCharRef(int symbolIndex)
|
||||
s = symString(symbolIndex).toString().toUInt(&ok, 10);
|
||||
|
||||
ok &= (s == 0x9 || s == 0xa || s == 0xd || (s >= 0x20 && s <= 0xd7ff)
|
||||
|| (s >= 0xe000 && s <= 0xfffd) || (s >= 0x10000 && s <= 0x10ffff));
|
||||
|| (s >= 0xe000 && s <= 0xfffd) || (s >= 0x10000 && s <= QChar::LastValidCodePoint));
|
||||
|
||||
return ok ? s : 0;
|
||||
}
|
||||
|
@ -339,7 +339,7 @@ void tst_QChar::isUpper()
|
||||
QVERIFY(QChar(0xC2).isUpper()); // A with ^
|
||||
QVERIFY(!QChar(0xE2).isUpper()); // a with ^
|
||||
|
||||
for (uint codepoint = 0; codepoint <= UNICODE_LAST_CODEPOINT; ++codepoint) {
|
||||
for (uint codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
|
||||
if (QChar::isUpper(codepoint))
|
||||
QVERIFY(codepoint == QChar::toUpper(codepoint));
|
||||
}
|
||||
@ -355,7 +355,7 @@ void tst_QChar::isLower()
|
||||
QVERIFY(!QChar(0xC2).isLower()); // A with ^
|
||||
QVERIFY(QChar(0xE2).isLower()); // a with ^
|
||||
|
||||
for (uint codepoint = 0; codepoint <= UNICODE_LAST_CODEPOINT; ++codepoint) {
|
||||
for (uint codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
|
||||
if (QChar::isLower(codepoint))
|
||||
QVERIFY(codepoint == QChar::toLower(codepoint));
|
||||
}
|
||||
@ -363,7 +363,7 @@ void tst_QChar::isLower()
|
||||
|
||||
void tst_QChar::isTitleCase()
|
||||
{
|
||||
for (uint codepoint = 0; codepoint <= UNICODE_LAST_CODEPOINT; ++codepoint) {
|
||||
for (uint codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
|
||||
if (QChar::isTitleCase(codepoint))
|
||||
QVERIFY(codepoint == QChar::toTitleCase(codepoint));
|
||||
}
|
||||
@ -575,7 +575,7 @@ void tst_QChar::digitValue()
|
||||
QVERIFY(QChar::digitValue((uint)0x1040) == 0);
|
||||
|
||||
QVERIFY(QChar::digitValue((ushort)0xd800) == -1);
|
||||
QVERIFY(QChar::digitValue((uint)UNICODE_LAST_CODEPOINT + 1) == -1);
|
||||
QVERIFY(QChar::digitValue((uint)0x110000u) == -1);
|
||||
}
|
||||
|
||||
void tst_QChar::mirroredChar()
|
||||
|
@ -1934,19 +1934,6 @@ int fromUtf8_latin1_sse2_improved(ushort *dst, const char *chars, int len)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline bool isUnicodeNonCharacter(uint ucs4)
|
||||
{
|
||||
// Unicode has a couple of "non-characters" that one can use internally,
|
||||
// but are not allowed to be used for text interchange.
|
||||
//
|
||||
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
||||
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
||||
// U+FDEF (inclusive)
|
||||
|
||||
return (ucs4 & 0xfffe) == 0xfffe
|
||||
|| (ucs4 - 0xfdd0U) < 32;
|
||||
}
|
||||
|
||||
int fromUtf8_qt47(ushort *dst, const char *chars, int len)
|
||||
{
|
||||
// this is almost the code found in Qt 4.7's qutfcodec.cpp QUtf8Codec::convertToUnicode
|
||||
@ -1996,12 +1983,12 @@ int fromUtf8_qt47(ushort *dst, const char *chars, int len)
|
||||
bool nonCharacter;
|
||||
if (!headerdone && uc == 0xfeff) {
|
||||
// don't do anything, just skip the BOM
|
||||
} else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
|
||||
} else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
||||
// surrogate pair
|
||||
//Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
||||
*qch++ = QChar::highSurrogate(uc);
|
||||
*qch++ = QChar::lowSurrogate(uc);
|
||||
} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) {
|
||||
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) {
|
||||
// error: overlong sequence, UTF16 surrogate or non-character
|
||||
*qch++ = replacement;
|
||||
++invalid;
|
||||
@ -2102,12 +2089,12 @@ int fromUtf8_qt47_stateless(ushort *dst, const char *chars, int len)
|
||||
bool nonCharacter;
|
||||
if (!headerdone && uc == 0xfeff) {
|
||||
// don't do anything, just skip the BOM
|
||||
} else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {
|
||||
} else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
||||
// surrogate pair
|
||||
//Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
||||
*qch++ = QChar::highSurrogate(uc);
|
||||
*qch++ = QChar::lowSurrogate(uc);
|
||||
} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) {
|
||||
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) {
|
||||
// error: overlong sequence, UTF16 surrogate or non-character
|
||||
*qch++ = replacement;
|
||||
++invalid;
|
||||
@ -2227,7 +2214,7 @@ static inline void extract_utf8_multibyte(ushort *&dst, const char *&chars, qptr
|
||||
chars += 2;
|
||||
len -= 2;
|
||||
if (!trusted &&
|
||||
(ucs < 0x800 || isUnicodeNonCharacter(ucs) || (ucs >= 0xd800 && ucs <= 0xdfff)))
|
||||
(ucs < 0x800 || QChar::isNonCharacter(ucs) || QChar::isSurrogate(ucs)))
|
||||
dst[counter] = QChar::ReplacementCharacter;
|
||||
else
|
||||
dst[counter] = ucs;
|
||||
@ -2258,7 +2245,7 @@ static inline void extract_utf8_multibyte(ushort *&dst, const char *&chars, qptr
|
||||
// dst[counter] will correspond to chars[counter..counter+2], so adjust
|
||||
chars += 3;
|
||||
len -= 3;
|
||||
if (trusted || (QChar::requiresSurrogates(ucs) && ucs < 0x110000 && !isUnicodeNonCharacter(ucs))) {
|
||||
if (trusted || (QChar::requiresSurrogates(ucs) && ucs <= QChar::LastValidCodePoint && !QChar::isNonCharacter(ucs))) {
|
||||
dst[counter + 0] = QChar::highSurrogate(ucs);
|
||||
dst[counter + 1] = QChar::lowSurrogate(ucs);
|
||||
counter += 2;
|
||||
|
@ -54,9 +54,6 @@
|
||||
#define DATA_VERSION_S "5.0"
|
||||
#define DATA_VERSION_STR "QChar::Unicode_5_0"
|
||||
|
||||
#define LAST_CODEPOINT 0x10ffff
|
||||
#define LAST_CODEPOINT_STR "0x10ffff"
|
||||
|
||||
|
||||
static QHash<QByteArray, QChar::UnicodeVersion> age_map;
|
||||
|
||||
@ -417,18 +414,6 @@ static const char *methods =
|
||||
" inline int script(QChar ch)\n"
|
||||
" { return script(ch.unicode()); }\n\n";
|
||||
|
||||
static const char *generated_methods =
|
||||
" inline bool isNonCharacter(uint ucs4)\n"
|
||||
" {\n"
|
||||
" // Noncharacter_Code_Point:\n"
|
||||
" // Unicode has a couple of \"non-characters\" that one can use internally,\n"
|
||||
" // but are not allowed to be used for text interchange.\n"
|
||||
" // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF,\n"
|
||||
" // U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF\n"
|
||||
"\n"
|
||||
" return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);\n"
|
||||
" }\n\n";
|
||||
|
||||
static const int SizeOfPropertiesStruct = 20;
|
||||
|
||||
struct PropertyFlags {
|
||||
@ -592,8 +577,8 @@ UnicodeData &UnicodeData::valueRef(int codepoint)
|
||||
{
|
||||
static bool initialized = false;
|
||||
if (!initialized) {
|
||||
unicodeData.reserve(LAST_CODEPOINT + 1);
|
||||
for (int uc = 0; uc <= LAST_CODEPOINT; ++uc)
|
||||
unicodeData.reserve(QChar::LastValidCodePoint + 1);
|
||||
for (int uc = 0; uc <= QChar::LastValidCodePoint; ++uc)
|
||||
unicodeData.append(UnicodeData(uc));
|
||||
initialized = true;
|
||||
}
|
||||
@ -795,7 +780,7 @@ static void readUnicodeData()
|
||||
bool ok;
|
||||
int codepoint = properties[UD_Value].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
Q_ASSERT(codepoint <= LAST_CODEPOINT);
|
||||
Q_ASSERT(codepoint <= QChar::LastValidCodePoint);
|
||||
int lastCodepoint = codepoint;
|
||||
|
||||
QByteArray name = properties[UD_Name];
|
||||
@ -807,7 +792,7 @@ static void readUnicodeData()
|
||||
Q_ASSERT(properties[UD_Name].startsWith('<') && properties[UD_Name].contains("Last"));
|
||||
lastCodepoint = properties[UD_Value].toInt(&ok, 16);
|
||||
Q_ASSERT(ok);
|
||||
Q_ASSERT(lastCodepoint <= LAST_CODEPOINT);
|
||||
Q_ASSERT(lastCodepoint <= QChar::LastValidCodePoint);
|
||||
}
|
||||
|
||||
UnicodeData &data = UnicodeData::valueRef(codepoint);
|
||||
@ -1106,7 +1091,7 @@ static void readDerivedNormalizationProps()
|
||||
}
|
||||
}
|
||||
|
||||
for (int codepoint = 0; codepoint <= LAST_CODEPOINT; ++codepoint) {
|
||||
for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
|
||||
UnicodeData &d = UnicodeData::valueRef(codepoint);
|
||||
if (!d.excludedComposition
|
||||
&& d.decompositionType == QChar::Canonical
|
||||
@ -1208,9 +1193,8 @@ static QList<PropertyFlags> uniqueProperties;
|
||||
static void computeUniqueProperties()
|
||||
{
|
||||
qDebug("computeUniqueProperties:");
|
||||
for (int codepoint = 0; codepoint <= LAST_CODEPOINT; ++codepoint) {
|
||||
for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
|
||||
UnicodeData &d = UnicodeData::valueRef(codepoint);
|
||||
|
||||
int index = uniqueProperties.indexOf(d.p);
|
||||
if (index == -1) {
|
||||
index = uniqueProperties.size();
|
||||
@ -2898,7 +2882,6 @@ int main(int, char **)
|
||||
"#include <QtCore/qchar.h>\n\n"
|
||||
"QT_BEGIN_NAMESPACE\n\n");
|
||||
f.write("#define UNICODE_DATA_VERSION "DATA_VERSION_STR"\n\n");
|
||||
f.write("#define UNICODE_LAST_CODEPOINT "LAST_CODEPOINT_STR"\n\n");
|
||||
f.write("namespace QUnicodeTables {\n\n");
|
||||
f.write(property_string);
|
||||
f.write("\n");
|
||||
@ -2913,8 +2896,6 @@ int main(int, char **)
|
||||
f.write(line_break_class_string);
|
||||
f.write("\n");
|
||||
f.write(methods);
|
||||
f.write("\n");
|
||||
f.write(generated_methods);
|
||||
f.write("} // namespace QUnicodeTables\n\n"
|
||||
"QT_END_NAMESPACE\n\n"
|
||||
"#endif // QUNICODETABLES_P_H\n");
|
||||
|
Loading…
Reference in New Issue
Block a user