QUrl: Implement UTS #46
UTS #46 (https://unicode.org/reports/tr46/) is a successor to IDNA 2003/2008 standards from Unicode. The current implementation uses nontransitional processing by default. An optional argument is added to QUrl::toAce() and QUrl::fromAce() to allow using transitional processing and to ignore the IDN whitelist. [ChangeLog][QtCore][QUrl] ACE processing is now performed according to the UTS #46 standard based on IDNA 2008 instead of IDNA 2003. Task-number: QTBUG-85371 Change-Id: I46b2e86792bc9699cb6961bae8e283fbff72f874 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
f5360b7c72
commit
4bf3010378
@ -389,6 +389,25 @@
|
||||
\sa fromUserInput()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\enum QUrl::AceProcessingOption
|
||||
\since 6.3
|
||||
|
||||
The ACE processing options control the way URLs are transformed to and from
|
||||
ASCII-Compatible Encoding.
|
||||
|
||||
\value IgnoreIDNWhitelist Ignore the IDN whitelist when converting URLs
|
||||
to Unicode.
|
||||
\value AceTransitionalProcessing Use transitional processing described in UTS #46.
|
||||
This allows better compatibility with IDNA 2003
|
||||
specification.
|
||||
|
||||
The default is to use nontransitional processing and to allow non-ASCII
|
||||
characters only inside URLs whose top-level domains are listed in the IDN whitelist.
|
||||
|
||||
\sa toAce(), fromAce(), idnWhitelist()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QUrl::QUrl(QUrl &&other)
|
||||
|
||||
@ -1177,7 +1196,7 @@ inline void QUrlPrivate::appendHost(QString &appendTo, QUrl::FormattingOptions o
|
||||
// this is either an IPv4Address or a reg-name
|
||||
// if it is a reg-name, it is already stored in Unicode form
|
||||
if (options & QUrl::EncodeUnicode && !(options & 0x4000000))
|
||||
appendTo += qt_ACE_do(host, ToAceOnly, AllowLeadingDot);
|
||||
appendTo += qt_ACE_do(host, ToAceOnly, AllowLeadingDot, {});
|
||||
else
|
||||
appendTo += host;
|
||||
}
|
||||
@ -1339,7 +1358,7 @@ inline bool QUrlPrivate::setHost(const QString &value, int from, int iend, QUrl:
|
||||
// Unicode encoding (some non-ASCII characters case-fold to digits
|
||||
// when nameprepping is done)
|
||||
//
|
||||
// The qt_ACE_do function below applies nameprepping and the STD3 check.
|
||||
// The qt_ACE_do function below does IDNA normalization and the STD3 check.
|
||||
// That means a Unicode string may become an IPv4 address, but it cannot
|
||||
// produce a '[' or a '%'.
|
||||
|
||||
@ -1358,7 +1377,7 @@ inline bool QUrlPrivate::setHost(const QString &value, int from, int iend, QUrl:
|
||||
return setHost(s, 0, s.length(), QUrl::StrictMode);
|
||||
}
|
||||
|
||||
s = qt_ACE_do(QStringView(begin, len), NormalizeAce, ForbidLeadingDot);
|
||||
s = qt_ACE_do(value.mid(from, iend - from), NormalizeAce, ForbidLeadingDot, {});
|
||||
if (s.isEmpty()) {
|
||||
setError(InvalidRegNameError, value);
|
||||
return false;
|
||||
@ -3013,50 +3032,72 @@ QByteArray QUrl::toPercentEncoding(const QString &input, const QByteArray &exclu
|
||||
}
|
||||
|
||||
/*!
|
||||
\since 4.2
|
||||
\since 6.3
|
||||
|
||||
Returns the Unicode form of the given domain name
|
||||
\a domain, which is encoded in the ASCII Compatible Encoding (ACE).
|
||||
The output can be customized by passing flags with \a options.
|
||||
The result of this function is considered equivalent to \a domain.
|
||||
|
||||
If the value in \a domain cannot be encoded, it will be converted
|
||||
to QString and returned.
|
||||
|
||||
The ASCII Compatible Encoding (ACE) is defined by RFC 3490, RFC 3491
|
||||
and RFC 3492. It is part of the Internationalizing Domain Names in
|
||||
Applications (IDNA) specification, which allows for domain names
|
||||
(like \c "example.com") to be written using international
|
||||
characters.
|
||||
The ASCII-Compatible Encoding (ACE) is defined by RFC 3490, RFC 3491
|
||||
and RFC 3492 and updated by the Unicode Technical Standard #46. It is part
|
||||
of the Internationalizing Domain Names in Applications (IDNA) specification,
|
||||
which allows for domain names (like \c "example.com") to be written using
|
||||
non-US-ASCII characters.
|
||||
*/
|
||||
QString QUrl::fromAce(const QByteArray &domain, QUrl::AceProcessingOptions options)
|
||||
{
|
||||
return qt_ACE_do(QString::fromLatin1(domain), NormalizeAce,
|
||||
ForbidLeadingDot /*FIXME: make configurable*/, options);
|
||||
}
|
||||
|
||||
#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
|
||||
/*!
|
||||
\since 4.2
|
||||
\overload
|
||||
*/
|
||||
QString QUrl::fromAce(const QByteArray &domain)
|
||||
{
|
||||
QVarLengthArray<char16_t> buffer;
|
||||
buffer.resize(domain.size());
|
||||
qt_from_latin1(buffer.data(), domain.data(), domain.size());
|
||||
return qt_ACE_do(QStringView{buffer.data(), buffer.size()},
|
||||
NormalizeAce, ForbidLeadingDot /*FIXME: make configurable*/);
|
||||
return fromAce(domain, {});
|
||||
}
|
||||
#endif
|
||||
|
||||
/*!
|
||||
\since 4.2
|
||||
\since 6.3
|
||||
|
||||
Returns the ASCII Compatible Encoding of the given domain name \a domain.
|
||||
The output can be customized by passing flags with \a options.
|
||||
The result of this function is considered equivalent to \a domain.
|
||||
|
||||
The ASCII-Compatible Encoding (ACE) is defined by RFC 3490, RFC 3491
|
||||
and RFC 3492. It is part of the Internationalizing Domain Names in
|
||||
Applications (IDNA) specification, which allows for domain names
|
||||
(like \c "example.com") to be written using international
|
||||
characters.
|
||||
and RFC 3492 and updated by the Unicode Technical Standard #46. It is part
|
||||
of the Internationalizing Domain Names in Applications (IDNA) specification,
|
||||
which allows for domain names (like \c "example.com") to be written using
|
||||
non-US-ASCII characters.
|
||||
|
||||
This function returns an empty QByteArray if \a domain is not a valid
|
||||
hostname. Note, in particular, that IPv6 literals are not valid domain
|
||||
names.
|
||||
*/
|
||||
QByteArray QUrl::toAce(const QString &domain, AceProcessingOptions options)
|
||||
{
|
||||
return qt_ACE_do(domain, ToAceOnly, ForbidLeadingDot /*FIXME: make configurable*/, options)
|
||||
.toLatin1();
|
||||
}
|
||||
|
||||
#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
|
||||
/*!
|
||||
\since 4.2
|
||||
\overload
|
||||
*/
|
||||
QByteArray QUrl::toAce(const QString &domain)
|
||||
{
|
||||
return qt_ACE_do(domain, ToAceOnly, ForbidLeadingDot /*FIXME: make configurable*/).toLatin1();
|
||||
return toAce(domain, {});
|
||||
}
|
||||
#endif
|
||||
|
||||
/*!
|
||||
\internal
|
||||
|
@ -281,8 +281,22 @@ public:
|
||||
NSURL *toNSURL() const Q_DECL_NS_RETURNS_AUTORELEASED;
|
||||
#endif
|
||||
|
||||
enum AceProcessingOption : unsigned int {
|
||||
IgnoreIDNWhitelist = 0x1,
|
||||
AceTransitionalProcessing = 0x2,
|
||||
};
|
||||
Q_DECLARE_FLAGS(AceProcessingOptions, AceProcessingOption)
|
||||
|
||||
#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0)
|
||||
static QString fromAce(const QByteArray &, AceProcessingOptions options = {});
|
||||
static QByteArray toAce(const QString &, AceProcessingOptions options = {});
|
||||
#else
|
||||
static QString fromAce(const QByteArray &);
|
||||
static QByteArray toAce(const QString &);
|
||||
static QString fromAce(const QByteArray &, AceProcessingOptions options);
|
||||
static QByteArray toAce(const QString &, AceProcessingOptions options);
|
||||
#endif
|
||||
|
||||
static QStringList idnWhitelist();
|
||||
static QStringList toStringList(const QList<QUrl> &uris, FormattingOptions options = FormattingOptions(PrettyDecoded));
|
||||
static QList<QUrl> fromStringList(const QStringList &uris, ParsingMode mode = TolerantMode);
|
||||
@ -302,6 +316,7 @@ public:
|
||||
Q_DECLARE_SHARED(QUrl)
|
||||
Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::ComponentFormattingOptions)
|
||||
//Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::FormattingOptions)
|
||||
Q_DECLARE_OPERATORS_FOR_FLAGS(QUrl::AceProcessingOptions)
|
||||
|
||||
#ifndef Q_QDOC
|
||||
constexpr inline QUrl::FormattingOptions operator|(QUrl::UrlFormattingOption f1, QUrl::UrlFormattingOption f2)
|
||||
|
@ -65,7 +65,8 @@ extern Q_AUTOTEST_EXPORT qsizetype qt_urlRecode(QString &appendTo, QStringView u
|
||||
// in qurlidna.cpp
|
||||
enum AceLeadingDot { AllowLeadingDot, ForbidLeadingDot };
|
||||
enum AceOperation { ToAceOnly, NormalizeAce };
|
||||
extern QString qt_ACE_do(QStringView domain, AceOperation op, AceLeadingDot dot);
|
||||
extern QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot,
|
||||
QUrl::AceProcessingOptions options);
|
||||
extern Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output);
|
||||
extern Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user