Remove util/publicSuffix
This program was replaced by psl-make-dafsa. Task-number: QTBUG-95889 Change-Id: If52d92734ad362364c6250473281886e1ea5545d Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
This commit is contained in:
parent
cba40055b1
commit
7a58ca2d3a
util/publicSuffix
@ -1,211 +0,0 @@
|
|||||||
/****************************************************************************
|
|
||||||
**
|
|
||||||
** Copyright (C) 2022 The Qt Company Ltd.
|
|
||||||
** Contact: https://www.qt.io/licensing/
|
|
||||||
**
|
|
||||||
** This file is part of the utils of the Qt Toolkit.
|
|
||||||
**
|
|
||||||
** $QT_BEGIN_LICENSE:GPL-EXCEPT$
|
|
||||||
** Commercial License Usage
|
|
||||||
** Licensees holding valid commercial Qt licenses may use this file in
|
|
||||||
** accordance with the commercial license agreement provided with the
|
|
||||||
** Software or, alternatively, in accordance with the terms contained in
|
|
||||||
** a written agreement between you and The Qt Company. For licensing terms
|
|
||||||
** and conditions see https://www.qt.io/terms-conditions. For further
|
|
||||||
** information use the contact form at https://www.qt.io/contact-us.
|
|
||||||
**
|
|
||||||
** GNU General Public License Usage
|
|
||||||
** Alternatively, this file may be used under the terms of the GNU
|
|
||||||
** General Public License version 3 as published by the Free Software
|
|
||||||
** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
|
|
||||||
** included in the packaging of this file. Please review the following
|
|
||||||
** information to ensure the GNU General Public License requirements will
|
|
||||||
** be met: https://www.gnu.org/licenses/gpl-3.0.html.
|
|
||||||
**
|
|
||||||
** $QT_END_LICENSE$
|
|
||||||
**
|
|
||||||
****************************************************************************/
|
|
||||||
|
|
||||||
#include <QtCore>
|
|
||||||
|
|
||||||
// Two consecutive double quotes. Inserted into generated literal text, they
// end the current string literal and immediately begin the next one.
const QString quadQuote(QStringLiteral("\"\""));
|
|
||||||
|
|
||||||
// Escapes a UTF-8 byte sequence so it can be pasted into a C++ string literal,
// e.g. tranøy.no becomes tran\xc3\xb8y.no.
//
// Every byte below 0x20 or at/above 0x7f is written as a \x escape with
// exactly two hex digits; all other bytes are copied unchanged. main() works
// out the byte length of the generated text by treating each "\x" escape as
// four characters standing for one byte, so the escapes must have fixed width.
//
// Returns the escaped text; it is empty exactly when \a array is empty.
static QString utf8encode(const QByteArray &array)
{
    const auto isHexChar = [](char c) {
        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    };
    QString result;
    result.reserve(array.length() + array.length() / 3);
    bool wasHex = false; // true when the previously emitted item was a \x escape
    for (const char c : array) {
        // if char is non-ascii (or a control character), escape it
        if (c < 0x20 || uchar(c) >= 0x7f) {
            result += QLatin1String("\\x");
            // Pad to two digits so that bytes below 0x10 do not yield "\x9".
            result += QString::number(uchar(c), 16).rightJustified(2, QLatin1Char('0'));
            wasHex = true;
        } else {
            // A \x escape swallows every following hex digit, so "äc.com"
            // written naively as "\xabc.com" would be misread; closing and
            // reopening the literal gives the intended "\xab""c.com".
            if (wasHex && isHexChar(c))
                result += quadQuote;
            result += QLatin1Char(c);
            wasHex = false;
        }
    }
    Q_ASSERT(array.isEmpty() == result.isEmpty());
    return result;
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Digest public suffix data into efficiently-searchable form.
|
|
||||||
|
|
||||||
Takes the public suffix list (see usage message), a list of DNS domains
|
|
||||||
whose child domains should not be presumed to trust one another, and
|
|
||||||
converts it to a form that lets qtbase/src/network/kernel/qtldurl.cpp's query
|
|
||||||
functions find entries efficiently.
|
|
||||||
|
|
||||||
Each line of the suffix file (aside from comments and blanks) gives a suffix
|
|
||||||
(starting with a dot) with an optional prefix of '*' (to include every
|
|
||||||
immediate child) or of '!' (to exclude the suffix, e.g. from a '*' line for
|
|
||||||
a tail of it). A line with neither of these prefixes is an exact match.
|
|
||||||
|
|
||||||
Each line is hashed and the hash is reduced modulo the number of lines
|
|
||||||
(tldCount); lines are grouped by reduced hash and separated by '\0' bytes
|
|
||||||
within each group. Conceptually, the groups are then emitted to a single
|
|
||||||
huge string, along with a table (tldIndices[tldCount]) of indices into that
|
|
||||||
string of the starts of the various groups.
|
|
||||||
|
|
||||||
However, that huge string would exceed the 64k limit at least one compiler
|
|
||||||
imposes on a single string literal, so we actually split up the huge string
|
|
||||||
into an array of chunks, each less than 64k in size. Each group is written
|
|
||||||
to a single chunk (so we start a new chunk if the next group would take the
|
|
||||||
present chunk over the limit). There are tldChunkCount chunks; their lengths
|
|
||||||
are saved in tldChunks[tldChunkCount]; the chunks themselves in
|
|
||||||
tldData[tldChunkCount]. See qtldurl.cpp's containsTLDEntry() for how to
|
|
||||||
search for a string in the resulting data.
|
|
||||||
*/
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
QCoreApplication app(argc, argv);
|
|
||||||
if (argc < 3) {
|
|
||||||
printf("\nUsage: ./%s inputFile outputFile\n\n", argv[0]);
|
|
||||||
printf("'inputFile' should be a list of effective TLDs, one per line,\n");
|
|
||||||
printf("as obtained from http://publicsuffix.org/. To create indices and data\n");
|
|
||||||
printf("file, do the following:\n\n");
|
|
||||||
printf(" wget https://publicsuffix.org/list/public_suffix_list.dat -O suffixes.dat\n");
|
|
||||||
printf(" ./%s suffixes.dat public_suffix_list.cpp\n\n", argv[0]);
|
|
||||||
printf("Then replace the code in qtbase/src/network/kernel/qurltlds_p.h\n"
|
|
||||||
"with public_suffix_list.cpp's contents\n\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
QFile file(argv[1]);
|
|
||||||
if (!file.open(QIODevice::ReadOnly)) {
|
|
||||||
fprintf(stderr, "Failed to open input file (%s); see %s -usage", argv[1], argv[0]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
QFile outFile(argv[2]);
|
|
||||||
if (!outFile.open(QIODevice::WriteOnly)) {
|
|
||||||
file.close();
|
|
||||||
fprintf(stderr, "Failed to open output file (%s); see %s -usage", argv[2], argv[0]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write tldData[] and tldIndices[] in one scan of the (input) file, but
|
|
||||||
// buffer tldData[] so we don'te interleave them in the outFile.
|
|
||||||
QByteArray outDataBufferBA;
|
|
||||||
QBuffer outDataBuffer(&outDataBufferBA);
|
|
||||||
outDataBuffer.open(QIODevice::WriteOnly);
|
|
||||||
|
|
||||||
int lineCount = 0;
|
|
||||||
while (!file.atEnd()) {
|
|
||||||
QString st = QString::fromUtf8(file.readLine()).trimmed();
|
|
||||||
if (st.isEmpty() || st.startsWith(u"//"))
|
|
||||||
continue;
|
|
||||||
lineCount++;
|
|
||||||
}
|
|
||||||
outFile.write("static const quint16 tldCount = ");
|
|
||||||
outFile.write(QByteArray::number(lineCount));
|
|
||||||
outFile.write(";\n");
|
|
||||||
|
|
||||||
file.reset();
|
|
||||||
QStringList strings(lineCount);
|
|
||||||
while (!file.atEnd()) {
|
|
||||||
QString st = QString::fromUtf8(file.readLine()).trimmed();
|
|
||||||
if (st.isEmpty() || st.startsWith(u"//"))
|
|
||||||
continue;
|
|
||||||
int num = qt_hash(st) % lineCount;
|
|
||||||
QString &entry = strings[num];
|
|
||||||
st = utf8encode(st.toUtf8());
|
|
||||||
|
|
||||||
// For domain 1.com, we could get something like a.com\01.com, which
|
|
||||||
// would be misinterpreted as octal 01, so we need to separate such
|
|
||||||
// strings with quotes:
|
|
||||||
if (!entry.isEmpty() && st.at(0).isDigit())
|
|
||||||
entry.append(quadQuote);
|
|
||||||
|
|
||||||
entry.append(st);
|
|
||||||
entry.append("\\0");
|
|
||||||
}
|
|
||||||
|
|
||||||
outFile.write("// After the tldCount \"real\" entries in tldIndices, include a final entry\n");
|
|
||||||
outFile.write("// that records the sum of the lengths of all the chunks, i.e. the index\n");
|
|
||||||
outFile.write("// just past the end of tldChunks.\n");
|
|
||||||
outFile.write("static constexpr quint32 tldIndices[tldCount + 1] = {\n");
|
|
||||||
outDataBuffer.write("static const char * const tldData[tldChunkCount] = {");
|
|
||||||
|
|
||||||
int totalUtf8Size = 0;
|
|
||||||
int chunkSize = 0; // strlen of the current chunk (sizeof is bigger by 1)
|
|
||||||
QStringList chunks;
|
|
||||||
for (int a = 0; a < lineCount; a++) {
|
|
||||||
outFile.write(QByteArray::number(totalUtf8Size));
|
|
||||||
outFile.write(",\n");
|
|
||||||
const QString &entry = strings.at(a);
|
|
||||||
if (!entry.isEmpty()) {
|
|
||||||
const int zeroCount = entry.count(QLatin1String("\\0"));
|
|
||||||
const int utf8CharsCount = entry.count(QLatin1String("\\x"));
|
|
||||||
const int quoteCount = entry.count('"');
|
|
||||||
const int stringUtf8Size = entry.count() - (zeroCount + quoteCount + utf8CharsCount * 3);
|
|
||||||
chunkSize += stringUtf8Size;
|
|
||||||
// MSVC 2015 chokes if sizeof(a single string) > 0xffff
|
|
||||||
if (chunkSize >= 0xffff) {
|
|
||||||
static int chunkCount = 0;
|
|
||||||
qWarning() << "chunk" << ++chunkCount << "has length" << chunkSize - stringUtf8Size;
|
|
||||||
outDataBuffer.write(",\n");
|
|
||||||
chunks.append(QString::number(totalUtf8Size));
|
|
||||||
chunkSize = 0;
|
|
||||||
}
|
|
||||||
totalUtf8Size += stringUtf8Size;
|
|
||||||
|
|
||||||
outDataBuffer.write("\n\"");
|
|
||||||
outDataBuffer.write(entry.toUtf8());
|
|
||||||
outDataBuffer.write("\"");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
chunks.append(QString::number(totalUtf8Size));
|
|
||||||
|
|
||||||
// Write one extra entry, at tldIndices[tldCount], that contains the total size.
|
|
||||||
outFile.write(QByteArray::number(totalUtf8Size));
|
|
||||||
outFile.write("\n};\n");
|
|
||||||
|
|
||||||
outDataBuffer.write("\n};\n");
|
|
||||||
outDataBuffer.close();
|
|
||||||
|
|
||||||
// First we have to define tldChunkCount.
|
|
||||||
outFile.write("\nstatic const quint16 tldChunkCount = ");
|
|
||||||
outFile.write(QByteArray::number(chunks.count()));
|
|
||||||
outFile.write(";\n");
|
|
||||||
|
|
||||||
// Write tldData[tldChunkCount] = {...}.
|
|
||||||
outFile.write(outDataBufferBA);
|
|
||||||
|
|
||||||
outFile.write("static constexpr quint32 tldChunks[tldChunkCount] = {");
|
|
||||||
outFile.write(chunks.join(", ").toLatin1());
|
|
||||||
outFile.write("};\n");
|
|
||||||
outFile.close();
|
|
||||||
printf("Data generated to %s - now revise qtbase/src/network/kernel/qurltlds_p.h to use this data.\n", argv[2]);
|
|
||||||
return 0;
|
|
||||||
}
|
|
@ -1,4 +0,0 @@
|
|||||||
QT = core
|
|
||||||
CONFIG += cmdline
|
|
||||||
|
|
||||||
SOURCES += main.cpp
|
|
Loading…
Reference in New Issue
Block a user