QStringList: use local storage in removeDuplicates()

If available, use a C++17 std::pmr::unordered_set with a monotonic
buffer resource and a 256-byte stack buffer to avoid the per-element
allocations of QSet.

Results on my machine:

 RESULT : tst_QStringList::removeDuplicates():"empty":
-     0.00014 msecs per iteration (total: 74, iterations: 524288)
+     0.000031 msecs per iteration (total: 66, iterations: 2097152)
 RESULT : tst_QStringList::removeDuplicates():"short-dup-0.00":
-     0.00043 msecs per iteration (total: 57, iterations: 131072)
+     0.00013 msecs per iteration (total: 69, iterations: 524288)
 RESULT : tst_QStringList::removeDuplicates():"short-dup-0.50":
-     0.00049 msecs per iteration (total: 65, iterations: 131072)
+     0.00032 msecs per iteration (total: 85, iterations: 262144)
 RESULT : tst_QStringList::removeDuplicates():"short-dup-0.66":
-     0.00057 msecs per iteration (total: 75, iterations: 131072)
+     0.00039 msecs per iteration (total: 52, iterations: 131072)
 RESULT : tst_QStringList::removeDuplicates():"short-dup-0.75":
-     0.00064 msecs per iteration (total: 85, iterations: 131072)
+     0.00048 msecs per iteration (total: 63, iterations: 131072)
 RESULT : tst_QStringList::removeDuplicates():"long-dup-0.00":
-     0.083 msecs per iteration (total: 85, iterations: 1024)
+     0.039 msecs per iteration (total: 80, iterations: 2048)
 RESULT : tst_QStringList::removeDuplicates():"long-dup-0.50":
-     0.11 msecs per iteration (total: 58, iterations: 512)
+     0.078 msecs per iteration (total: 80, iterations: 1024)
 RESULT : tst_QStringList::removeDuplicates():"long-dup-0.66":
-     0.13 msecs per iteration (total: 70, iterations: 512)
+     0.10 msecs per iteration (total: 53, iterations: 512)
 RESULT : tst_QStringList::removeDuplicates():"long-dup-0.75":
-     0.16 msecs per iteration (total: 86, iterations: 512)
+     0.13 msecs per iteration (total: 69, iterations: 512)

When interpreting the data, take into account that each iteration
_also_ contains a deep copy of the QStringList, due to the detach from
'input'.

The pattern is used elsewhere in Qt, so I've put the class that
implements the seen set into a private header file and used it in some
other places I found.

Change-Id: I1f71a82008a16d5a3818f91f290ade21d837805e
Reviewed-by: Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
This commit is contained in:
Marc Mutz 2019-12-18 15:43:24 +01:00
parent bf330a8f03
commit f21a6d409e
8 changed files with 153 additions and 16 deletions

View File

@ -43,9 +43,9 @@
#if QT_CONFIG(regularexpression)
# include <qregularexpression.h>
#endif
#include <private/qduplicatetracker_p.h>
#include <algorithm>
QT_BEGIN_NAMESPACE
/*! \typedef QStringListIterator
@ -885,15 +885,13 @@ int QtPrivate::QStringList_removeDuplicates(QStringList *that)
{
int n = that->size();
int j = 0;
QSet<QString> seen;
QDuplicateTracker<QString> seen;
seen.reserve(n);
int setSize = 0;
for (int i = 0; i < n; ++i) {
const QString &s = that->at(i);
seen.insert(s);
if (setSize == seen.size()) // unchanged size => was already seen
if (seen.hasSeen(s))
continue;
++setSize;
if (j != i)
that->swapItemsAt(i, j);
++j;

View File

@ -0,0 +1,94 @@
/****************************************************************************
**
** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QDUPLICATETRACKER_P_H
#define QDUPLICATETRACKER_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
#include <qglobal.h>
#if QT_HAS_INCLUDE(<memory_resource>) && __cplusplus > 201402L
# include <unordered_set>
# include <memory_resource>
#else
# include <qset.h>
#endif
QT_BEGIN_NAMESPACE
/*
    QDuplicateTracker records which values have already been encountered.

    hasSeen(v) inserts v and reports whether it had been inserted before,
    so a single call replaces the usual contains()/insert() pair.

    When the standard library provides <memory_resource>
    (__cpp_lib_memory_resource is defined), the set is a
    std::pmr::unordered_set fed by a monotonic buffer resource that starts
    out on a buffer embedded in this object. Once that buffer is used up,
    the resource obtains further memory from its upstream resource, so the
    buffer size only affects performance, never correctness. Otherwise a
    QSet is used.

    T        - element type; must be hashable by the set in use.
    Prealloc - number of elements the embedded buffer is dimensioned for.

    NOTE(review): the #include selection preceding this class keys on
    QT_HAS_INCLUDE(<memory_resource>) && __cplusplus > 201402L, while the
    class keys on __cpp_lib_memory_resource. If the header exists but does
    not define the feature macro, the QSet branch is compiled without
    <qset.h> having been included - confirm and align the two conditions.
*/
template <typename T, size_t Prealloc = 32>
class QDuplicateTracker {
#ifdef __cpp_lib_memory_resource
    // Approximation of one hash-set node: besides the value itself, typical
    // implementations store a link to the next node and often the cached
    // hash. Sizing the buffer as Prealloc * sizeof(T) ignores this overhead
    // (and the bucket array), so the buffer would be exhausted well before
    // Prealloc elements have been stored.
    struct node_guesstimate { void *next; size_t hash; T value; };
    char buffer[Prealloc * (sizeof(void *)                // bucket array
                            + sizeof(node_guesstimate))]; // nodes
    std::pmr::monotonic_buffer_resource res{buffer, sizeof buffer};
    std::pmr::unordered_set<T> set{&res};
#else
    QSet<T> set;
    int setSize = 0; // size of 'set' after the previous hasSeen() call
#endif
    Q_DISABLE_COPY_MOVE(QDuplicateTracker);
public:
    QDuplicateTracker() = default;

    // Prepares the set for holding n elements.
    void reserve(int n)
    {
#ifdef __cpp_lib_memory_resource
        // std::unordered_set::reserve() takes an unsigned count: a negative
        // n would wrap around to an enormous request, so ignore it.
        if (n > 0)
            set.reserve(static_cast<size_t>(n));
#else
        set.reserve(n);
#endif
    }

    // Records s and returns true if s had already been recorded by an
    // earlier call, false if this is the first time s is encountered.
    Q_REQUIRED_RESULT bool hasSeen(const T &s)
    {
        bool inserted;
#ifdef __cpp_lib_memory_resource
        inserted = set.insert(s).second;
#else
        // Detect a successful insertion by comparing the set's size
        // before and after the call.
        set.insert(s);
        const int n = set.size();
        inserted = qExchange(setSize, n) != n;
#endif
        return !inserted;
    }
};
QT_END_NAMESPACE
#endif /* QDUPLICATETRACKER_P_H */

View File

@ -12,6 +12,7 @@ HEADERS += \
tools/qcontainerfwd.h \
tools/qcontainertools_impl.h \
tools/qcryptographichash.h \
tools/qduplicatetracker_p.h \
tools/qfreelist_p.h \
tools/qhash.h \
tools/qhashfunctions.h \

View File

@ -56,6 +56,8 @@
#include <QtGui/qguiapplication.h>
#include <QtCore/private/qduplicatetracker_p.h>
#include <fontconfig/fontconfig.h>
#if FC_VERSION >= 20402
#include <fontconfig/fcfreetype.h>
@ -778,9 +780,9 @@ QStringList QFontconfigDatabase::fallbacksForFamily(const QString &family, QFont
FcPatternDestroy(pattern);
if (fontSet) {
QSet<QString> duplicates;
QDuplicateTracker<QString> duplicates;
duplicates.reserve(fontSet->nfont + 1);
duplicates.insert(family.toCaseFolded());
(void)duplicates.hasSeen(family.toCaseFolded());
for (int i = 0; i < fontSet->nfont; i++) {
FcChar8 *value = nullptr;
if (FcPatternGetString(fontSet->fonts[i], FC_FAMILY, 0, &value) != FcResultMatch)
@ -788,9 +790,8 @@ QStringList QFontconfigDatabase::fallbacksForFamily(const QString &family, QFont
// capitalize(value);
const QString familyName = QString::fromUtf8((const char *)value);
const QString familyNameCF = familyName.toCaseFolded();
if (!duplicates.contains(familyNameCF)) {
if (!duplicates.hasSeen(familyNameCF)) {
fallbackFamilies << familyName;
duplicates.insert(familyNameCF);
}
}
FcFontSetDestroy(fontSet);

View File

@ -48,6 +48,8 @@
#include <private/qwidgetitemdata_p.h>
#include <private/qtreewidgetitemiterator_p.h>
#include <QtCore/private/qduplicatetracker_p.h>
#include <algorithm>
QT_BEGIN_NAMESPACE
@ -3175,13 +3177,12 @@ QList<QTreeWidgetItem*> QTreeWidget::selectedItems() const
const QModelIndexList indexes = selectionModel()->selectedIndexes();
QList<QTreeWidgetItem*> items;
items.reserve(indexes.count());
QSet<QTreeWidgetItem *> seen;
QDuplicateTracker<QTreeWidgetItem *> seen;
seen.reserve(indexes.count());
for (const auto &index : indexes) {
QTreeWidgetItem *item = d->item(index);
if (item->isHidden() || seen.contains(item))
if (item->isHidden() || seen.hasSeen(item))
continue;
seen.insert(item);
items.append(item);
}
return items;

View File

@ -60,6 +60,9 @@
#include <qshareddata.h>
#include <qdebug.h>
#include <qxmlstream.h>
#include <private/qduplicatetracker_p.h>
#include <stdio.h>
QT_BEGIN_NAMESPACE
@ -4081,10 +4084,10 @@ void QDomElementPrivate::save(QTextStream& s, int depth, int indent) const
}
s << '<' << qName << nsDecl;
QSet<QString> outputtedPrefixes;
/* Write out attributes. */
if (!m_attr->map.isEmpty()) {
QDuplicateTracker<QString> outputtedPrefixes;
QHash<QString, QDomNodePrivate *>::const_iterator it = m_attr->map.constBegin();
for (; it != m_attr->map.constEnd(); ++it) {
s << ' ';
@ -4105,9 +4108,8 @@ void QDomElementPrivate::save(QTextStream& s, int depth, int indent) const
* arrive in those situations. */
if((!it.value()->ownerNode ||
it.value()->ownerNode->prefix != it.value()->prefix) &&
!outputtedPrefixes.contains(it.value()->prefix)) {
!outputtedPrefixes.hasSeen(it.value()->prefix)) {
s << " xmlns:" << it.value()->prefix << "=\"" << encodeText(it.value()->namespaceURI, s, true, true) << '\"';
outputtedPrefixes.insert(it.value()->prefix);
}
}
}

View File

@ -41,6 +41,9 @@ private slots:
void join() const;
void join_data() const;
void removeDuplicates() const;
void removeDuplicates_data() const;
void split_qlist_qbytearray() const;
void split_qlist_qbytearray_data() const { return split_data(); }
@ -116,6 +119,42 @@ void tst_QStringList::join_data() const
<< QString();
}
// Benchmarks QStringList::removeDuplicates() on the "input" list of the
// current data row.
void tst_QStringList::removeDuplicates() const
{
    QFETCH(const QStringList, input);

    QBENCHMARK {
        // Operate on a fresh copy in every iteration so that 'input' itself
        // is never modified; the cost of this copy is part of the result.
        QStringList scratch = input;
        scratch.removeDuplicates();
    }
}
void tst_QStringList::removeDuplicates_data() const
{
QTest::addColumn<QStringList>("input");
const QStringList s = {"one", "two", "three"};
QTest::addRow("empty") << QStringList();
QTest::addRow("short-dup-0.00") << s;
QTest::addRow("short-dup-0.50") << (s + s);
QTest::addRow("short-dup-0.66") << (s + s + s);
QTest::addRow("short-dup-0.75") << (s + s + s + s);
const QStringList l = []() {
QStringList result;
const int n = 1000;
result.reserve(n);
for (int i = 0; i < n; ++i)
result.push_back(QString::number(i));
return result;
}();
QTest::addRow("long-dup-0.00") << l;
QTest::addRow("long-dup-0.50") << (l + l);
QTest::addRow("long-dup-0.66") << (l + l + l);
QTest::addRow("long-dup-0.75") << (l + l + l + l);
}
void tst_QStringList::split_data() const
{
QTest::addColumn<QString>("input");

View File

@ -1,5 +1,6 @@
# qmake project for the QStringList benchmark executable.
TARGET = tst_bench_qstringlist
# Drop the debug configuration and request a release build instead.
CONFIG -= debug
CONFIG += release
# NOTE(review): presumably marks this project as a benchmark for Qt's
# build system - confirm against the qmake feature files.
CONFIG += benchmark
# Only the core and testlib Qt modules are used.
QT = core testlib
SOURCES += main.cpp