QStringView: De-inline the length calculation so we can use SSE2

Performance is more important in this case than the theoretical benefit
of constexpr. This commit implements the SSE2 search for 16-bit null and
it might be possible to implement the equivalent for AArch64
(investigation required). It also adds a fallback to wcslen() for
systems where wchar_t is short (non-x86 Windows or 32-bit x86 build with
-no-sse2).

We can re-add the constexpr loop once the C++ language has a way of
overloading constexpr and non-constexpr. GCC has a non-standard way to
do that with __builtin_constant_p, which is also implemented in this
commit, but note that the inline function is still not constexpr.

Change-Id: I6e9274c1e7444ad48c81fffd14dcaacafda5ebdc
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Thiago Macieira 2017-08-20 14:22:44 -07:00
parent ad8a48e8f1
commit 3b61cd6ad7
4 changed files with 96 additions and 10 deletions

View File

@ -72,6 +72,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <wchar.h>
#include "qchar.cpp"
#include "qstringmatcher.cpp"
@ -159,6 +160,43 @@ static inline bool qt_ends_with(QStringView haystack, QStringView needle, Qt::Ca
static inline bool qt_ends_with(QStringView haystack, QLatin1String needle, Qt::CaseSensitivity cs);
static inline bool qt_ends_with(QStringView haystack, QChar needle, Qt::CaseSensitivity cs);
/*
    Returns the number of 16-bit units in \a str that precede the first
    null unit (the UTF-16 counterpart of strlen()).

    \a str must point to a null-terminated sequence; it is dereferenced
    unconditionally, so a null pointer is not accepted here.

    Exactly one implementation is compiled in:
    - with SSE2, a vectorised search that tests eight units per iteration;
    - otherwise wcslen() when wchar_t has the same size as ushort, or a
      plain scalar loop when it does not.
*/
qssize_t qustrlen(const ushort *str) Q_DECL_NOTHROW
{
#ifdef __SSE2__
    // progress until we get an aligned pointer
    // (stops early if the terminator shows up before the 16-byte boundary)
    const ushort *ptr = str;
    while (*ptr && quintptr(ptr) % 16)
        ++ptr;
    if (*ptr == 0)
        return ptr - str;

    // load 16 bytes and see if we have a null
    // (aligned loads can never segfault)
    int mask;
    const __m128i zeroes = _mm_setzero_si128();
    do {
        __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
        ptr += 8;   // eight ushorts == the 16 bytes just loaded

        // each 16-bit lane that equals zero becomes 0xFFFF ...
        __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
        // ... and contributes two adjacent set bits (one per byte) to mask
        mask = _mm_movemask_epi8(comparison);
    } while (mask == 0);

    // found a null
    // idx is a byte offset inside the chunk; halve it to get the ushort
    // offset, and subtract 8 because ptr already points past that chunk
    uint idx = qCountTrailingZeroBits(quint32(mask));
    return ptr - str - 8 + idx / 2;
#else
    // no SSE2: let the C library do the work if wchar_t is 16 bits wide
    if (sizeof(wchar_t) == sizeof(ushort))
        return wcslen(reinterpret_cast<const wchar_t *>(str));

    // last resort: one unit at a time
    qssize_t result = 0;
    while (*str++)
        ++result;
    return result;
#endif
}
#if defined(Q_COMPILER_LAMBDA) && !defined(__OPTIMIZE_SIZE__)
namespace {
template <uint MaxCount> struct UnrollTailLoop

View File

@ -53,6 +53,8 @@ class QLatin1String;
class QStringView;
template <typename T> class QVector;
// Returns the number of 16-bit units in the null-terminated sequence \a str,
// not counting the terminating null. Exported and defined out of line rather
// than as an inline constexpr loop, so that the implementation is free to use
// SIMD instructions or wcslen().
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION qssize_t qustrlen(const ushort *str) Q_DECL_NOTHROW;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int qCompareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) Q_DECL_NOTHROW;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int qCompareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) Q_DECL_NOTHROW;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int qCompareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) Q_DECL_NOTHROW;

View File

@ -143,20 +143,22 @@ private:
{
return qssize_t(N - 1);
}
template <typename Char>
static Q_DECL_RELAXED_CONSTEXPR qssize_t lengthHelperPointer(const Char *str) Q_DECL_NOTHROW
static qssize_t lengthHelperPointer(const Char *str) Q_DECL_NOTHROW
{
qssize_t result = 0;
while (*str++)
++result;
return result;
#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
if (__builtin_constant_p(*str)) {
qssize_t result = 0;
while (*str++)
++result;
}
#endif
return qustrlen(reinterpret_cast<const ushort *>(str));
}
static Q_DECL_RELAXED_CONSTEXPR qssize_t lengthHelperPointer(const QChar *str) Q_DECL_NOTHROW
static qssize_t lengthHelperPointer(const QChar *str) Q_DECL_NOTHROW
{
qssize_t result = 0;
while (!str++->isNull())
++result;
return result;
return qustrlen(reinterpret_cast<const ushort *>(str));
}
template <typename Char>

View File

@ -136,6 +136,7 @@ class tst_QStringView : public QObject
private Q_SLOTS:
void constExpr() const;
void basics() const;
void literals() const;
void at() const;
void fromQString() const;
@ -305,6 +306,12 @@ void tst_QStringView::constExpr() const
Q_STATIC_ASSERT(!sv2.isNull());
Q_STATIC_ASSERT(!sv2.empty());
Q_STATIC_ASSERT(sv2.size() == 5);
constexpr char16_t *null = nullptr;
constexpr QStringView sv3(null);
Q_STATIC_ASSERT(sv3.isNull());
Q_STATIC_ASSERT(sv3.isEmpty());
Q_STATIC_ASSERT(sv3.size() == 0);
}
#else // storage_type is wchar_t
{
@ -328,6 +335,12 @@ void tst_QStringView::constExpr() const
Q_STATIC_ASSERT(!sv2.isNull());
Q_STATIC_ASSERT(!sv2.empty());
Q_STATIC_ASSERT(sv2.size() == 5);
constexpr wchar_t *null = nullptr;
constexpr QStringView sv3(null);
Q_STATIC_ASSERT(sv3.isNull());
Q_STATIC_ASSERT(sv3.isEmpty());
Q_STATIC_ASSERT(sv3.size() == 0);
}
#endif
#endif
@ -348,6 +361,37 @@ void tst_QStringView::basics() const
QVERIFY(!(sv2 != sv1));
}
// Runtime checks for QStringViews created from a five-character string
// literal, passed both as a pointer and as an array: size, empty/null state,
// and first/last character through every accessor.
void tst_QStringView::literals() const
{
// Choose the literal prefix that matches the view's storage type: u"" in the
// first branch, L"" (wchar_t) in the second.
#if !defined(Q_OS_WIN) || defined(Q_COMPILER_UNICODE_STRINGS)
// the + ensures it's a pointer, not an array
QCOMPARE(QStringView(+u"Hello").size(), 5);
// no unary + here, so the literal is passed as an array
QStringView sv = u"Hello";
#else // storage_type is wchar_t
// the + ensures it's a pointer, not an array
QCOMPARE(QStringView(+L"Hello").size(), 5);
// no unary + here, so the literal is passed as an array
QStringView sv = L"Hello";
#endif
// the terminating null must not be counted
QCOMPARE(sv.size(), 5);
QVERIFY(!sv.empty());
QVERIFY(!sv.isEmpty());
QVERIFY(!sv.isNull());
// first character, through the raw pointer and through each accessor
QCOMPARE(*sv.utf16(), 'H');
QCOMPARE(sv[0], QLatin1Char('H'));
QCOMPARE(sv.at(0), QLatin1Char('H'));
QCOMPARE(sv.front(), QLatin1Char('H'));
QCOMPARE(sv.first(), QLatin1Char('H'));
// last character, at index size() - 1
QCOMPARE(sv[4], QLatin1Char('o'));
QCOMPARE(sv.at(4), QLatin1Char('o'));
QCOMPARE(sv.back(), QLatin1Char('o'));
QCOMPARE(sv.last(), QLatin1Char('o'));
// rebuild a view over the same characters from a (begin, end) pointer pair
QStringView sv2(sv.utf16(), sv.utf16() + sv.size());
QVERIFY(!sv2.isNull());
QVERIFY(!sv2.empty());
QCOMPARE(sv2.size(), 5);
}
void tst_QStringView::at() const
{
QString hello("Hello");