Make qYieldCpu() public API
Rewritten to be a bit simpler, added a few more yield/YieldProcessor alternatives, added RISC-V support. [ChangeLog][QtCore] Added qYieldCpu() function. Fixes: QTBUG-103014 Change-Id: I53335f845a1345299031fffd176f59032e7400f5 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
parent
aaa8c38353
commit
a7f227f56c
@ -3,8 +3,6 @@
|
||||
|
||||
#include "qtconcurrentthreadengine.h"
|
||||
|
||||
#include <QtCore/private/qsimd_p.h>
|
||||
|
||||
#if !defined(QT_NO_CONCURRENT) || defined(Q_QDOC)
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
@ -251,6 +251,7 @@ qt_internal_add_module(Core
|
||||
thread/qthreadstorage.h
|
||||
thread/qtsan_impl.h
|
||||
thread/qwaitcondition.h thread/qwaitcondition_p.h
|
||||
thread/qyieldcpu.h
|
||||
time/qcalendar.cpp time/qcalendar.h
|
||||
time/qcalendarbackend_p.h
|
||||
time/qcalendarmath_p.h
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include "qdebug.h"
|
||||
#include "qmutex.h"
|
||||
#include <QtCore/private/qlocking_p.h>
|
||||
#include <QtCore/private/qsimd_p.h>
|
||||
#include "qloggingcategory.h"
|
||||
#ifndef QT_BOOTSTRAPPED
|
||||
#include "qelapsedtimer.h"
|
||||
|
@ -378,49 +378,6 @@ static inline uint64_t qCpuFeatures()
|
||||
#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
|
||||
|| ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))
|
||||
|
||||
/*
|
||||
Small wrapper around x86's PAUSE and ARM's YIELD instructions.
|
||||
|
||||
This is completely different from QThread::yieldCurrentThread(), which is
|
||||
an OS-level operation that takes the whole thread off the CPU.
|
||||
|
||||
This is just preventing one SMT thread from filling a core's pipeline with
|
||||
speculated further loop iterations (which need to be expensively flushed on
|
||||
final success) when it could just give those pipeline slots to a second SMT
|
||||
thread that can do something useful with the core, such as unblocking this
|
||||
SMT thread :)
|
||||
|
||||
So, instead of
|
||||
|
||||
while (!condition)
|
||||
;
|
||||
|
||||
it's better to use
|
||||
|
||||
while (!condition)
|
||||
qYieldCpu();
|
||||
*/
|
||||
/* Spin-wait hint: _mm_pause() on x86, a YIELD on ARMv7 or later, and an empty body on every other architecture. */
static inline void qYieldCpu()
|
||||
{
|
||||
#if defined(Q_PROCESSOR_X86)
|
||||
_mm_pause();
|
||||
#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7 /* yield was added in ARMv7 */
|
||||
/* Three ways to emit YIELD, tried in order: compiler builtin, inline assembly, then the __yield() intrinsic. */
# if __has_builtin(__builtin_arm_yield) /* e.g. Clang */
|
||||
__builtin_arm_yield();
|
||||
# elif defined(Q_OS_INTEGRITY) || defined(Q_CC_GNU_ONLY)
|
||||
/*
|
||||
- Integrity is missing the arm_acle.h header
|
||||
- GCC doesn't have __yield() in arm_acle.h
|
||||
https://stackoverflow.com/a/70076751/134841
|
||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105416
|
||||
*/
|
||||
asm volatile("yield"); /* this works everywhere */
|
||||
# else
|
||||
__yield(); /* this is what should work everywhere */
|
||||
# endif
|
||||
|
||||
/* There is no #else branch: when neither condition above holds, nothing is emitted. */
#endif
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define QATOMIC_CXX11_H
|
||||
|
||||
#include <QtCore/qgenericatomic.h>
|
||||
#include <QtCore/qyieldcpu.h>
|
||||
#include <atomic>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include <QtCore/qcoreapplication.h>
|
||||
#include <QtCore/qthread.h>
|
||||
#include <QtCore/qvarlengtharray.h>
|
||||
#include <QtCore/private/qsimd_p.h> // for qYieldCpu()
|
||||
#include <private/qthreadpool_p.h>
|
||||
#include <private/qobject_p.h>
|
||||
|
||||
|
66
src/corelib/thread/qyieldcpu.h
Normal file
66
src/corelib/thread/qyieldcpu.h
Normal file
@ -0,0 +1,66 @@
|
||||
// Copyright (C) 2023 The Qt Company Ltd.
|
||||
// Copyright (C) 2023 Intel Corporation.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
|
||||
|
||||
#ifndef QYIELDCPU_H
|
||||
#define QYIELDCPU_H
|
||||
|
||||
#include <QtCore/qcompilerdetection.h>
|
||||
#include <QtCore/qprocessordetection.h>
|
||||
#include <QtCore/qtconfigmacros.h>
|
||||
|
||||
#ifdef Q_CC_MSVC_ONLY
|
||||
// MSVC defines _YIELD_PROCESSOR() in <xatomic.h>, but as that is a private
|
||||
// header, we include the public ones
|
||||
# ifdef __cplusplus
|
||||
# include <atomic>
|
||||
extern "C"
|
||||
# endif
|
||||
void _mm_pause(void); // the compiler recognizes as intrinsic
|
||||
#endif
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
#ifdef Q_CC_GNU
|
||||
__attribute__((artificial))
|
||||
#endif
|
||||
Q_ALWAYS_INLINE void qYieldCpu(void) Q_DECL_NOEXCEPT;
|
||||
|
||||
void qYieldCpu(void)
|
||||
#ifdef __cplusplus
|
||||
noexcept
|
||||
#endif
|
||||
{
|
||||
#if __has_builtin(__yield)
|
||||
__yield(); // Generic
|
||||
#elif defined(_YIELD_PROCESSOR) && defined(Q_CC_MSVC)
|
||||
_YIELD_PROCESSOR(); // Generic; MSVC's <atomic>
|
||||
|
||||
#elif __has_builtin(__builtin_ia32_pause)
|
||||
__builtin_ia32_pause();
|
||||
#elif defined(Q_PROCESSOR_X86) && defined(Q_CC_GNU)
|
||||
// GCC < 10 didn't have __has_builtin()
|
||||
__builtin_ia32_pause();
|
||||
#elif defined(Q_PROCESSOR_X86) && defined(Q_CC_MSVC)
|
||||
_mm_pause();
|
||||
#elif defined(Q_PROCESSOR_X86)
|
||||
asm("pause"); // hopefully asm() works in this compiler
|
||||
|
||||
#elif __has_builtin(__builtin_arm_yield)
|
||||
__builtin_arm_yield();
|
||||
#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7
|
||||
asm("yield"); // this works everywhere
|
||||
|
||||
#elif __has_builtin(__builtin_riscv_pause)
|
||||
__builtin_riscv_pause(); // Zihintpause extension
|
||||
#elif defined(Q_PROCESSOR_RISCV)
|
||||
asm("fence w, 0"); // a.k.a. "pause"
|
||||
|
||||
#elif defined(_YIELD_PROCESSOR) && defined(Q_CC_GHS)
|
||||
_YIELD_PROCESSOR; // Green Hills (INTEGRITY), but only on ARM
|
||||
#endif
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QYIELDCPU_H
|
59
src/corelib/thread/qyieldcpu.qdoc
Normal file
59
src/corelib/thread/qyieldcpu.qdoc
Normal file
@ -0,0 +1,59 @@
|
||||
// Copyright (C) 2023 The Qt Company Ltd.
|
||||
// Copyright (C) 2023 Intel Corporation.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
|
||||
|
||||
/*!
|
||||
\fn qYieldCpu()
|
||||
\inmodule QtCore
|
||||
\ingroup thread
|
||||
\relates QAtomicInteger
|
||||
\relatesalso QAtomicPointer
|
||||
\since 6.7
|
||||
|
||||
Pauses the execution of the current thread for an unspecified time, using
|
||||
hardware instructions, without de-scheduling this thread. This function is
|
||||
meant to be used in high-throughput loops where the code expects another
|
||||
thread to modify an atomic variable. This is completely different from
|
||||
QThread::yieldCurrentThread(), which is an OS-level operation that may take
|
||||
the whole thread off the CPU and allow other threads (possibly belonging to
|
||||
other processes) to run.
|
||||
|
||||
So, instead of
|
||||
\code
|
||||
while (!condition)
|
||||
;
|
||||
\endcode
|
||||
|
||||
one should write
|
||||
\code
|
||||
while (!condition)
|
||||
qYieldCpu();
|
||||
\endcode
|
||||
|
||||
This is useful both with and without hardware multithreading on the same
|
||||
core. In the case of hardware threads, it serves to prevent further
|
||||
speculative execution filling up the pipeline, which could starve the
|
||||
sibling thread of resources. Across cores and higher levels of separation,
|
||||
it allows the cache coherency protocol to allocate the cache line being
|
||||
modified and inspected to the logical processor whose result this code is
|
||||
expecting.
|
||||
|
||||
It is also recommended to spin on code that only reads the shared atomic
|
||||
variable, to avoid contention in exclusively obtaining the memory location.
|
||||
Therefore, an atomic modification loop such as a spinlock acquisition
|
||||
should be:
|
||||
|
||||
\code
|
||||
while (true) {
|
||||
while (!readOnlyCondition(atomic))
|
||||
qYieldCpu();
|
||||
if (modify(atomic))
|
||||
break;
|
||||
}
|
||||
\endcode
|
||||
|
||||
On x86 processors and on RISC-V processors with the \c{Zihintpause}
|
||||
extension, this will emit the \c PAUSE instruction, which is ignored on
|
||||
processors that don't support it; on ARMv7 or later ARM processors, it will
|
||||
emit the \c{YIELD} instruction.
|
||||
*/
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include <QtCore/qglobal.h>
|
||||
#include <QtCore/qtversion.h>
|
||||
#include <QtCore/qyieldcpu.h>
|
||||
|
||||
#ifdef Q_COMPILER_THREAD_LOCAL
|
||||
# include <threads.h>
|
||||
@ -62,6 +63,12 @@ const char *tst_qVersion()
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Wrapper whose only statement is a call to qYieldCpu().
 * The prototype is declared separately, immediately before the definition.
 * NOTE(review): presumably exists to prove that <QtCore/qyieldcpu.h> compiles
 * and that qYieldCpu() is callable from this test file - confirm.
 */
void tst_qYieldCpu(void) Q_DECL_NOEXCEPT;
|
||||
void tst_qYieldCpu(void)
|
||||
{
|
||||
qYieldCpu();
|
||||
}
|
||||
|
||||
/* Static assertion */
|
||||
Q_STATIC_ASSERT(true);
|
||||
Q_STATIC_ASSERT(1);
|
||||
|
Loading…
Reference in New Issue
Block a user