Make qYieldCpu() public API
Rewritten to be a bit simpler, added a few more yield/YieldProcessor alternatives, added RISC-V support. [ChangeLog][QtCore] Added qYieldCpu() function. Fixes: QTBUG-103014 Change-Id: I53335f845a1345299031fffd176f59032e7400f5 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
parent
aaa8c38353
commit
a7f227f56c
@ -3,8 +3,6 @@
|
|||||||
|
|
||||||
#include "qtconcurrentthreadengine.h"
|
#include "qtconcurrentthreadengine.h"
|
||||||
|
|
||||||
#include <QtCore/private/qsimd_p.h>
|
|
||||||
|
|
||||||
#if !defined(QT_NO_CONCURRENT) || defined(Q_QDOC)
|
#if !defined(QT_NO_CONCURRENT) || defined(Q_QDOC)
|
||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
QT_BEGIN_NAMESPACE
|
||||||
|
@ -251,6 +251,7 @@ qt_internal_add_module(Core
|
|||||||
thread/qthreadstorage.h
|
thread/qthreadstorage.h
|
||||||
thread/qtsan_impl.h
|
thread/qtsan_impl.h
|
||||||
thread/qwaitcondition.h thread/qwaitcondition_p.h
|
thread/qwaitcondition.h thread/qwaitcondition_p.h
|
||||||
|
thread/qyieldcpu.h
|
||||||
time/qcalendar.cpp time/qcalendar.h
|
time/qcalendar.cpp time/qcalendar.h
|
||||||
time/qcalendarbackend_p.h
|
time/qcalendarbackend_p.h
|
||||||
time/qcalendarmath_p.h
|
time/qcalendarmath_p.h
|
||||||
|
@ -14,7 +14,6 @@
|
|||||||
#include "qdebug.h"
|
#include "qdebug.h"
|
||||||
#include "qmutex.h"
|
#include "qmutex.h"
|
||||||
#include <QtCore/private/qlocking_p.h>
|
#include <QtCore/private/qlocking_p.h>
|
||||||
#include <QtCore/private/qsimd_p.h>
|
|
||||||
#include "qloggingcategory.h"
|
#include "qloggingcategory.h"
|
||||||
#ifndef QT_BOOTSTRAPPED
|
#ifndef QT_BOOTSTRAPPED
|
||||||
#include "qelapsedtimer.h"
|
#include "qelapsedtimer.h"
|
||||||
|
@ -378,49 +378,6 @@ static inline uint64_t qCpuFeatures()
|
|||||||
#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
|
#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
|
||||||
|| ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))
|
|| ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))
|
||||||
|
|
||||||
/*
|
|
||||||
Small wrapper around x86's PAUSE and ARM's YIELD instructions.
|
|
||||||
|
|
||||||
This is completely different from QThread::yieldCurrentThread(), which is
|
|
||||||
an OS-level operation that takes the whole thread off the CPU.
|
|
||||||
|
|
||||||
This is just preventing one SMT thread from filling a core's pipeline with
|
|
||||||
speculated further loop iterations (which need to be expensively flushed on
|
|
||||||
final success) when it could just give those pipeline slots to a second SMT
|
|
||||||
thread that can do something useful with the core, such as unblocking this
|
|
||||||
SMT thread :)
|
|
||||||
|
|
||||||
So, instead of
|
|
||||||
|
|
||||||
while (!condition)
|
|
||||||
;
|
|
||||||
|
|
||||||
it's better to use
|
|
||||||
|
|
||||||
while (!condition)
|
|
||||||
qYieldCpu();
|
|
||||||
*/
|
|
||||||
/*
    Issues a spin-wait hint to the processor. The instruction is chosen at
    preprocessing time: _mm_pause() on x86; on ARMv7 and later one of
    __builtin_arm_yield(), an inline-assembly "yield" or __yield(), depending
    on what the compiler/OS offers. The outer #if has no #else branch, so on
    any other processor the function body is empty.
*/
static inline void qYieldCpu()
|
|
||||||
{
|
|
||||||
#if defined(Q_PROCESSOR_X86)
|
|
||||||
_mm_pause();
|
|
||||||
#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7 /* yield was added in ARMv7 */
|
|
||||||
# if __has_builtin(__builtin_arm_yield) /* e.g. Clang */
|
|
||||||
__builtin_arm_yield();
|
|
||||||
# elif defined(Q_OS_INTEGRITY) || defined(Q_CC_GNU_ONLY)
|
|
||||||
/*
|
|
||||||
- Integrity is missing the arm_acle.h header
|
|
||||||
- GCC doesn't have __yield() in arm_acle.h
|
|
||||||
https://stackoverflow.com/a/70076751/134841
|
|
||||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105416
|
|
||||||
*/
|
|
||||||
asm volatile("yield"); /* this works everywhere */
|
|
||||||
# else
|
|
||||||
__yield(); /* this is what should work everywhere */
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#define QATOMIC_CXX11_H
|
#define QATOMIC_CXX11_H
|
||||||
|
|
||||||
#include <QtCore/qgenericatomic.h>
|
#include <QtCore/qgenericatomic.h>
|
||||||
|
#include <QtCore/qyieldcpu.h>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
|
||||||
QT_BEGIN_NAMESPACE
|
QT_BEGIN_NAMESPACE
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
#include <QtCore/qcoreapplication.h>
|
#include <QtCore/qcoreapplication.h>
|
||||||
#include <QtCore/qthread.h>
|
#include <QtCore/qthread.h>
|
||||||
#include <QtCore/qvarlengtharray.h>
|
#include <QtCore/qvarlengtharray.h>
|
||||||
#include <QtCore/private/qsimd_p.h> // for qYieldCpu()
|
|
||||||
#include <private/qthreadpool_p.h>
|
#include <private/qthreadpool_p.h>
|
||||||
#include <private/qobject_p.h>
|
#include <private/qobject_p.h>
|
||||||
|
|
||||||
|
66
src/corelib/thread/qyieldcpu.h
Normal file
66
src/corelib/thread/qyieldcpu.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// Copyright (C) 2023 The Qt Company Ltd.
|
||||||
|
// Copyright (C) 2023 Intel Corporation.
|
||||||
|
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
|
||||||
|
|
||||||
|
#ifndef QYIELDCPU_H
|
||||||
|
#define QYIELDCPU_H
|
||||||
|
|
||||||
|
#include <QtCore/qcompilerdetection.h>
|
||||||
|
#include <QtCore/qprocessordetection.h>
|
||||||
|
#include <QtCore/qtconfigmacros.h>
|
||||||
|
|
||||||
|
#ifdef Q_CC_MSVC_ONLY
|
||||||
|
// MSVC defines _YIELD_PROCESSOR() in <xatomic.h>, but as that is a private
|
||||||
|
// header, we include the public ones
|
||||||
|
# ifdef __cplusplus
|
||||||
|
# include <atomic>
|
||||||
|
extern "C"
|
||||||
|
# endif
|
||||||
|
void _mm_pause(void); // the compiler recognizes as intrinsic
|
||||||
|
#endif
|
||||||
|
|
||||||
|
QT_BEGIN_NAMESPACE
|
||||||
|
|
||||||
|
#ifdef Q_CC_GNU
|
||||||
|
__attribute__((artificial))
|
||||||
|
#endif
|
||||||
|
Q_ALWAYS_INLINE void qYieldCpu(void) Q_DECL_NOEXCEPT;
|
||||||
|
|
||||||
|
// Emits a processor-level pause/yield hint for use inside busy-wait loops.
//
// Exactly one branch of the #if/#elif chain below is compiled in, and the
// order of the checks is significant: the generic spellings (__yield,
// MSVC's _YIELD_PROCESSOR) are tried first, then the x86, ARM and RISC-V
// specific ones, and finally the Green Hills _YIELD_PROCESSOR macro.
// There is no #else branch, so when nothing matches the body is empty and
// the call does nothing.
//
// Takes no arguments and returns nothing; marked noexcept when compiled as
// C++ so that the same definition can also be built as C.
void qYieldCpu(void)
|
||||||
|
#ifdef __cplusplus
|
||||||
|
noexcept
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
// --- Generic spellings, not tied to one architecture ---
#if __has_builtin(__yield)
|
||||||
|
__yield(); // Generic
|
||||||
|
#elif defined(_YIELD_PROCESSOR) && defined(Q_CC_MSVC)
|
||||||
|
_YIELD_PROCESSOR(); // Generic; MSVC's <atomic>
|
||||||
|
// --- x86: builtin first, then per-compiler fallbacks, then raw asm ---
|
||||||
|
#elif __has_builtin(__builtin_ia32_pause)
|
||||||
|
__builtin_ia32_pause();
|
||||||
|
#elif defined(Q_PROCESSOR_X86) && defined(Q_CC_GNU)
|
||||||
|
// GCC < 10 didn't have __has_builtin()
|
||||||
|
__builtin_ia32_pause();
|
||||||
|
#elif defined(Q_PROCESSOR_X86) && defined(Q_CC_MSVC)
|
||||||
|
_mm_pause();
|
||||||
|
#elif defined(Q_PROCESSOR_X86)
|
||||||
|
asm("pause"); // hopefully asm() works in this compiler
|
||||||
|
// --- ARM: builtin if offered, otherwise inline asm on ARMv7 and later ---
|
||||||
|
#elif __has_builtin(__builtin_arm_yield)
|
||||||
|
__builtin_arm_yield();
|
||||||
|
#elif defined(Q_PROCESSOR_ARM) && Q_PROCESSOR_ARM >= 7
|
||||||
|
asm("yield"); // this works everywhere
|
||||||
|
// --- RISC-V: builtin if offered, otherwise the instruction via inline asm ---
|
||||||
|
#elif __has_builtin(__builtin_riscv_pause)
|
||||||
|
__builtin_riscv_pause(); // Zihintpause extension
|
||||||
|
#elif defined(Q_PROCESSOR_RISCV)
|
||||||
|
asm("fence w, 0"); // a.k.a. "pause"
|
||||||
|
// --- Last resort: a _YIELD_PROCESSOR macro under the Green Hills compiler ---
|
||||||
|
#elif defined(_YIELD_PROCESSOR) && defined(Q_CC_GHS)
|
||||||
|
_YIELD_PROCESSOR; // Green Hills (INTEGRITY), but only on ARM
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
QT_END_NAMESPACE
|
||||||
|
|
||||||
|
#endif // QYIELDCPU_H
|
59
src/corelib/thread/qyieldcpu.qdoc
Normal file
59
src/corelib/thread/qyieldcpu.qdoc
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
// Copyright (C) 2023 The Qt Company Ltd.
|
||||||
|
// Copyright (C) 2023 Intel Corporation.
|
||||||
|
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\fn qYieldCpu()
|
||||||
|
\inmodule QtCore
|
||||||
|
\ingroup thread
|
||||||
|
\relates QAtomicInteger
|
||||||
|
\relatesalso QAtomicPointer
|
||||||
|
\since 6.7
|
||||||
|
|
||||||
|
Pauses the execution of the current thread for an unspecified time, using
|
||||||
|
hardware instructions, without de-scheduling this thread. This function is
|
||||||
|
meant to be used in high-throughput loops where the code expects another
|
||||||
|
thread to modify an atomic variable. This is completely different from
|
||||||
|
QThread::yieldCurrentThread(), which is an OS-level operation that may take
|
||||||
|
the whole thread off the CPU and allow other threads (possibly belonging to
|
||||||
|
other processes) to run.
|
||||||
|
|
||||||
|
So, instead of
|
||||||
|
\code
|
||||||
|
while (!condition)
|
||||||
|
;
|
||||||
|
\endcode
|
||||||
|
|
||||||
|
one should write
|
||||||
|
\code
|
||||||
|
while (!condition)
|
||||||
|
qYieldCpu();
|
||||||
|
\endcode
|
||||||
|
|
||||||
|
This is useful both with and without hardware multithreading on the same
|
||||||
|
core. In the case of hardware threads, it serves to prevent further
|
||||||
|
speculative execution filling up the pipeline, which could starve the
|
||||||
|
sibling thread of resources. Across cores and higher levels of separation,
|
||||||
|
it allows the cache coherency protocol to allocate the cache line being
|
||||||
|
modified and inspected to the logical processor whose result this code is
|
||||||
|
expecting.
|
||||||
|
|
||||||
|
It is also recommended to loop around code that does not modify the global
|
||||||
|
variable, to avoid contention in exclusively obtaining the memory location.
|
||||||
|
Therefore, an atomic modification loop such as a spinlock acquisition
|
||||||
|
should be:
|
||||||
|
|
||||||
|
\code
|
||||||
|
while (true) {
|
||||||
|
while (!readOnlyCondition(atomic))
|
||||||
|
qYieldCpu();
|
||||||
|
if (modify(atomic))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
\endcode
|
||||||
|
|
||||||
|
On x86 processors and on RISC-V processors with the \c{Zihintpause}
|
||||||
|
extension, this will emit the \c PAUSE instruction, which is ignored on
|
||||||
|
processors that don't support it; on ARMv7 or later ARM processors, it will
|
||||||
|
emit the \c{YIELD} instruction.
|
||||||
|
*/
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include <QtCore/qglobal.h>
|
#include <QtCore/qglobal.h>
|
||||||
#include <QtCore/qtversion.h>
|
#include <QtCore/qtversion.h>
|
||||||
|
#include <QtCore/qyieldcpu.h>
|
||||||
|
|
||||||
#ifdef Q_COMPILER_THREAD_LOCAL
|
#ifdef Q_COMPILER_THREAD_LOCAL
|
||||||
# include <threads.h>
|
# include <threads.h>
|
||||||
@ -62,6 +63,12 @@ const char *tst_qVersion()
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tst_qYieldCpu(void) Q_DECL_NOEXCEPT;
|
||||||
|
/*
   Calls qYieldCpu() once and checks nothing at run time.
   NOTE(review): presumably this only exists to prove that <QtCore/qyieldcpu.h>
   compiles and that the inline function can be used from this translation
   unit -- confirm against the test driver that calls it.
*/
void tst_qYieldCpu(void)
|
||||||
|
{
|
||||||
|
qYieldCpu();
|
||||||
|
}
|
||||||
|
|
||||||
/* Static assertion */
|
/* Static assertion */
|
||||||
Q_STATIC_ASSERT(true);
|
Q_STATIC_ASSERT(true);
|
||||||
Q_STATIC_ASSERT(1);
|
Q_STATIC_ASSERT(1);
|
||||||
|
Loading…
Reference in New Issue
Block a user