Atomics overhaul.

This merges and refactors SkAtomics.h and SkBarriers.h into SkAtomics.h and
some ports/ implementations. The major new feature is that we can express
memory orders explicitly rather than only through comments.

The porting layer is reduced to four template functions:
  - sk_atomic_load
  - sk_atomic_store
  - sk_atomic_fetch_add
  - sk_atomic_compare_exchange
From those four we can reconstruct all of our previous sk_atomic_foo helpers; a usage sketch follows the list of ports below.

There are three ports:
  - SkAtomics_std:    uses C++11 <atomic>,             used with MSVC
  - SkAtomics_atomic: uses newer GCC/Clang intrinsics, used on non-MSVC builds where possible
  - SkAtomics_sync:   uses older GCC/Clang intrinsics, used where SkAtomics_atomic is not supported
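
As a rough usage sketch of the new porting layer (an editorial illustration, not code from this change; gState, try_begin, and the other names below are made up):

#include "SkAtomics.h"

// A hypothetical tri-state flag: 0 = not started, 1 = running, 2 = done.
static int32_t gState = 0;

bool try_begin() {
    int32_t expected = 0, running = 1;
    // With no order passed, both success and failure default to sk_memory_order_seq_cst.
    return sk_atomic_compare_exchange(&gState, &expected, running);
}

void mark_done() { sk_atomic_store<int32_t>(&gState, 2, sk_memory_order_release); }

bool is_done() { return 2 == sk_atomic_load(&gState, sk_memory_order_acquire); }

int32_t bump(int32_t* counter) {
    // Relaxed is enough when we only need atomicity, not ordering.
    return sk_atomic_fetch_add<int32_t>(counter, 1, sk_memory_order_relaxed);
}

All four primitives default to sequentially-consistent ordering when no memory order is passed, which is what the legacy shims below rely on.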

No public API changes.
TBR=reed@google.com

BUG=skia:

Review URL: https://codereview.chromium.org/896553002
Author: mtklein, 2015-02-02 12:22:07 -08:00 (committed by Commit bot)
Commit: a669bc7a7a (parent: 465206af18)
13 changed files with 213 additions and 358 deletions

@@ -628,11 +628,9 @@
],
'paths_to_ignore': [
'<(skia_include_path)/gpu/gl/GrGLConfig_chrome.h',
'<(skia_include_path)/ports/SkAtomics_std.h',
'<(skia_include_path)/ports/SkAtomics_atomic.h',
'<(skia_include_path)/ports/SkAtomics_sync.h',
'<(skia_include_path)/ports/SkAtomics_win.h',
'<(skia_include_path)/ports/SkBarriers_arm.h',
'<(skia_include_path)/ports/SkBarriers_tsan.h',
'<(skia_include_path)/ports/SkBarriers_x86.h',
'<(skia_include_path)/ports/SkMutex_pthread.h',
'<(skia_include_path)/ports/SkMutex_win.h',
'<(skia_include_path)/ports/SkTypeface_mac.h',

SkAtomics.h
@@ -4,10 +4,79 @@
// This file is not part of the public Skia API.
#include "SkTypes.h"
enum sk_memory_order {
sk_memory_order_relaxed,
sk_memory_order_consume,
sk_memory_order_acquire,
sk_memory_order_release,
sk_memory_order_acq_rel,
sk_memory_order_seq_cst,
};
template <typename T>
T sk_atomic_load(const T*, sk_memory_order = sk_memory_order_seq_cst);
template <typename T>
void sk_atomic_store(T*, T, sk_memory_order = sk_memory_order_seq_cst);
template <typename T>
T sk_atomic_fetch_add(T*, T, sk_memory_order = sk_memory_order_seq_cst);
template <typename T>
bool sk_atomic_compare_exchange(T*, T* expected, T desired,
sk_memory_order success = sk_memory_order_seq_cst,
sk_memory_order failure = sk_memory_order_seq_cst);
#if defined(_MSC_VER)
#include "../ports/SkAtomics_win.h"
#include "../ports/SkAtomics_std.h"
#elif !defined(SK_BUILD_FOR_IOS) && defined(__ATOMIC_RELAXED)
#include "../ports/SkAtomics_atomic.h"
#else
#include "../ports/SkAtomics_sync.h"
#endif
// From here down we have shims for our old atomics API, to be weaned off of.
// We use the default sequentially-consistent memory order to make things simple
// and to match the practical reality of our old _sync and _win implementations.
inline int32_t sk_atomic_inc(int32_t* ptr) { return sk_atomic_fetch_add(ptr, +1); }
inline int32_t sk_atomic_dec(int32_t* ptr) { return sk_atomic_fetch_add(ptr, -1); }
inline int32_t sk_atomic_add(int32_t* ptr, int32_t v) { return sk_atomic_fetch_add(ptr, v); }
inline int64_t sk_atomic_inc(int64_t* ptr) { return sk_atomic_fetch_add<int64_t>(ptr, +1); }
inline bool sk_atomic_cas(int32_t* ptr, int32_t expected, int32_t desired) {
return sk_atomic_compare_exchange(ptr, &expected, desired);
}
inline void* sk_atomic_cas(void** ptr, void* expected, void* desired) {
(void)sk_atomic_compare_exchange(ptr, &expected, desired);
return expected;
}
inline int32_t sk_atomic_conditional_inc(int32_t* ptr) {
int32_t prev = sk_atomic_load(ptr);
do {
if (0 == prev) {
break;
}
} while(!sk_atomic_compare_exchange(ptr, &prev, prev+1));
return prev;
}
template <typename T>
T sk_acquire_load(T* ptr) { return sk_atomic_load(ptr, sk_memory_order_acquire); }
template <typename T>
T sk_consume_load(T* ptr) {
// On every platform we care about, consume is the same as relaxed.
// If we pass consume here, some compilers turn that into acquire, which is overkill.
return sk_atomic_load(ptr, sk_memory_order_relaxed);
}
template <typename T>
void sk_release_store(T* ptr, T val) { sk_atomic_store(ptr, val, sk_memory_order_release); }
inline void sk_membar_acquire__after_atomic_dec() {}
inline void sk_membar_acquire__after_atomic_conditional_inc() {}
#endif//SkAtomics_DEFINED
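
To make the shim section above concrete, here is a hedged sketch of the classic unref pattern those helpers exist to keep working; MyRefCnt and my_unref are hypothetical names, not Skia's actual SkRefCnt code:

struct MyRefCnt {
    int32_t fRefCnt;
};

void my_unref(MyRefCnt* obj) {
    // sk_atomic_dec returns the value *before* the decrement.
    if (1 == sk_atomic_dec(&obj->fRefCnt)) {
        // Now a no-op: the decrement above is already sequentially consistent.
        // Kept so existing call sites compile unchanged.
        sk_membar_acquire__after_atomic_dec();
        delete obj;
    }
}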

SkBarriers.h (deleted)
@@ -1,15 +0,0 @@
#ifndef SkBarriers_DEFINED
#define SkBarriers_DEFINED
// This file is not part of the public Skia API.
#include "SkTypes.h"
#if SK_HAS_COMPILER_FEATURE(thread_sanitizer)
#include "../ports/SkBarriers_tsan.h"
#elif defined(SK_CPU_ARM32) || defined(SK_CPU_ARM64)
#include "../ports/SkBarriers_arm.h"
#else
#include "../ports/SkBarriers_x86.h"
#endif
#endif//SkBarriers_DEFINED

SkMutex.h
@@ -10,4 +10,44 @@
#include "../ports/SkMutex_pthread.h"
#endif
class SkAutoMutexAcquire : SkNoncopyable {
public:
explicit SkAutoMutexAcquire(SkBaseMutex& mutex) : fMutex(&mutex) {
SkASSERT(fMutex != NULL);
mutex.acquire();
}
explicit SkAutoMutexAcquire(SkBaseMutex* mutex) : fMutex(mutex) {
if (mutex) {
mutex->acquire();
}
}
/** If the mutex has not been released, release it now. */
~SkAutoMutexAcquire() {
if (fMutex) {
fMutex->release();
}
}
/** If the mutex has not been released, release it now. */
void release() {
if (fMutex) {
fMutex->release();
fMutex = NULL;
}
}
/** Assert that we're holding the mutex. */
void assertHeld() {
SkASSERT(fMutex);
fMutex->assertHeld();
}
private:
SkBaseMutex* fMutex;
};
#define SkAutoMutexAcquire(...) SK_REQUIRE_LOCAL_VAR(SkAutoMutexAcquire)
#endif//SkMutex_DEFINED
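
For reference, a minimal usage sketch of the scoped helper above, assuming a static mutex declared with SK_DECLARE_STATIC_MUTEX (gCacheMutex and update_cache are hypothetical names):

SK_DECLARE_STATIC_MUTEX(gCacheMutex);

void update_cache() {
    SkAutoMutexAcquire lock(gCacheMutex);  // acquires gCacheMutex here
    // ... touch state guarded by gCacheMutex ...
}                                          // released automatically as lock goes out of scope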

SkThread.h
@@ -8,135 +8,9 @@
#ifndef SkThread_DEFINED
#define SkThread_DEFINED
#include "SkTypes.h"
// SkAtomics.h must provide inline implementations for the following declarations.
/** Atomically adds one to the int referenced by addr and returns the previous value.
* No additional memory barrier is required; this must act as a compiler barrier.
*/
static int32_t sk_atomic_inc(int32_t* addr);
static int64_t sk_atomic_inc(int64_t* addr);
/** Atomically adds inc to the int referenced by addr and returns the previous value.
* No additional memory barrier is required; this must act as a compiler barrier.
*/
static int32_t sk_atomic_add(int32_t* addr, int32_t inc);
/** Atomically subtracts one from the int referenced by addr and returns the previous value.
* This must act as a release (SL/S) memory barrier and as a compiler barrier.
*/
static int32_t sk_atomic_dec(int32_t* addr);
/** Atomic compare and set.
* If *addr == before, set *addr to after and return true, otherwise return false.
* This must act as a release (SL/S) memory barrier and as a compiler barrier.
*/
static bool sk_atomic_cas(int32_t* addr, int32_t before, int32_t after);
/** If sk_atomic_dec does not act as an acquire (L/SL) barrier,
* this must act as an acquire (L/SL) memory barrier and as a compiler barrier.
*/
static void sk_membar_acquire__after_atomic_dec();
/** If sk_atomic_conditional_inc does not act as an acquire (L/SL) barrier,
* this must act as an acquire (L/SL) memory barrier and as a compiler barrier.
*/
static void sk_membar_acquire__after_atomic_conditional_inc();
// TODO: delete this file, or maybe move the definition of SkThread here.
#include "SkAtomics.h"
/** Atomically adds one to the int referenced by addr iff the referenced int was not 0
* and returns the previous value.
* No additional memory barrier is required; this must act as a compiler barrier.
*/
template<typename INT_TYPE> static inline INT_TYPE sk_atomic_conditional_inc(INT_TYPE* addr) {
INT_TYPE prev;
do {
prev = *addr;
if (0 == prev) {
break;
}
} while (!sk_atomic_cas(addr, prev, prev+1));
return prev;
}
// SkBarriers.h must provide implementations for the following declarations:
/** Prevent the compiler from reordering across this barrier. */
static void sk_compiler_barrier();
/** Read T*, with at least an acquire barrier.
*
* Only needs to be implemented for T which can be atomically read.
*/
template <typename T> T sk_acquire_load(T*);
/** Write T*, with at least a release barrier.
*
* Only needs to be implemented for T which can be atomically written.
*/
template <typename T> void sk_release_store(T*, T);
#include "SkBarriers.h"
/** SkMutex.h must provide the following (or equivalent) declarations.
class SkBaseMutex {
public:
void acquire(); // Block until this thread owns the mutex.
void release(); // Assuming this thread owns the mutex, release it.
void assertHeld(); // If SK_DEBUG, assert this thread owns the mutex.
};
class SkMutex : SkBaseMutex {
public:
SkMutex();
~SkMutex();
};
#define SK_DECLARE_STATIC_MUTEX(name) static SkBaseMutex name = ...
*/
#include "SkMutex.h"
class SkAutoMutexAcquire : SkNoncopyable {
public:
explicit SkAutoMutexAcquire(SkBaseMutex& mutex) : fMutex(&mutex) {
SkASSERT(fMutex != NULL);
mutex.acquire();
}
explicit SkAutoMutexAcquire(SkBaseMutex* mutex) : fMutex(mutex) {
if (mutex) {
mutex->acquire();
}
}
/** If the mutex has not been released, release it now. */
~SkAutoMutexAcquire() {
if (fMutex) {
fMutex->release();
}
}
/** If the mutex has not been released, release it now. */
void release() {
if (fMutex) {
fMutex->release();
fMutex = NULL;
}
}
/** Assert that we're holding the mutex. */
void assertHeld() {
SkASSERT(fMutex);
fMutex->assertHeld();
}
private:
SkBaseMutex* fMutex;
};
#define SkAutoMutexAcquire(...) SK_REQUIRE_LOCAL_VAR(SkAutoMutexAcquire)
#endif

SkThreadPriv.h
@@ -8,16 +8,7 @@
#ifndef SkThreadPriv_DEFINED
#define SkThreadPriv_DEFINED
#include "SkTypes.h"
// SkAtomics.h must provide inline implementations for the following declarations.
/** Atomic compare and set, for pointers.
* If *addr == before, set *addr to after. Always returns previous value of *addr.
* This must issue a release barrier on success, acquire on failure, and always a compiler barrier.
*/
static void* sk_atomic_cas(void** addr, void* before, void* after);
// TODO: delete this file
#include "SkAtomics.h"
#endif//SkThreadPriv_DEFINED

ports/SkAtomics_atomic.h (new file)
@@ -0,0 +1,26 @@
#ifndef SkAtomics_atomic_DEFINED
#define SkAtomics_atomic_DEFINED
template <typename T>
T sk_atomic_load(const T* ptr, sk_memory_order mo) {
return __atomic_load_n(ptr, mo);
}
template <typename T>
void sk_atomic_store(T* ptr, T val, sk_memory_order mo) {
__atomic_store_n(ptr, val, mo);
}
template <typename T>
T sk_atomic_fetch_add(T* ptr, T val, sk_memory_order mo) {
return __atomic_fetch_add(ptr, val, mo);
}
template <typename T>
bool sk_atomic_compare_exchange(T* ptr, T* expected, T desired,
sk_memory_order success,
sk_memory_order failure) {
return __atomic_compare_exchange_n(ptr, expected, desired, false/*weak?*/, success, failure);
}
#endif//SkAtomics_atomic_DEFINED
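
Passing sk_memory_order values straight to the __atomic builtins assumes its enumerators line up with GCC/Clang's __ATOMIC_* constants (0 through 5, in the same order). A hedged compile-time sanity check one could add, not part of this change:

// Verify sk_memory_order matches the __ATOMIC_* numbering the builtins expect.
static_assert(sk_memory_order_relaxed == __ATOMIC_RELAXED, "");
static_assert(sk_memory_order_consume == __ATOMIC_CONSUME, "");
static_assert(sk_memory_order_acquire == __ATOMIC_ACQUIRE, "");
static_assert(sk_memory_order_release == __ATOMIC_RELEASE, "");
static_assert(sk_memory_order_acq_rel == __ATOMIC_ACQ_REL, "");
static_assert(sk_memory_order_seq_cst == __ATOMIC_SEQ_CST, "");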

ports/SkAtomics_std.h (new file)
@@ -0,0 +1,36 @@
#ifndef SkAtomics_std_DEFINED
#define SkAtomics_std_DEFINED
// We try not to depend on the C++ standard library,
// but these uses of <atomic> should all inline, so we don't feel too bad here.
#include <atomic>
template <typename T>
T sk_atomic_load(const T* ptr, sk_memory_order mo) {
const std::atomic<T>* ap = reinterpret_cast<const std::atomic<T>*>(ptr);
return std::atomic_load_explicit(ap, (std::memory_order)mo);
}
template <typename T>
void sk_atomic_store(T* ptr, T val, sk_memory_order mo) {
std::atomic<T>* ap = reinterpret_cast<std::atomic<T>*>(ptr);
return std::atomic_store_explicit(ap, val, (std::memory_order)mo);
}
template <typename T>
T sk_atomic_fetch_add(T* ptr, T val, sk_memory_order mo) {
std::atomic<T>* ap = reinterpret_cast<std::atomic<T>*>(ptr);
return std::atomic_fetch_add_explicit(ap, val, (std::memory_order)mo);
}
template <typename T>
bool sk_atomic_compare_exchange(T* ptr, T* expected, T desired,
sk_memory_order success,
sk_memory_order failure) {
std::atomic<T>* ap = reinterpret_cast<std::atomic<T>*>(ptr);
return std::atomic_compare_exchange_strong_explicit(ap, expected, desired,
(std::memory_order)success,
(std::memory_order)failure);
}
#endif//SkAtomics_std_DEFINED
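
The (std::memory_order) casts above assume sk_memory_order's enumerators appear in the same order and with the same values as std::memory_order's, and the reinterpret_casts assume std::atomic<T> shares T's layout. A hedged compile-time check for the first assumption, not part of this change:

// Verify the enumerators of sk_memory_order and std::memory_order line up.
static_assert((int)sk_memory_order_relaxed == (int)std::memory_order_relaxed, "");
static_assert((int)sk_memory_order_consume == (int)std::memory_order_consume, "");
static_assert((int)sk_memory_order_acquire == (int)std::memory_order_acquire, "");
static_assert((int)sk_memory_order_release == (int)std::memory_order_release, "");
static_assert((int)sk_memory_order_acq_rel == (int)std::memory_order_acq_rel, "");
static_assert((int)sk_memory_order_seq_cst == (int)std::memory_order_seq_cst, "");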

ports/SkAtomics_sync.h
@@ -1,55 +1,51 @@
/*
* Copyright 2013 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkAtomics_sync_DEFINED
#define SkAtomics_sync_DEFINED
/** GCC/Clang __sync based atomics. */
// This file is mostly a shim. We'd like to delete it. Please don't put much
// effort into maintaining it, and if you find bugs in it, the right fix is to
// delete this file and upgrade your compiler to something that supports
// __atomic builtins or std::atomic.
#include <stdint.h>
static inline __attribute__((always_inline)) int32_t sk_atomic_inc(int32_t* addr) {
return __sync_fetch_and_add(addr, 1);
static inline void barrier(sk_memory_order mo) {
asm volatile("" : : : "memory"); // Prevents the compiler from reordering code.
#if SK_CPU_X86
// On x86, we generally don't need an extra memory barrier for loads or stores.
if (sk_memory_order_seq_cst == mo) { __sync_synchronize(); }
#else
// On other platforms (e.g. ARM) we do unless the memory order is relaxed.
if (sk_memory_order_relaxed != mo) { __sync_synchronize(); }
#endif
}
static inline __attribute__((always_inline)) int64_t sk_atomic_inc(int64_t* addr) {
#if defined(__mips__) && !defined(__LP64__) && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)
/** Some versions of the GCC 32-bit MIPS toolchains (e.g. 4.8) for android are missing
* support for the __sync* functions that operate on 64-bit values. The workaround
* is to use __atomic* functions until we can move everything to <stdatomic.h>.
*/
return __atomic_fetch_add(addr, 1, __ATOMIC_SEQ_CST);
#else
return __sync_fetch_and_add(addr, 1);
#endif
// These barriers only support our majority use cases: acquire and relaxed loads, release stores.
// For anything more complicated, please consider deleting this file and upgrading your compiler.
template <typename T>
T sk_atomic_load(const T* ptr, sk_memory_order mo) {
T val = *ptr;
barrier(mo);
return val;
}
static inline __attribute__((always_inline)) int32_t sk_atomic_add(int32_t* addr, int32_t inc) {
return __sync_fetch_and_add(addr, inc);
template <typename T>
void sk_atomic_store(T* ptr, T val, sk_memory_order mo) {
barrier(mo);
*ptr = val;
}
static inline __attribute__((always_inline)) int32_t sk_atomic_dec(int32_t* addr) {
return __sync_fetch_and_add(addr, -1);
template <typename T>
T sk_atomic_fetch_add(T* ptr, T val, sk_memory_order) {
return __sync_fetch_and_add(ptr, val);
}
static inline __attribute__((always_inline)) void sk_membar_acquire__after_atomic_dec() { }
static inline __attribute__((always_inline)) bool sk_atomic_cas(int32_t* addr,
int32_t before,
int32_t after) {
return __sync_bool_compare_and_swap(addr, before, after);
template <typename T>
bool sk_atomic_compare_exchange(T* ptr, T* expected, T desired, sk_memory_order, sk_memory_order) {
T prev = __sync_val_compare_and_swap(ptr, *expected, desired);
if (prev == *expected) {
return true;
}
*expected = prev;
return false;
}
static inline __attribute__((always_inline)) void* sk_atomic_cas(void** addr,
void* before,
void* after) {
return __sync_val_compare_and_swap(addr, before, after);
}
static inline __attribute__((always_inline)) void sk_membar_acquire__after_atomic_conditional_inc() { }
#endif
#endif//SkAtomics_sync_DEFINED
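
As the comments above note, this port only aims to cover acquire and relaxed loads plus release stores. A hedged sketch of the publication pattern that stays within those limits (Config, publish, and read_config are illustrative names):

struct Config;                      // illustrative type
static Config* gConfig = nullptr;   // illustrative shared pointer

void publish(Config* c) {
    // barrier(release) runs before the store, so writes to *c become visible first.
    sk_atomic_store(&gConfig, c, sk_memory_order_release);
}

Config* read_config() {
    // barrier(acquire) runs after the load, keeping later reads behind it.
    return sk_atomic_load(&gConfig, sk_memory_order_acquire);
}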

ports/SkAtomics_win.h (deleted)
@@ -1,54 +0,0 @@
/*
* Copyright 2013 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkAtomics_win_DEFINED
#define SkAtomics_win_DEFINED
/** Windows Interlocked atomics. */
#include <intrin.h>
#include <stdint.h>
//MSDN says in order to declare an interlocked function for use as an
//intrinsic, include intrin.h and put the function in a #pragma intrinsic
//directive.
//The pragma appears to be unnecessary, but doesn't hurt.
#pragma intrinsic(_InterlockedIncrement, _InterlockedExchangeAdd, _InterlockedDecrement)
#pragma intrinsic(_InterlockedCompareExchange)
static inline int32_t sk_atomic_inc(int32_t* addr) {
// InterlockedIncrement returns the new value, we want to return the old.
return _InterlockedIncrement(reinterpret_cast<long*>(addr)) - 1;
}
static inline int64_t sk_atomic_inc(int64_t* addr) {
// InterlockedIncrement returns the new value, we want to return the old.
return InterlockedIncrement64(addr) - 1;
}
static inline int32_t sk_atomic_add(int32_t* addr, int32_t inc) {
return _InterlockedExchangeAdd(reinterpret_cast<long*>(addr), static_cast<long>(inc));
}
static inline int32_t sk_atomic_dec(int32_t* addr) {
// InterlockedDecrement returns the new value, we want to return the old.
return _InterlockedDecrement(reinterpret_cast<long*>(addr)) + 1;
}
static inline void sk_membar_acquire__after_atomic_dec() { }
static inline bool sk_atomic_cas(int32_t* addr, int32_t before, int32_t after) {
return _InterlockedCompareExchange(reinterpret_cast<long*>(addr), after, before) == before;
}
static inline void* sk_atomic_cas(void** addr, void* before, void* after) {
return InterlockedCompareExchangePointer(addr, after, before);
}
static inline void sk_membar_acquire__after_atomic_conditional_inc() { }
#endif

ports/SkBarriers_arm.h (deleted)
@@ -1,36 +0,0 @@
/*
* Copyright 2014 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBarriers_arm_DEFINED
#define SkBarriers_arm_DEFINED
static inline void sk_compiler_barrier() { asm volatile("" : : : "memory"); }
template <typename T>
T sk_acquire_load(T* ptr) {
T val = *ptr;
__sync_synchronize(); // Issue a full barrier, which is an overkill acquire barrier.
return val;
}
template <typename T>
T sk_consume_load(T* ptr) {
T val = *ptr;
// Unlike acquire, consume loads (data-dependent loads) are guaranteed not to reorder on ARM.
// No memory barrier is needed, so we just use a compiler barrier.
// C.f. http://preshing.com/20140709/the-purpose-of-memory_order_consume-in-cpp11/
sk_compiler_barrier();
return val;
}
template <typename T>
void sk_release_store(T* ptr, T val) {
__sync_synchronize(); // Issue a full barrier, which is an overkill release barrier.
*ptr = val;
}
#endif//SkBarriers_x86_DEFINED

ports/SkBarriers_tsan.h (deleted)
@@ -1,31 +0,0 @@
/*
* Copyright 2014 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBarriers_tsan_DEFINED
#define SkBarriers_tsan_DEFINED
static inline void sk_compiler_barrier() { asm volatile("" : : : "memory"); }
template <typename T>
T sk_acquire_load(T* ptr) {
SkASSERT(__atomic_always_lock_free(sizeof(T), ptr));
return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
}
template <typename T>
T sk_consume_load(T* ptr) {
SkASSERT(__atomic_always_lock_free(sizeof(T), ptr));
return __atomic_load_n(ptr, __ATOMIC_CONSUME);
}
template <typename T>
void sk_release_store(T* ptr, T val) {
SkASSERT(__atomic_always_lock_free(sizeof(T), ptr));
return __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
}
#endif//SkBarriers_tsan_DEFINED

ports/SkBarriers_x86.h (deleted)
@@ -1,39 +0,0 @@
/*
* Copyright 2014 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBarriers_x86_DEFINED
#define SkBarriers_x86_DEFINED
#ifdef SK_BUILD_FOR_WIN
# include <intrin.h>
static inline void sk_compiler_barrier() { _ReadWriteBarrier(); }
#else
static inline void sk_compiler_barrier() { asm volatile("" : : : "memory"); }
#endif
template <typename T>
T sk_acquire_load(T* ptr) {
T val = *ptr;
// On x86, all loads are acquire loads, so we only need a compiler barrier.
sk_compiler_barrier();
return val;
}
template <typename T>
T sk_consume_load(T* ptr) {
// On x86, consume is the same as acquire, i.e. a normal load.
return sk_acquire_load(ptr);
}
template <typename T>
void sk_release_store(T* ptr, T val) {
// On x86, all stores are release stores, so we only need a compiler barrier.
sk_compiler_barrier();
*ptr = val;
}
#endif//SkBarriers_x86_DEFINED