From ca6e601a9d4a72b3699cca15bad12ac1716bf49a Mon Sep 17 00:00:00 2001 From: Torvald Riegel Date: Wed, 30 Nov 2016 17:53:11 +0100 Subject: [PATCH] Use C11-like atomics instead of plain memory accesses in x86 lock elision. This uses atomic operations to access lock elision metadata that is accessed concurrently (ie, adapt_count fields). The size of the data is less than a word but accessed only with atomic loads and stores; therefore, we add support for shorter-size atomic load and stores too. * include/atomic.h (__atomic_check_size_ls): New. (atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed, atomic_store_release): Use it. * sysdeps/x86/elide.h (ACCESS_ONCE): Remove. (elision_adapt, ELIDE_LOCK): Use atomics. * sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use atomics and improve code comments. * sysdeps/unix/sysv/linux/x86/elision-trylock.c (__lll_trylock_elision): Likewise. --- ChangeLog | 12 ++++++++ include/atomic.h | 24 ++++++++++++--- sysdeps/unix/sysv/linux/x86/elision-lock.c | 28 ++++++++++++----- sysdeps/unix/sysv/linux/x86/elision-trylock.c | 18 ++++++----- sysdeps/x86/elide.h | 30 ++++++++++++------- 5 files changed, 82 insertions(+), 30 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6cbb0a8b98..9c0aaa6e43 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2016-12-05 Torvald Riegel + + * include/atomic.h (__atomic_check_size_ls): New. + (atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed, + atomic_store_release): Use it. + * sysdeps/x86/elide.h (ACCESS_ONCE): Remove. + (elision_adapt, ELIDE_LOCK): Use atomics. + * sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use + atomics and improve code comments. + * sysdeps/unix/sysv/linux/x86/elision-trylock.c + (__lll_trylock_elision): Likewise. + 2016-12-04 Samuel Thibault * hurd/hurd.h: Cast errno constants to error_t to fix usage in C++ diff --git a/include/atomic.h b/include/atomic.h index c8b46649c5..d14cbc5ca8 100644 --- a/include/atomic.h +++ b/include/atomic.h @@ -550,6 +550,20 @@ void __atomic_link_error (void); if (sizeof (*mem) != 4) \ __atomic_link_error (); # endif +/* We additionally provide 8b and 16b atomic loads and stores; we do not yet + need other atomic operations of such sizes, and restricting the support to + loads and stores makes this easier for archs that do not have native + support for atomic operations to less-than-word-sized data. */ +# if __HAVE_64B_ATOMICS == 1 +# define __atomic_check_size_ls(mem) \ + if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && (sizeof (*mem) != 4) \ + && (sizeof (*mem) != 8)) \ + __atomic_link_error (); +# else +# define __atomic_check_size_ls(mem) \ + if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \ + __atomic_link_error (); +# endif # define atomic_thread_fence_acquire() \ __atomic_thread_fence (__ATOMIC_ACQUIRE) @@ -559,18 +573,20 @@ void __atomic_link_error (void); __atomic_thread_fence (__ATOMIC_SEQ_CST) # define atomic_load_relaxed(mem) \ - ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_RELAXED); }) + ({ __atomic_check_size_ls((mem)); \ + __atomic_load_n ((mem), __ATOMIC_RELAXED); }) # define atomic_load_acquire(mem) \ - ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_ACQUIRE); }) + ({ __atomic_check_size_ls((mem)); \ + __atomic_load_n ((mem), __ATOMIC_ACQUIRE); }) # define atomic_store_relaxed(mem, val) \ do { \ - __atomic_check_size((mem)); \ + __atomic_check_size_ls((mem)); \ __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \ } while (0) # define atomic_store_release(mem, val) \ do { \ - __atomic_check_size((mem)); \ + __atomic_check_size_ls((mem)); \ __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \ } while (0) diff --git a/sysdeps/unix/sysv/linux/x86/elision-lock.c b/sysdeps/unix/sysv/linux/x86/elision-lock.c index 5e66960123..c05ade4722 100644 --- a/sysdeps/unix/sysv/linux/x86/elision-lock.c +++ b/sysdeps/unix/sysv/linux/x86/elision-lock.c @@ -44,7 +44,13 @@ int __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) { - if (*adapt_count <= 0) + /* adapt_count can be accessed concurrently; these accesses can be both + inside of transactions (if critical sections are nested and the outer + critical section uses lock elision) and outside of transactions. Thus, + we need to use atomic accesses to avoid data races. However, the + value of adapt_count is just a hint, so relaxed MO accesses are + sufficient. */ + if (atomic_load_relaxed (adapt_count) <= 0) { unsigned status; int try_xbegin; @@ -70,15 +76,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) && _XABORT_CODE (status) == _ABORT_LOCK_BUSY) { /* Right now we skip here. Better would be to wait a bit - and retry. This likely needs some spinning. */ - if (*adapt_count != aconf.skip_lock_busy) - *adapt_count = aconf.skip_lock_busy; + and retry. This likely needs some spinning. See + above for why relaxed MO is sufficient. */ + if (atomic_load_relaxed (adapt_count) + != aconf.skip_lock_busy) + atomic_store_relaxed (adapt_count, aconf.skip_lock_busy); } /* Internal abort. There is no chance for retry. Use the normal locking and next time use lock. - Be careful to avoid writing to the lock. */ - else if (*adapt_count != aconf.skip_lock_internal_abort) - *adapt_count = aconf.skip_lock_internal_abort; + Be careful to avoid writing to the lock. See above for why + relaxed MO is sufficient. */ + else if (atomic_load_relaxed (adapt_count) + != aconf.skip_lock_internal_abort) + atomic_store_relaxed (adapt_count, + aconf.skip_lock_internal_abort); break; } } @@ -87,7 +98,8 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private) { /* Use a normal lock until the threshold counter runs out. Lost updates possible. */ - (*adapt_count)--; + atomic_store_relaxed (adapt_count, + atomic_load_relaxed (adapt_count) - 1); } /* Use a normal lock here. */ diff --git a/sysdeps/unix/sysv/linux/x86/elision-trylock.c b/sysdeps/unix/sysv/linux/x86/elision-trylock.c index 65d9c18584..2d44f50426 100644 --- a/sysdeps/unix/sysv/linux/x86/elision-trylock.c +++ b/sysdeps/unix/sysv/linux/x86/elision-trylock.c @@ -36,8 +36,10 @@ __lll_trylock_elision (int *futex, short *adapt_count) return an error. */ _xabort (_ABORT_NESTED_TRYLOCK); - /* Only try a transaction if it's worth it. */ - if (*adapt_count <= 0) + /* Only try a transaction if it's worth it. See __lll_lock_elision for + why we need atomic accesses. Relaxed MO is sufficient because this is + just a hint. */ + if (atomic_load_relaxed (adapt_count) <= 0) { unsigned status; @@ -55,16 +57,18 @@ __lll_trylock_elision (int *futex, short *adapt_count) if (!(status & _XABORT_RETRY)) { /* Internal abort. No chance for retry. For future - locks don't try speculation for some time. */ - if (*adapt_count != aconf.skip_trylock_internal_abort) - *adapt_count = aconf.skip_trylock_internal_abort; + locks don't try speculation for some time. See above for MO. */ + if (atomic_load_relaxed (adapt_count) + != aconf.skip_lock_internal_abort) + atomic_store_relaxed (adapt_count, aconf.skip_lock_internal_abort); } /* Could do some retries here. */ } else { - /* Lost updates are possible, but harmless. */ - (*adapt_count)--; + /* Lost updates are possible but harmless (see above). */ + atomic_store_relaxed (adapt_count, + atomic_load_relaxed (adapt_count) - 1); } return lll_trylock (*futex); diff --git a/sysdeps/x86/elide.h b/sysdeps/x86/elide.h index 8691e6673d..f7d5220c17 100644 --- a/sysdeps/x86/elide.h +++ b/sysdeps/x86/elide.h @@ -20,8 +20,8 @@ #include #include +#include -#define ACCESS_ONCE(x) (* (volatile typeof(x) *) &(x)) /* Adapt elision with ADAPT_COUNT and STATUS and decide retries. */ @@ -35,28 +35,35 @@ elision_adapt(signed char *adapt_count, unsigned int status) { /* Right now we skip here. Better would be to wait a bit and retry. This likely needs some spinning. Be careful - to avoid writing the lock. */ - if (*adapt_count != __elision_aconf.skip_lock_busy) - ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_busy; + to avoid writing the lock. + Using relaxed MO and separate atomic accesses is sufficient because + adapt_count is just a hint. */ + if (atomic_load_relaxed (adapt_count) != __elision_aconf.skip_lock_busy) + atomic_store_relaxed (adapt_count, __elision_aconf.skip_lock_busy); } /* Internal abort. There is no chance for retry. Use the normal locking and next time use lock. - Be careful to avoid writing to the lock. */ - else if (*adapt_count != __elision_aconf.skip_lock_internal_abort) - ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_internal_abort; + Be careful to avoid writing to the lock. See above for MO. */ + else if (atomic_load_relaxed (adapt_count) + != __elision_aconf.skip_lock_internal_abort) + atomic_store_relaxed (adapt_count, + __elision_aconf.skip_lock_internal_abort); return true; } /* is_lock_free must be executed inside the transaction */ /* Returns true if lock defined by IS_LOCK_FREE was elided. - ADAPT_COUNT is a pointer to per-lock state variable. */ + ADAPT_COUNT is a per-lock state variable; it must be accessed atomically + to avoid data races but is just a hint, so using relaxed MO and separate + atomic loads and stores instead of atomic read-modify-write operations is + sufficient. */ #define ELIDE_LOCK(adapt_count, is_lock_free) \ ({ \ int ret = 0; \ \ - if ((adapt_count) <= 0) \ + if (atomic_load_relaxed (&(adapt_count)) <= 0) \ { \ for (int i = __elision_aconf.retry_try_xbegin; i > 0; i--) \ { \ @@ -75,12 +82,13 @@ elision_adapt(signed char *adapt_count, unsigned int status) } \ } \ else \ - (adapt_count)--; /* missing updates ok */ \ + atomic_store_relaxed (&(adapt_count), \ + atomic_load_relaxed (&(adapt_count)) - 1); \ ret; \ }) /* Returns true if lock defined by IS_LOCK_FREE was try-elided. - ADAPT_COUNT is a pointer to per-lock state variable. */ + ADAPT_COUNT is a per-lock state variable. */ #define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) ({ \ int ret = 0; \