mirror of https://sourceware.org/git/glibc.git
Use C11-like atomics instead of plain memory accesses in x86 lock elision.
This uses atomic operations to access lock elision metadata that is accessed
concurrently (i.e., the adapt_count fields).  The size of the data is less than
a word but it is accessed only with atomic loads and stores; therefore, we add
support for shorter-size atomic loads and stores too.

	* include/atomic.h (__atomic_check_size_ls): New.
	(atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
	atomic_store_release): Use it.
	* sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
	(elision_adapt, ELIDE_LOCK): Use atomics.
	* sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
	atomics and improve code comments.
	* sysdeps/unix/sysv/linux/x86/elision-trylock.c
	(__lll_trylock_elision): Likewise.
parent 71be79a25f
commit ca6e601a9d
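The atomic_load_relaxed and atomic_store_relaxed macros used in the diffs below expand to GCC's __atomic built-ins with __ATOMIC_RELAXED (see the include/atomic.h hunk). As a rough standalone illustration of the access pattern this commit applies to adapt_count, using only those built-ins; the names adapt_count_hint and note_aborted_transaction are invented for the sketch and are not glibc code:

#include <stdio.h>

/* A sub-word hint counter, analogous to the adapt_count fields touched by
   this commit.  It may be read and written concurrently, but a stale or lost
   value only costs a suboptimal elision decision.  */
static short adapt_count_hint;

static void
note_aborted_transaction (short skip_count)
{
  /* Relaxed MO is enough: the accesses only have to be data-race-free;
     they do not need to order any other memory accesses.  */
  if (__atomic_load_n (&adapt_count_hint, __ATOMIC_RELAXED) != skip_count)
    __atomic_store_n (&adapt_count_hint, skip_count, __ATOMIC_RELAXED);
}

int
main (void)
{
  note_aborted_transaction (3);
  printf ("adapt_count hint = %d\n",
          __atomic_load_n (&adapt_count_hint, __ATOMIC_RELAXED));
  return 0;
}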
ChangeLog
@@ -1,3 +1,15 @@
+2016-12-05  Torvald Riegel  <triegel@redhat.com>
+
+	* include/atomic.h (__atomic_check_size_ls): New.
+	(atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
+	atomic_store_release): Use it.
+	* sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
+	(elision_adapt, ELIDE_LOCK): Use atomics.
+	* sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
+	atomics and improve code comments.
+	* sysdeps/unix/sysv/linux/x86/elision-trylock.c
+	(__lll_trylock_elision): Likewise.
+
 2016-12-04  Samuel Thibault  <samuel.thibault@ens-lyon.org>
 
 	* hurd/hurd.h: Cast errno constants to error_t to fix usage in C++
include/atomic.h
@@ -550,6 +550,20 @@ void __atomic_link_error (void);
   if (sizeof (*mem) != 4) \
     __atomic_link_error ();
 # endif
+/* We additionally provide 8b and 16b atomic loads and stores; we do not yet
+   need other atomic operations of such sizes, and restricting the support to
+   loads and stores makes this easier for archs that do not have native
+   support for atomic operations to less-than-word-sized data.  */
+# if __HAVE_64B_ATOMICS == 1
+# define __atomic_check_size_ls(mem) \
+   if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && (sizeof (*mem) != 4) \
+       && (sizeof (*mem) != 8)) \
+     __atomic_link_error ();
+# else
+# define __atomic_check_size_ls(mem) \
+   if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \
+     __atomic_link_error ();
+# endif
 
 # define atomic_thread_fence_acquire() \
   __atomic_thread_fence (__ATOMIC_ACQUIRE)
@@ -559,18 +573,20 @@ void __atomic_link_error (void);
   __atomic_thread_fence (__ATOMIC_SEQ_CST)
 
 # define atomic_load_relaxed(mem) \
-  ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_RELAXED); })
+  ({ __atomic_check_size_ls((mem)); \
+     __atomic_load_n ((mem), __ATOMIC_RELAXED); })
 # define atomic_load_acquire(mem) \
-  ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
+  ({ __atomic_check_size_ls((mem)); \
+     __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
 
 # define atomic_store_relaxed(mem, val) \
   do { \
-    __atomic_check_size((mem)); \
+    __atomic_check_size_ls((mem)); \
     __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \
   } while (0)
 # define atomic_store_release(mem, val) \
   do { \
-    __atomic_check_size((mem)); \
+    __atomic_check_size_ls((mem)); \
     __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \
   } while (0)
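__atomic_check_size_ls reuses the existing link-error trick: __atomic_link_error is declared but never defined, so for a supported size the constant sizeof test is optimized away, while an unsupported size leaves a call behind and the link fails. A minimal sketch of that mechanism under invented names (my_link_error, CHECK_LS_SIZE, LOAD_RELAXED); it assumes the compiler folds the dead branch, which glibc ensures by building with optimization:

/* Declared but intentionally never defined: any call the compiler cannot
   remove shows up as an undefined reference at link time.  */
void my_link_error (void);

/* Accept only 1-, 2- and 4-byte objects, as in the !__HAVE_64B_ATOMICS case.  */
#define CHECK_LS_SIZE(mem) \
  if (sizeof (*(mem)) != 1 && sizeof (*(mem)) != 2 && sizeof (*(mem)) != 4) \
    my_link_error ();

#define LOAD_RELAXED(mem) \
  ({ CHECK_LS_SIZE (mem); __atomic_load_n ((mem), __ATOMIC_RELAXED); })

int
main (void)
{
  short hint = 1;       /* 2 bytes: the size check folds away.  */
  long long big = 2;    /* 8 bytes: LOAD_RELAXED (&big) would keep the
                           my_link_error call and make the link fail.  */
  (void) big;
  return LOAD_RELAXED (&hint) - 1;
}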
sysdeps/unix/sysv/linux/x86/elision-lock.c
@@ -44,7 +44,13 @@
 int
 __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 {
-  if (*adapt_count <= 0)
+  /* adapt_count can be accessed concurrently; these accesses can be both
+     inside of transactions (if critical sections are nested and the outer
+     critical section uses lock elision) and outside of transactions.  Thus,
+     we need to use atomic accesses to avoid data races.  However, the
+     value of adapt_count is just a hint, so relaxed MO accesses are
+     sufficient.  */
+  if (atomic_load_relaxed (adapt_count) <= 0)
     {
       unsigned status;
       int try_xbegin;
@@ -70,15 +76,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
		  && _XABORT_CODE (status) == _ABORT_LOCK_BUSY)
		{
		  /* Right now we skip here.  Better would be to wait a bit
-		     and retry.  This likely needs some spinning.  */
-		  if (*adapt_count != aconf.skip_lock_busy)
-		    *adapt_count = aconf.skip_lock_busy;
+		     and retry.  This likely needs some spinning.  See
+		     above for why relaxed MO is sufficient.  */
+		  if (atomic_load_relaxed (adapt_count)
+		      != aconf.skip_lock_busy)
+		    atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
		}
	      /* Internal abort.  There is no chance for retry.
		 Use the normal locking and next time use lock.
-		 Be careful to avoid writing to the lock.  */
-	      else if (*adapt_count != aconf.skip_lock_internal_abort)
-		*adapt_count = aconf.skip_lock_internal_abort;
+		 Be careful to avoid writing to the lock.  See above for why
+		 relaxed MO is sufficient.  */
+	      else if (atomic_load_relaxed (adapt_count)
+		  != aconf.skip_lock_internal_abort)
+		atomic_store_relaxed (adapt_count,
+		    aconf.skip_lock_internal_abort);
	      break;
	    }
	}
@@ -87,7 +98,8 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
     {
       /* Use a normal lock until the threshold counter runs out.
	 Lost updates possible.  */
-      (*adapt_count)--;
+      atomic_store_relaxed (adapt_count,
+	  atomic_load_relaxed (adapt_count) - 1);
     }
 
   /* Use a normal lock here.  */
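The last hunk above turns (*adapt_count)-- into a separate relaxed load and store rather than an atomic read-modify-write. A short sketch of that trade-off, again standalone and using the GCC built-ins directly (adapt_count_hint is an invented stand-in): lost decrements are acceptable for a hint, and the separate accesses avoid the cost of a locked instruction on x86.

#include <stdio.h>

static short adapt_count_hint = 5;

/* What the patch does: concurrent decrements may be lost, which is fine
   for a hint, and no locked read-modify-write instruction is emitted.  */
static void
decrement_hint_cheap (void)
{
  __atomic_store_n (&adapt_count_hint,
                    __atomic_load_n (&adapt_count_hint, __ATOMIC_RELAXED) - 1,
                    __ATOMIC_RELAXED);
}

/* The stricter alternative: never loses an update, but compiles to a
   locked instruction on x86 and is therefore more expensive.  */
static void
decrement_hint_exact (void)
{
  __atomic_fetch_sub (&adapt_count_hint, 1, __ATOMIC_RELAXED);
}

int
main (void)
{
  decrement_hint_cheap ();
  decrement_hint_exact ();
  printf ("adapt_count hint = %d\n",
          __atomic_load_n (&adapt_count_hint, __ATOMIC_RELAXED));
  return 0;
}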
sysdeps/unix/sysv/linux/x86/elision-trylock.c
@@ -36,8 +36,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
        return an error.  */
     _xabort (_ABORT_NESTED_TRYLOCK);
 
-  /* Only try a transaction if it's worth it.  */
-  if (*adapt_count <= 0)
+  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
+     why we need atomic accesses.  Relaxed MO is sufficient because this is
+     just a hint.  */
+  if (atomic_load_relaxed (adapt_count) <= 0)
     {
       unsigned status;
 
@@ -55,16 +57,18 @@ __lll_trylock_elision (int *futex, short *adapt_count)
       if (!(status & _XABORT_RETRY))
	{
	  /* Internal abort.  No chance for retry.  For future
-	     locks don't try speculation for some time.  */
-	  if (*adapt_count != aconf.skip_trylock_internal_abort)
-	    *adapt_count = aconf.skip_trylock_internal_abort;
+	     locks don't try speculation for some time.  See above for MO.  */
+	  if (atomic_load_relaxed (adapt_count)
+	      != aconf.skip_lock_internal_abort)
+	    atomic_store_relaxed (adapt_count, aconf.skip_lock_internal_abort);
	}
       /* Could do some retries here.  */
     }
   else
     {
-      /* Lost updates are possible, but harmless.  */
-      (*adapt_count)--;
+      /* Lost updates are possible but harmless (see above).  */
+      atomic_store_relaxed (adapt_count,
+	  atomic_load_relaxed (adapt_count) - 1);
     }
 
   return lll_trylock (*futex);
sysdeps/x86/elide.h
@@ -20,8 +20,8 @@
 
 #include <hle.h>
 #include <elision-conf.h>
+#include <atomic.h>
 
-#define ACCESS_ONCE(x) (* (volatile typeof(x) *) &(x))
-
 /* Adapt elision with ADAPT_COUNT and STATUS and decide retries.  */
 
@@ -35,28 +35,35 @@ elision_adapt(signed char *adapt_count, unsigned int status)
     {
       /* Right now we skip here.  Better would be to wait a bit
	 and retry.  This likely needs some spinning.  Be careful
-	 to avoid writing the lock.  */
-      if (*adapt_count != __elision_aconf.skip_lock_busy)
-	ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_busy;
+	 to avoid writing the lock.
+	 Using relaxed MO and separate atomic accesses is sufficient because
+	 adapt_count is just a hint.  */
+      if (atomic_load_relaxed (adapt_count) != __elision_aconf.skip_lock_busy)
+	atomic_store_relaxed (adapt_count, __elision_aconf.skip_lock_busy);
     }
   /* Internal abort.  There is no chance for retry.
      Use the normal locking and next time use lock.
-     Be careful to avoid writing to the lock.  */
-  else if (*adapt_count != __elision_aconf.skip_lock_internal_abort)
-    ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_internal_abort;
+     Be careful to avoid writing to the lock.  See above for MO.  */
+  else if (atomic_load_relaxed (adapt_count)
+      != __elision_aconf.skip_lock_internal_abort)
+    atomic_store_relaxed (adapt_count,
+	__elision_aconf.skip_lock_internal_abort);
   return true;
 }
 
 /* is_lock_free must be executed inside the transaction */
 
 /* Returns true if lock defined by IS_LOCK_FREE was elided.
-   ADAPT_COUNT is a pointer to per-lock state variable. */
+   ADAPT_COUNT is a per-lock state variable; it must be accessed atomically
+   to avoid data races but is just a hint, so using relaxed MO and separate
+   atomic loads and stores instead of atomic read-modify-write operations is
+   sufficient.  */
 
 #define ELIDE_LOCK(adapt_count, is_lock_free) \
   ({ \
     int ret = 0; \
 \
-    if ((adapt_count) <= 0) \
+    if (atomic_load_relaxed (&(adapt_count)) <= 0) \
      { \
	for (int i = __elision_aconf.retry_try_xbegin; i > 0; i--) \
	  { \
@@ -75,12 +82,13 @@ elision_adapt(signed char *adapt_count, unsigned int status)
	  } \
      } \
    else \
-      (adapt_count)--; /* missing updates ok */ \
+      atomic_store_relaxed (&(adapt_count), \
+	  atomic_load_relaxed (&(adapt_count)) - 1); \
    ret; \
  })
 
 /* Returns true if lock defined by IS_LOCK_FREE was try-elided.
-   ADAPT_COUNT is a pointer to per-lock state variable. */
+   ADAPT_COUNT is a per-lock state variable. */
 
 #define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) ({ \
   int ret = 0; \
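ELIDE_LOCK above wraps a transactional attempt around the caller's is_lock_free check, retrying a bounded number of times before falling back to the real lock. A rough standalone sketch of that shape using the RTM intrinsics from immintrin.h (build with -mrtm and run on RTM-capable hardware); elide_lock_sketch, lock_is_free and the fixed retry count of 3 are invented for the sketch and do not reproduce glibc's exact retry policy:

#include <immintrin.h>   /* _xbegin, _xend, _xabort, _XBEGIN_STARTED, ...  */
#include <stdbool.h>
#include <stdio.h>

static bool
lock_is_free (void)
{
  return true;   /* Placeholder for the real lock-word check.  */
}

static bool
elide_lock_sketch (void)
{
  for (int i = 3; i > 0; i--)   /* Cf. __elision_aconf.retry_try_xbegin.  */
    {
      unsigned int status = _xbegin ();
      if (status == _XBEGIN_STARTED)
        {
          /* The lock check must run inside the transaction so that a later
             writer of the lock word aborts us.  */
          if (lock_is_free ())
            return true;        /* Elided; the matching unlock calls _xend.  */
          /* Lock is busy: abort explicitly with an abort code.  */
          _xabort (0xff);
        }
      /* Retry only if this was our explicit abort or the hardware
         indicates a retry might succeed.  */
      if (!(status & (_XABORT_EXPLICIT | _XABORT_RETRY)))
        break;
    }
  return false;   /* Caller falls back to the normal lock path.  */
}

int
main (void)
{
  if (elide_lock_sketch ())
    _xend ();                   /* Commit the transaction: the "unlock".  */
  else
    puts ("fell back to the normal lock");
  return 0;
}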