Add adaptive elision to rwlocks

This patch relies on the C version of the rwlocks posted earlier.
With C rwlocks it is very straightforward to do adaptive elision
using TSX. It is based on the infrastructure added earlier
for mutexes, but uses its own elision macros. The macros
are fairly general purpose and could be used for other
elision purposes too.

This version is much cleaner than the earlier assembler-based
version, and in particular implements adaptation which makes
it safer.

I changed the behavior slightly to not require any changes
in the test suite and fully conform to all expected
behaviors (generally at the cost of not eliding in
various situations). In particular this means the timedlock
variants are not elided.  Nested trylock aborts.
This commit is contained in:
Andi Kleen 2014-03-31 08:07:46 -07:00
parent a832bdd362
commit 8491ed6d70
23 changed files with 227 additions and 5 deletions

View File

@ -1,3 +1,51 @@
2014-06-13 Andi Kleen <ak@linux.intel.com>
* nptl/pthread_rwlock_rdlock.c: Include elide.h.
(pthread_rwlock_rdlock): Add elision.
* nptl/pthread_rwlock_wrlock.c: Include elide.h.
(pthread_rwlock_wrlock): Add elision.
* nptl/pthread_rwlock_trywrlock.c: Include elide.h.
(pthread_rwlock_trywrlock): Add elision.
* nptl/pthread_rwlock_tryrdlock.c: Include elide.h.
(pthread_rwlock_tryrdlock): Add elision.
* nptl/pthread_rwlock_unlock.c: Include elide.h.
(pthread_rwlock_unlock): Add elision unlock.
* nptl/sysdeps/pthread/pthread.h:
(__PTHREAD_RWLOCK_ELISION_EXTRA): Handle new define.
(PTHREAD_RWLOCK_INITIALIZER,
PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP):
Handle new elision field.
* sysdeps/x86/nptl/elide.h: New file. Add generic elision macros.
* sysdeps/arm/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/sh/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/tile/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/a/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/aarch64/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/alpha/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/hppa/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/ia64/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/m68k/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/microblaze/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/mips/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/powerpc/nptl/bits/pthreadtypes.h
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
* sysdeps/unix/sysv/linux/x86/elision-conf.c:
(elision_init): Set try_xbegin to zero when no RTM.
* sysdeps/x86/nptl/bits/pthreadtypes.h
(pthread_rwlock_t): Change __pad1 to __rwelision.
(__PTHREAD_RWLOCK_ELISION_EXTRA): Add.
2014-06-13 Andi Kleen <ak@linux.intel.com> 2014-06-13 Andi Kleen <ak@linux.intel.com>
* nptl/pthread_rwlock_rdlock (__pthread_rwlock_rdlock): * nptl/pthread_rwlock_rdlock (__pthread_rwlock_rdlock):

View File

@ -22,6 +22,7 @@
#include <pthread.h> #include <pthread.h>
#include <pthreadP.h> #include <pthreadP.h>
#include <stap-probe.h> #include <stap-probe.h>
#include <elide.h>
/* Acquire read lock for RWLOCK. Slow path. */ /* Acquire read lock for RWLOCK. Slow path. */
@ -102,6 +103,12 @@ __pthread_rwlock_rdlock (pthread_rwlock_t *rwlock)
LIBC_PROBE (rdlock_entry, 1, rwlock); LIBC_PROBE (rdlock_entry, 1, rwlock);
/* Fast path: try to take the read lock by elision.  The predicate treats
   the rwlock as free only when the internal lock word is clear and neither
   a writer nor any reader is recorded; when ELIDE_LOCK yields nonzero the
   function reports success without touching the lock fields.  */
if (ELIDE_LOCK (rwlock->__data.__rwelision,
rwlock->__data.__lock == 0
&& rwlock->__data.__writer == 0
&& rwlock->__data.__nr_readers == 0))
return 0;
/* Make sure we are alone. */ /* Make sure we are alone. */
lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);

View File

@ -19,6 +19,7 @@
#include <errno.h> #include <errno.h>
#include "pthreadP.h" #include "pthreadP.h"
#include <lowlevellock.h> #include <lowlevellock.h>
#include <elide.h>
int int
@ -26,6 +27,12 @@ __pthread_rwlock_tryrdlock (pthread_rwlock_t *rwlock)
{ {
int result = EBUSY; int result = EBUSY;
/* Fast path: try to elide the read lock.  The rwlock counts as free only
   when the internal lock word is clear, no reader is registered and no
   writer owns it.  The writer test has to be `== 0', as in the blocking
   lock paths: a bare `__writer' is true exactly when a writer holds the
   lock, which would make elision succeed only on a busy lock and never on
   a free one.  The trailing 0 marks this as a read (non-write) trylock.  */
if (ELIDE_TRYLOCK (rwlock->__data.__rwelision,
                   rwlock->__data.__lock == 0
                   && rwlock->__data.__nr_readers == 0
                   && rwlock->__data.__writer == 0, 0))
  return 0;
lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
if (rwlock->__data.__writer == 0 if (rwlock->__data.__writer == 0

View File

@ -19,6 +19,7 @@
#include <errno.h> #include <errno.h>
#include "pthreadP.h" #include "pthreadP.h"
#include <lowlevellock.h> #include <lowlevellock.h>
#include <elide.h>
int int
@ -26,6 +27,12 @@ __pthread_rwlock_trywrlock (pthread_rwlock_t *rwlock)
{ {
int result = EBUSY; int result = EBUSY;
/* Fast path: try to elide the write lock.  The rwlock counts as free only
   when the internal lock word is clear, no reader is registered and no
   writer owns it.  The writer test has to be `== 0', as in the blocking
   lock paths: a bare `__writer' is true exactly when a writer holds the
   lock, which would make elision succeed only on a busy lock and never on
   a free one.  The trailing 1 marks this as a write trylock.  */
if (ELIDE_TRYLOCK (rwlock->__data.__rwelision,
                   rwlock->__data.__lock == 0
                   && rwlock->__data.__nr_readers == 0
                   && rwlock->__data.__writer == 0, 1))
  return 0;
lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
if (rwlock->__data.__writer == 0 && rwlock->__data.__nr_readers == 0) if (rwlock->__data.__writer == 0 && rwlock->__data.__nr_readers == 0)

View File

@ -22,6 +22,8 @@
#include <pthread.h> #include <pthread.h>
#include <pthreadP.h> #include <pthreadP.h>
#include <stap-probe.h> #include <stap-probe.h>
#include <elide.h>
/* Unlock RWLOCK. */ /* Unlock RWLOCK. */
int int
@ -29,6 +31,10 @@ __pthread_rwlock_unlock (pthread_rwlock_t *rwlock)
{ {
LIBC_PROBE (rwlock_unlock, 1, rwlock); LIBC_PROBE (rwlock_unlock, 1, rwlock);
/* If neither a writer nor any reader is recorded, ELIDE_UNLOCK ends the
   transaction with _xend and the unlock is complete.  Presumably this is
   the case exactly when the lock was taken by elision, since the elided
   lock paths return without updating these fields.  */
if (ELIDE_UNLOCK (rwlock->__data.__writer == 0
&& rwlock->__data.__nr_readers == 0))
return 0;
lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
if (rwlock->__data.__writer) if (rwlock->__data.__writer)
rwlock->__data.__writer = 0; rwlock->__data.__writer = 0;

View File

@ -22,6 +22,7 @@
#include <pthread.h> #include <pthread.h>
#include <pthreadP.h> #include <pthreadP.h>
#include <stap-probe.h> #include <stap-probe.h>
#include <elide.h>
/* Acquire write lock for RWLOCK. */ /* Acquire write lock for RWLOCK. */
@ -91,6 +92,12 @@ __pthread_rwlock_wrlock (pthread_rwlock_t *rwlock)
{ {
LIBC_PROBE (wrlock_entry, 1, rwlock); LIBC_PROBE (wrlock_entry, 1, rwlock);
/* Fast path: try to take the write lock by elision.  The predicate treats
   the rwlock as free only when the internal lock word is clear and neither
   a writer nor any reader is recorded; when ELIDE_LOCK yields nonzero the
   function reports success without touching the lock fields.  */
if (ELIDE_LOCK (rwlock->__data.__rwelision,
rwlock->__data.__lock == 0
&& rwlock->__data.__writer == 0
&& rwlock->__data.__nr_readers == 0))
return 0;
/* Make sure we are alone. */ /* Make sure we are alone. */
lll_lock (rwlock->__data.__lock, rwlock->__data.__shared); lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);

View File

@ -214,6 +214,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -155,6 +155,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -132,17 +132,17 @@ enum
/* Read-write lock initializers. */ /* Read-write lock initializers. */
# define PTHREAD_RWLOCK_INITIALIZER \ # define PTHREAD_RWLOCK_INITIALIZER \
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } { { 0, 0, 0, 0, 0, 0, 0, 0, __PTHREAD_RWLOCK_ELISION_EXTRA, 0, 0 } }
# ifdef __USE_GNU # ifdef __USE_GNU
# ifdef __PTHREAD_RWLOCK_INT_FLAGS_SHARED # ifdef __PTHREAD_RWLOCK_INT_FLAGS_SHARED
# define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \ # define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ { { 0, 0, 0, 0, 0, 0, 0, 0, __PTHREAD_RWLOCK_ELISION_EXTRA, 0, \
PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP } } PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP } }
# else # else
# if __BYTE_ORDER == __LITTLE_ENDIAN # if __BYTE_ORDER == __LITTLE_ENDIAN
# define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \ # define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \
{ { 0, 0, 0, 0, 0, 0, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP, \ { { 0, 0, 0, 0, 0, 0, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP, \
0, 0, 0, 0 } } 0, __PTHREAD_RWLOCK_ELISION_EXTRA, 0, 0 } }
# else # else
# define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \ # define PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP \
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP,\ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP,\

View File

@ -155,6 +155,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -194,6 +194,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -194,6 +194,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -141,6 +141,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -142,6 +142,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -196,6 +196,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -143,6 +143,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -146,6 +146,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -150,6 +150,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -203,6 +203,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -194,6 +194,8 @@ typedef union
long int __align; long int __align;
} pthread_rwlock_t; } pthread_rwlock_t;
#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
typedef union typedef union
{ {
char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T]; char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];

View File

@ -66,6 +66,8 @@ elision_init (int argc __attribute__ ((unused)),
#ifdef ENABLE_LOCK_ELISION #ifdef ENABLE_LOCK_ELISION
__pthread_force_elision = __libc_enable_secure ? 0 : __elision_available; __pthread_force_elision = __libc_enable_secure ? 0 : __elision_available;
#endif #endif
if (!HAS_RTM)
__elision_aconf.retry_try_xbegin = 0; /* Disable elision on rwlocks */
} }
#ifdef SHARED #ifdef SHARED

View File

@ -184,11 +184,13 @@ typedef union
unsigned int __nr_writers_queued; unsigned int __nr_writers_queued;
int __writer; int __writer;
int __shared; int __shared;
unsigned long int __pad1; signed char __rwelision;
unsigned char __pad1[7];
unsigned long int __pad2; unsigned long int __pad2;
/* FLAGS must stay at this position in the structure to maintain /* FLAGS must stay at this position in the structure to maintain
binary compatibility. */ binary compatibility. */
unsigned int __flags; unsigned int __flags;
# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, {0, 0, 0, 0, 0, 0, 0 }
# define __PTHREAD_RWLOCK_INT_FLAGS_SHARED 1 # define __PTHREAD_RWLOCK_INT_FLAGS_SHARED 1
} __data; } __data;
# else # else
@ -204,7 +206,8 @@ typedef union
binary compatibility. */ binary compatibility. */
unsigned char __flags; unsigned char __flags;
unsigned char __shared; unsigned char __shared;
unsigned char __pad1; signed char __rwelision;
# define __PTHREAD_RWLOCK_ELISION_EXTRA 0
unsigned char __pad2; unsigned char __pad2;
int __writer; int __writer;
} __data; } __data;

109
sysdeps/x86/nptl/elide.h Normal file
View File

@ -0,0 +1,109 @@
/* elide.h: Generic lock elision support.
Copyright (C) 2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#ifndef ELIDE_H
#define ELIDE_H 1
#include <hle.h>
#include <elision-conf.h>
/* Access the lvalue X through a volatile-qualified pointer of X's own type,
   so the read or write is performed as written.  The result is itself an
   lvalue and may be assigned to.  Spelled with __typeof__ rather than
   typeof so the macro also compiles in strict ISO modes (e.g. -std=c11),
   where the plain keyword is not recognized.  */
#define ACCESS_ONCE(x) (* (volatile __typeof__ (x) *) &(x))
/* Adapt elision with ADAPT_COUNT and STATUS and decide retries. */
static inline bool
elision_adapt(uint8_t *adapt_count, unsigned int status)
{
if (status & _XABORT_RETRY)
return false;
if ((status & _XABORT_EXPLICIT)
&& _XABORT_CODE (status) == _ABORT_LOCK_BUSY)
{
/* Right now we skip here. Better would be to wait a bit
and retry. This likely needs some spinning. Be careful
to avoid writing the lock. */
if (*adapt_count != __elision_aconf.skip_lock_busy)
ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_busy;
}
/* Internal abort. There is no chance for retry.
Use the normal locking and next time use lock.
Be careful to avoid writing to the lock. */
else if (*adapt_count != __elision_aconf.skip_lock_internal_abort)
ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_internal_abort;
return true;
}
/* Try to elide a lock.

   ADAPT_COUNT is the per-lock adaptation counter itself -- an lvalue, not
   a pointer: the macro compares it, decrements it and takes its address.
   IS_LOCK_FREE is an expression telling whether the lock is free; it is
   evaluated after _xbegin has reported _XBEGIN_STARTED, i.e. inside the
   transaction.

   Evaluates to 1 if a transaction was started and IS_LOCK_FREE held inside
   it.  No _xend is issued here; ELIDE_UNLOCK issues it.  Evaluates to 0
   otherwise.

   While ADAPT_COUNT is positive no attempt is made and the counter is
   merely decremented (a lost update is acceptable).  Otherwise up to
   __elision_aconf.retry_try_xbegin attempts are made.  An attempt that
   finds the lock busy aborts with _xabort (_ABORT_LOCK_BUSY).  After each
   failed attempt elision_adapt is given the abort status, and the loop
   ends early if it returns false.  */
#define ELIDE_LOCK(adapt_count, is_lock_free) \
({ \
int ret = 0; \
\
if ((adapt_count) <= 0) \
{ \
for (int i = __elision_aconf.retry_try_xbegin; i > 0; i--) \
{ \
unsigned int status; \
if ((status = _xbegin ()) == _XBEGIN_STARTED) \
{ \
if (is_lock_free) \
{ \
ret = 1; \
break; \
} \
_xabort (_ABORT_LOCK_BUSY); \
} \
if (!elision_adapt (&(adapt_count), status)) \
break; \
} \
} \
else \
(adapt_count)--; /* missing updates ok */ \
ret; \
})
/* Try-lock flavour of ELIDE_LOCK.

   ADAPT_COUNT is the per-lock adaptation counter (an lvalue, handed on to
   ELIDE_LOCK unchanged), IS_LOCK_FREE is the lock-free expression that
   ELIDE_LOCK evaluates, and WRITE is nonzero for a write trylock.

   Unless __elision_aconf.retry_try_xbegin is positive nothing is attempted
   and the result is 0.  Otherwise a write trylock first issues
   _xabort (_ABORT_NESTED_TRYLOCK) -- presumably so that a trylock nested in
   an already running transaction aborts it -- and the result is whatever
   ELIDE_LOCK yields.  */
#define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) \
  ({ \
    int try_elided; \
    if (!(__elision_aconf.retry_try_xbegin > 0)) \
      try_elided = 0; \
    else \
      { \
        if (write) \
          _xabort (_ABORT_NESTED_TRYLOCK); \
        try_elided = ELIDE_LOCK (adapt_count, is_lock_free); \
      } \
    try_elided; \
  })
/* Finish an elided lock.  IS_LOCK_FREE is evaluated once; when it is
   nonzero the transaction is ended with _xend () and the macro yields 1,
   otherwise nothing is done and it yields 0.  */
#define ELIDE_UNLOCK(is_lock_free) \
  ((is_lock_free) ? (_xend (), 1) : 0)
#endif