powerpc __tls_get_addr call optimization

This patch is glibc support for a PowerPC TLS optimization, inspired
by Alexandre Oliva's TLS optimization for other processors,
http://www.lsd.ic.unicamp.br/~oliva/writeups/TLS/RFC-TLSDESC-x86.txt

In essence, this optimization uses a zero module id in the tls_index
GOT entry to indicate that a TLS variable is allocated space in the
static TLS area.  A special plt call linker stub for __tls_get_addr
checks for such a tls_index and if found, returns the offset
immediately.  The linker communicates the fact that the special
__tls_get_addr stub is used by setting a bit in the dynamic tag
DT_PPC64_OPT/DT_PPC_OPT.  glibc communicates to the linker that this
optimization is available by the presence of __tls_get_addr_opt.

tst-tlsmod2.so is built with -Wl,--no-tls-get-addr-optimize because
tst-tls-dlinfo checks that no static TLS is allocated and would
otherwise fail.  The ld option --no-tls-get-addr-optimize has been
available since binutils-2.20, so no configure test is needed.

	* NEWS: Advertise TLS optimization.
	* elf/elf.h (R_PPC_TLSGD, R_PPC_TLSLD, DT_PPC_OPT, PPC_OPT_TLS): Define.
	(DT_PPC_NUM): Increment.
	* elf/dynamic-link.h (HAVE_STATIC_TLS): Define.
	(CHECK_STATIC_TLS): Use here.
	* sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela): Optimize
	TLS descriptors.
	* sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_rela): Likewise.
	* sysdeps/powerpc/dl-tls.c: New file.
	* sysdeps/powerpc/Versions: Add __tls_get_addr_opt.
	* sysdeps/powerpc/tst-tlsopt-powerpc.c: New tls test.
	* sysdeps/unix/sysv/linux/powerpc/Makefile: Add new test.
	Build tst-tlsmod2.so with --no-tls-get-addr-optimize.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/ld.abilist: Update.
	* sysdeps/unix/sysv/linux/powerpc/powerpc64/ld.abilist: Likewise.
	* sysdeps/unix/sysv/linux/powerpc/powerpc64/ld-le.abilist: Likewise.
This commit is contained in:
Alan Modra 2015-03-25 15:53:47 +10:30
parent da9f333410
commit afcd9480fe
14 changed files with 228 additions and 4 deletions

View File

@ -1,3 +1,22 @@
2015-03-25 Alan Modra <amodra@gmail.com>
* NEWS: Advertise TLS optimization.
* elf/elf.h (R_PPC_TLSGD, R_PPC_TLSLD, DT_PPC_OPT, PPC_OPT_TLS): Define.
(DT_PPC_NUM): Increment.
* elf/dynamic-link.h (HAVE_STATIC_TLS): Define.
(CHECK_STATIC_TLS): Use here.
* sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela): Optimize
TLS descriptors.
* sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_rela): Likewise.
* sysdeps/powerpc/dl-tls.c: New file.
* sysdeps/powerpc/Versions: Add __tls_get_addr_opt.
* sysdeps/powerpc/tst-tlsopt-powerpc.c: New tls test.
* sysdeps/unix/sysv/linux/powerpc/Makefile: Add new test.
Build tst-tlsmod2.so with --no-tls-get-addr-optimize.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/ld.abilist: Update.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/ld.abilist: Likewise.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/ld-le.abilist: Likewise.
2015-03-25 Alan Modra <amodra@gmail.com>
* sysdeps/powerpc/powerpc64/configure.ac: Correct "linker support

4
NEWS
View File

@ -17,6 +17,10 @@ Version 2.22
18039, 18042, 18043, 18046, 18047, 18068, 18080, 18093, 18100, 18104,
18110, 18111, 18128, 18138.
* A powerpc and powerpc64 optimization for TLS, similar to TLS descriptors
for LD and GD on x86 and x86-64, has been implemented. You will need
binutils-2.24 or later to enable this optimization.
* Character encoding and ctype tables were updated to Unicode 7.0.0, using
new generator scripts contributed by Pravin Satpute and Mike FABIAN (Red
Hat). These updates cause user visible changes, such as the fix for bug

View File

@ -25,11 +25,14 @@
an attempt to allocate it in surplus space on the fly. If that
can't be done, we fall back to the error that DF_STATIC_TLS is
intended to produce. */
/* Evaluate to nonzero when SYM_MAP's l_tls_offset is neither
   NO_TLS_OFFSET nor FORCED_DYNAMIC_TLS_OFFSET.  That outcome is hinted
   to the compiler as the expected one.  MAP is not used by the
   expansion.  */
#define HAVE_STATIC_TLS(map, sym_map) \
  (__builtin_expect (!((sym_map)->l_tls_offset == NO_TLS_OFFSET \
                       || ((sym_map)->l_tls_offset \
                           == FORCED_DYNAMIC_TLS_OFFSET)), 1))
/* If SYM_MAP does not have static TLS yet (see HAVE_STATIC_TLS), call
   _dl_allocate_static_tls to try to allocate it on the fly.  The
   condition is expressed solely through HAVE_STATIC_TLS so that the
   two macros cannot drift apart.  */
#define CHECK_STATIC_TLS(map, sym_map) \
  do { \
    if (!HAVE_STATIC_TLS (map, sym_map)) \
      _dl_allocate_static_tls (sym_map); \
  } while (0)

View File

@ -2194,6 +2194,8 @@ enum
#define R_PPC_GOT_DTPREL16_LO 92 /* half16* (sym+add)@got@dtprel@l */
#define R_PPC_GOT_DTPREL16_HI 93 /* half16* (sym+add)@got@dtprel@h */
#define R_PPC_GOT_DTPREL16_HA 94 /* half16* (sym+add)@got@dtprel@ha */
#define R_PPC_TLSGD 95 /* none (sym+add)@tlsgd */
#define R_PPC_TLSLD 96 /* none (sym+add)@tlsld */
/* The remaining relocs are from the Embedded ELF ABI, and are not
in the SVR4 ELF ABI. */
@ -2237,7 +2239,11 @@ enum
/* PowerPC specific values for the Dyn d_tag field. */
#define DT_PPC_GOT (DT_LOPROC + 0)
#define DT_PPC_OPT (DT_LOPROC + 1)
/* Count of the DT_PPC_* tags above; it is 2 now that DT_PPC_OPT exists.
   It is defined exactly once so the value cannot conflict.  */
#define DT_PPC_NUM 2
/* PowerPC specific values for the DT_PPC_OPT Dyn entry. */
/* Bit set by the linker when the special __tls_get_addr stub is used.  */
#define PPC_OPT_TLS 1
/* PowerPC64 relocations defined by the ABIs */
#define R_PPC64_NONE R_PPC_NONE

View File

@ -8,6 +8,9 @@ sysdep-dl-routines += dl-machine
sysdep_routines += dl-machine
# extra shared linker files to link only into dl-allobjs.so
sysdep-rtld-routines += dl-machine
# Don't optimize GD tls sequence to LE.
LDFLAGS-tst-tlsopt-powerpc += -Wl,--no-tls-optimize
tests += tst-tlsopt-powerpc
endif
ifeq ($(subdir),setjmp)

View File

@ -15,3 +15,9 @@ libc {
__vmx__libc_longjmp; __vmx__libc_siglongjmp;
}
}
# The presence of __tls_get_addr_opt is how glibc tells the linker that
# the __tls_get_addr call optimization is available.
ld {
GLIBC_2.22 {
__tls_get_addr_opt;
}
}

24
sysdeps/powerpc/dl-tls.c Normal file
View File

@ -0,0 +1,24 @@
/* Thread-local storage handling in the ELF dynamic linker. PowerPC version.
Copyright (C) 2009-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
/* Pull in the generic TLS implementation unchanged.  */
#include "elf/dl-tls.c"
#ifdef SHARED
/* In the shared build, additionally provide __tls_get_addr under the
   name __tls_get_addr_opt.  The presence of that symbol is what tells
   the linker that the optimized __tls_get_addr call stub may be
   used.  */
strong_alias(__tls_get_addr, __tls_get_addr_opt)
#endif

View File

@ -333,6 +333,32 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
# endif
case R_PPC_DTPMOD32:
if (map->l_info[DT_PPC(OPT)]
&& (map->l_info[DT_PPC(OPT)]->d_un.d_val & PPC_OPT_TLS))
{
if (!NOT_BOOTSTRAP)
{
reloc_addr[0] = 0;
reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ TLS_DTV_OFFSET);
break;
}
else if (sym_map != NULL)
{
# ifndef SHARED
CHECK_STATIC_TLS (map, sym_map);
# else
if (TRY_STATIC_TLS (map, sym_map))
# endif
{
reloc_addr[0] = 0;
/* Set up for local dynamic. */
reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ TLS_DTV_OFFSET);
break;
}
}
}
if (!NOT_BOOTSTRAP)
/* During startup the dynamic linker is always index 1. */
*reloc_addr = 1;
@ -342,6 +368,28 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
*reloc_addr = sym_map->l_tls_modid;
break;
case R_PPC_DTPREL32:
if (map->l_info[DT_PPC(OPT)]
&& (map->l_info[DT_PPC(OPT)]->d_un.d_val & PPC_OPT_TLS))
{
if (!NOT_BOOTSTRAP)
{
*reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
break;
}
else if (sym_map != NULL)
{
/* This reloc is always preceded by R_PPC_DTPMOD32. */
# ifndef SHARED
assert (HAVE_STATIC_TLS (map, sym_map));
# else
if (HAVE_STATIC_TLS (map, sym_map))
# endif
{
*reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
break;
}
}
}
/* During relocation all TLS symbols are defined and used.
Therefore the offset is already correct. */
if (NOT_BOOTSTRAP && sym_map != NULL)

View File

@ -701,6 +701,32 @@ elf_machine_rela (struct link_map *map,
return;
case R_PPC64_DTPMOD64:
if (map->l_info[DT_PPC64(OPT)]
&& (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS))
{
#ifdef RTLD_BOOTSTRAP
reloc_addr[0] = 0;
reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ TLS_DTV_OFFSET);
return;
#else
if (sym_map != NULL)
{
# ifndef SHARED
CHECK_STATIC_TLS (map, sym_map);
# else
if (TRY_STATIC_TLS (map, sym_map))
# endif
{
reloc_addr[0] = 0;
/* Set up for local dynamic. */
reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ TLS_DTV_OFFSET);
return;
}
}
#endif
}
#ifdef RTLD_BOOTSTRAP
/* During startup the dynamic linker is always index 1. */
*reloc_addr = 1;
@ -713,6 +739,28 @@ elf_machine_rela (struct link_map *map,
return;
case R_PPC64_DTPREL64:
if (map->l_info[DT_PPC64(OPT)]
&& (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS))
{
#ifdef RTLD_BOOTSTRAP
*reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
return;
#else
if (sym_map != NULL)
{
/* This reloc is always preceded by R_PPC64_DTPMOD64. */
# ifndef SHARED
assert (HAVE_STATIC_TLS (map, sym_map));
# else
if (HAVE_STATIC_TLS (map, sym_map))
# endif
{
*reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
return;
}
}
#endif
}
/* During relocation all TLS symbols are defined and used.
Therefore the offset is already correct. */
#ifndef RTLD_BOOTSTRAP

View File

@ -0,0 +1,52 @@
/* glibc test for __tls_get_addr optimization. */
#include <stdio.h>
#include "../../elf/tls-macros.h"
#include "dl-tls.h"
/* common 'int' variable in TLS. */
COMMON_INT_DEF(foo);
#define TEST_FUNCTION do_test ()
/* Check that the __tls_get_addr optimization was applied to the
   general-dynamic GOT entry of `foo'.

   The variable is first read through the ordinary general-dynamic
   access.  The tls_index GOT entry for it is then located by hand and
   inspected: ti_module must be zero, and ti_offset added to the thread
   pointer register must give the address the GD access returned.

   Returns 0 on success, 1 if any check fails.  */
static int
do_test (void)
{
  int result = 0;

  /* Get variable using general dynamic model.  */
  int *ap = TLS_GD (foo);
  if (*ap != 0)
    {
      printf ("foo = %d\n", *ap);
      result = 1;
    }

  tls_index *tls_arg;
#ifdef __powerpc64__
  /* r13 is read as the thread pointer; the GOT entry is addressed
     relative to r2.  */
  register unsigned long thread_pointer __asm__ ("r13");
  asm ("addi %0,2,foo@got@tlsgd" : "=r" (tls_arg));
#else
  /* r2 is read as the thread pointer.  */
  register unsigned long thread_pointer __asm__ ("r2");
  /* Compute the GOT address PC-relatively.  bcl overwrites the link
     register, so LR has to be declared as clobbered; otherwise the
     compiler may assume it still holds its previous value.  */
  asm ("bcl 20,31,1f\n1:\t"
       "mflr %0\n\t"
       "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t"
       "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t"
       "addi %0,%0,foo@got@tlsgd" : "=b" (tls_arg) : : "lr");
#endif

  if (tls_arg->ti_module != 0)
    {
      /* A non-zero module id means the entry was left in its
         unoptimized form.  */
      printf ("tls_index not optimized, binutils too old?\n");
      result = 1;
    }
  else if (tls_arg->ti_offset + thread_pointer != (unsigned long) ap)
    {
      printf ("tls_index->ti_offset wrong value\n");
      result = 1;
    }

  return result;
}
#include "../../test-skeleton.c"

View File

@ -20,6 +20,8 @@ ifeq ($(build-shared),yes)
# This is needed for DSO loading from static binaries.
sysdep-dl-routines += dl-static
endif
# Otherwise tst-tls-dlinfo fails due to tst-tlsmod2.so using static tls.
LDFLAGS-tst-tlsmod2.so += -Wl,--no-tls-get-addr-optimize
endif
ifeq ($(subdir),misc)

View File

@ -10,6 +10,9 @@ GLIBC_2.1
GLIBC_2.1 A
__libc_stack_end D 0x4
_dl_mcount F
GLIBC_2.22
GLIBC_2.22 A
__tls_get_addr_opt F
GLIBC_2.3
GLIBC_2.3 A
__tls_get_addr F

View File

@ -9,3 +9,6 @@ GLIBC_2.17
free F
malloc F
realloc F
GLIBC_2.22
GLIBC_2.22 A
__tls_get_addr_opt F

View File

@ -1,3 +1,6 @@
GLIBC_2.22
GLIBC_2.22 A
__tls_get_addr_opt F
GLIBC_2.3
GLIBC_2.3 A
__libc_memalign F