LoongArch: Add vector implementation for _dl_runtime_resolve.

This commit is contained in:
caiyinyu 2023-07-05 16:38:05 +08:00
parent 0d341d09f2
commit 7f079fdc16
8 changed files with 246 additions and 71 deletions

View File

@ -270,6 +270,10 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* If using PLTs, fill in the first two entries of .got.plt. */
if (l->l_info[DT_JMPREL])
{
#if HAVE_LOONGARCH_VEC_ASM
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
#endif
extern void _dl_runtime_resolve (void) attribute_hidden;
extern void _dl_runtime_profile (void) attribute_hidden;
@ -296,7 +300,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to
the resolved address. */
gotplt[0] = (ElfW (Addr)) & _dl_runtime_resolve;
/* Choose the lazy-binding resolver according to the vector extension
   reported in the hardware capabilities.  The vector resolvers are
   only considered for hard-float builds: in a soft-float build the
   trampoline template reserves just the general-purpose save area,
   while its vector save code stores beyond that area, so selecting a
   vector resolver there would overwrite the caller's stack.  */
#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float
if (SUPPORT_LASX)
  gotplt[0] = (ElfW (Addr)) &_dl_runtime_resolve_lasx;
else if (SUPPORT_LSX)
  gotplt[0] = (ElfW (Addr)) &_dl_runtime_resolve_lsx;
else
#endif
  /* Fallback, and the only choice when no vector resolver applies.  */
  gotplt[0] = (ElfW (Addr)) &_dl_runtime_resolve;
}
gotplt[1] = (ElfW (Addr)) l;
}

View File

@ -19,78 +19,26 @@
#include <sysdep.h>
#include <sys/asm.h>
/* Instantiate the resolver template once per register-save flavour.
   Before each inclusion a USE_* macro selects the flavour and
   _dl_runtime_resolve is redefined so that the template's entry point
   is emitted under a distinct symbol name.  */
#if HAVE_LOONGARCH_VEC_ASM
/* First pass: emitted as _dl_runtime_resolve_lasx.  */
#define USE_LASX
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
#include "dl-trampoline.h"
/* Drop the per-pass macros (including the FRAME_SIZE left behind by
   the inclusion) so the next pass starts clean.  */
#undef FRAME_SIZE
#undef USE_LASX
#undef _dl_runtime_resolve
/* Second pass: emitted as _dl_runtime_resolve_lsx.  */
#define USE_LSX
#define _dl_runtime_resolve _dl_runtime_resolve_lsx
#include "dl-trampoline.h"
#undef FRAME_SIZE
#undef USE_LSX
#undef _dl_runtime_resolve
#endif
/* Last pass with neither USE_LASX nor USE_LSX defined and no rename:
   the plain _dl_runtime_resolve.  */
#include "dl-trampoline.h"
#include "dl-link.h"
/* Assembler veneer called from the PLT header code for lazy loading.
The PLT header passes its own args in t0-t2. */
/* Frame size: ten general-purpose slots, plus eight floating-point
   slots unless building for soft float, rounded with ALMASK.  */
#ifdef __loongarch_soft_float
#define FRAME_SIZE (-((-10 * SZREG) & ALMASK))
#else
#define FRAME_SIZE (-((-10 * SZREG - 8 * SZFREG) & ALMASK))
#endif
ENTRY (_dl_runtime_resolve)
/* Save arguments to stack. */
ADDI sp, sp, -FRAME_SIZE
/* ra goes in slot 9 and a0-a7 in slots 1-8; slot 0 is not written.  */
REG_S ra, sp, 9*SZREG
REG_S a0, sp, 1*SZREG
REG_S a1, sp, 2*SZREG
REG_S a2, sp, 3*SZREG
REG_S a3, sp, 4*SZREG
REG_S a4, sp, 5*SZREG
REG_S a5, sp, 6*SZREG
REG_S a6, sp, 7*SZREG
REG_S a7, sp, 8*SZREG
#ifndef __loongarch_soft_float
/* fa0-fa7 follow the ten general-purpose slots.  */
FREG_S fa0, sp, 10*SZREG + 0*SZFREG
FREG_S fa1, sp, 10*SZREG + 1*SZFREG
FREG_S fa2, sp, 10*SZREG + 2*SZFREG
FREG_S fa3, sp, 10*SZREG + 3*SZFREG
FREG_S fa4, sp, 10*SZREG + 4*SZFREG
FREG_S fa5, sp, 10*SZREG + 5*SZFREG
FREG_S fa6, sp, 10*SZREG + 6*SZFREG
FREG_S fa7, sp, 10*SZREG + 7*SZFREG
#endif
/* Update .got.plt and obtain runtime address of callee */
/* Call _dl_fixup with a0 = t0 and a1 = t1 * 3 (t1 shifted left by one,
   plus t1).  */
SLLI a1, t1, 1
or a0, t0, zero
ADD a1, a1, t1
la a2, _dl_fixup
jirl ra, a2, 0
/* Keep the value returned in v0 in t1 while the argument registers
   are reloaded.  */
or t1, v0, zero
/* Restore arguments from stack. */
REG_L ra, sp, 9*SZREG
REG_L a0, sp, 1*SZREG
REG_L a1, sp, 2*SZREG
REG_L a2, sp, 3*SZREG
REG_L a3, sp, 4*SZREG
REG_L a4, sp, 5*SZREG
REG_L a5, sp, 6*SZREG
REG_L a6, sp, 7*SZREG
REG_L a7, sp, 8*SZREG
#ifndef __loongarch_soft_float
FREG_L fa0, sp, 10*SZREG + 0*SZFREG
FREG_L fa1, sp, 10*SZREG + 1*SZFREG
FREG_L fa2, sp, 10*SZREG + 2*SZFREG
FREG_L fa3, sp, 10*SZREG + 3*SZFREG
FREG_L fa4, sp, 10*SZREG + 4*SZFREG
FREG_L fa5, sp, 10*SZREG + 5*SZFREG
FREG_L fa6, sp, 10*SZREG + 6*SZFREG
FREG_L fa7, sp, 10*SZREG + 7*SZFREG
#endif
/* Release the frame before leaving.  */
ADDI sp, sp, FRAME_SIZE
/* Invoke the callee. */
/* Jump to the address in t1; the link register operand is zero, so ra
   keeps the value restored above.  */
jirl zero, t1, 0
END (_dl_runtime_resolve)
ENTRY (_dl_runtime_profile)
/* LoongArch we get called with:
t0 link_map pointer

View File

@ -0,0 +1,129 @@
/* PLT trampolines.
Copyright (C) 2022-2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
/* Assembler veneer called from the PLT header code for lazy loading.
The PLT header passes its own args in t0-t2. */
/* Size of the resolver's stack frame, rounded with ALMASK.

   The frame starts with nine general-purpose slots (ra, a0-a7).  The
   hard-float variant adds eight floating-point slots; the vector
   variants keep that eight-slot gap and add eight vector slots after
   it.

   The conditions are tested in the same order as the save/restore
   code in _dl_runtime_resolve (vector variant first, floating-point
   ABI second), so the frame always covers every offset that code
   stores to, even if a soft-float build requests USE_LASX or
   USE_LSX.  */
#ifdef USE_LASX
# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZXREG) & ALMASK))
#elif defined USE_LSX
# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZVREG) & ALMASK))
#elif !defined __loongarch_soft_float
# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG) & ALMASK))
#else
# define FRAME_SIZE (-((-9 * SZREG) & ALMASK))
#endif
/* Lazy-binding resolver template.  It saves the return address and the
   argument registers, calls _dl_fixup, restores the saved registers
   and jumps to the address _dl_fixup returned.  USE_LASX / USE_LSX
   select which register file is saved alongside a0-a7.  */
ENTRY (_dl_runtime_resolve)
/* Save arguments to stack. */
ADDI sp, sp, -FRAME_SIZE
/* Slot 0 holds ra, slots 1-8 hold a0-a7.  */
REG_S ra, sp, 0*SZREG
REG_S a0, sp, 1*SZREG
REG_S a1, sp, 2*SZREG
REG_S a2, sp, 3*SZREG
REG_S a3, sp, 4*SZREG
REG_S a4, sp, 5*SZREG
REG_S a5, sp, 6*SZREG
REG_S a6, sp, 7*SZREG
REG_S a7, sp, 8*SZREG
#ifdef USE_LASX
/* xr0-xr7 are stored after the nine GPR slots and an eight-slot
   SZFREG gap that this variant does not write.  fa0-fa7 are not saved
   separately here -- presumably they alias the low part of xr0-xr7;
   TODO confirm against the ISA manual.  */
xvst xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
xvst xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
xvst xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
xvst xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
xvst xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
xvst xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
xvst xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
xvst xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
#elif defined USE_LSX
/* Same layout as the LASX case, with SZVREG-sized slots for vr0-vr7.  */
vst vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
vst vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
vst vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
vst vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
vst vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
vst vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
vst vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
vst vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
#elif !defined __loongarch_soft_float
/* Scalar hard-float variant: fa0-fa7 directly follow the GPR slots.  */
FREG_S fa0, sp, 9*SZREG + 0*SZFREG
FREG_S fa1, sp, 9*SZREG + 1*SZFREG
FREG_S fa2, sp, 9*SZREG + 2*SZFREG
FREG_S fa3, sp, 9*SZREG + 3*SZFREG
FREG_S fa4, sp, 9*SZREG + 4*SZFREG
FREG_S fa5, sp, 9*SZREG + 5*SZFREG
FREG_S fa6, sp, 9*SZREG + 6*SZFREG
FREG_S fa7, sp, 9*SZREG + 7*SZFREG
#endif
/* Update .got.plt and obtain runtime address of callee */
/* Call _dl_fixup with a0 = t0 and a1 = t1 * 3 (t1 shifted left by one,
   plus t1).  */
SLLI a1, t1, 1
or a0, t0, zero
ADD a1, a1, t1
la a2, _dl_fixup
jirl ra, a2, 0
/* Keep the value returned in v0 in t1 while the argument registers
   are reloaded.  */
or t1, v0, zero
/* Restore arguments from stack. */
REG_L ra, sp, 0*SZREG
REG_L a0, sp, 1*SZREG
REG_L a1, sp, 2*SZREG
REG_L a2, sp, 3*SZREG
REG_L a3, sp, 4*SZREG
REG_L a4, sp, 5*SZREG
REG_L a5, sp, 6*SZREG
REG_L a6, sp, 7*SZREG
REG_L a7, sp, 8*SZREG
/* Reload whichever register file was saved above, from the same
   offsets.  */
#ifdef USE_LASX
xvld xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
xvld xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
xvld xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
xvld xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
xvld xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
xvld xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
xvld xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
xvld xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
#elif defined USE_LSX
vld vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
vld vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
vld vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
vld vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
vld vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
vld vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
vld vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
vld vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
#elif !defined __loongarch_soft_float
FREG_L fa0, sp, 9*SZREG + 0*SZFREG
FREG_L fa1, sp, 9*SZREG + 1*SZFREG
FREG_L fa2, sp, 9*SZREG + 2*SZFREG
FREG_L fa3, sp, 9*SZREG + 3*SZFREG
FREG_L fa4, sp, 9*SZREG + 4*SZFREG
FREG_L fa5, sp, 9*SZREG + 5*SZFREG
FREG_L fa6, sp, 9*SZREG + 6*SZFREG
FREG_L fa7, sp, 9*SZREG + 7*SZFREG
#endif
/* Release the frame before leaving.  */
ADDI sp, sp, FRAME_SIZE
/* Invoke the callee. */
/* Jump to the address in t1; the link register operand is zero, so ra
   keeps the value restored above.  */
jirl zero, t1, 0
END (_dl_runtime_resolve)

View File

@ -20,6 +20,7 @@
#define _LOONGARCH_LDSODEFS_H 1
#include <elf.h>
#include <cpu-features.h>
struct La_loongarch_regs;
struct La_loongarch_retval;

View File

@ -25,6 +25,8 @@
/* Macros to handle different pointer/register sizes for 32/64-bit code. */
/* Stack-slot sizes in bytes, used as strides when assembly code lays
   out register save areas.  */
#define SZREG 8
#define SZFREG 8
/* Slot sizes for the vr (LSX) and xr (LASX) vector registers.  */
#define SZVREG 16
#define SZXREG 32
/* Load/store and shift mnemonics for a general-purpose register.  */
#define REG_L ld.d
#define REG_S st.d
#define SRLI srli.d

View File

@ -90,4 +90,22 @@
#define fs6 $f30
#define fs7 $f31
/* Bare names for the first eight vr vector registers, so assembly
   sources can write them without the '$' prefix.  */
#define vr0 $vr0
#define vr1 $vr1
#define vr2 $vr2
#define vr3 $vr3
#define vr4 $vr4
#define vr5 $vr5
#define vr6 $vr6
#define vr7 $vr7
/* Bare names for the first eight xr vector registers.  */
#define xr0 $xr0
#define xr1 $xr1
#define xr2 $xr2
#define xr3 $xr3
#define xr4 $xr4
#define xr5 $xr5
#define xr6 $xr6
#define xr7 $xr7
#endif /* _SYS_REGDEF_H */

View File

@ -0,0 +1,37 @@
/* Defines for bits in AT_HWCAP. LoongArch64 Linux version.
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#if !defined (_SYS_AUXV_H)
# error "Never include <bits/hwcap.h> directly; use <sys/auxv.h> instead."
#endif
/* The following must match the kernel's <asm/hwcap.h>. */
/* HWCAP flags */
/* Each flag is a single bit of the AT_HWCAP value; test a capability
   by masking that value with the corresponding constant.  */
#define HWCAP_LOONGARCH_CPUCFG (1 << 0)
#define HWCAP_LOONGARCH_LAM (1 << 1)
#define HWCAP_LOONGARCH_UAL (1 << 2)
#define HWCAP_LOONGARCH_FPU (1 << 3)
#define HWCAP_LOONGARCH_LSX (1 << 4)
#define HWCAP_LOONGARCH_LASX (1 << 5)
#define HWCAP_LOONGARCH_CRC32 (1 << 6)
#define HWCAP_LOONGARCH_COMPLEX (1 << 7)
#define HWCAP_LOONGARCH_CRYPTO (1 << 8)
#define HWCAP_LOONGARCH_LVZ (1 << 9)
#define HWCAP_LOONGARCH_LBT_X86 (1 << 10)
#define HWCAP_LOONGARCH_LBT_ARM (1 << 11)
#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12)

View File

@ -0,0 +1,29 @@
/* Initialize CPU feature data. LoongArch64 version.
This file is part of the GNU C Library.
Copyright (C) 2022 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#ifndef _CPU_FEATURES_LOONGARCH64_H
#define _CPU_FEATURES_LOONGARCH64_H
#include <sys/auxv.h>
/* Run-time feature tests against the dynamic linker's recorded hwcap
   word.  Each macro yields the masked bit itself (non-zero when the
   feature is present), not a normalized 0/1, so use it only in a
   boolean context.  */
#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
#endif /* _CPU_FEATURES_LOONGARCH64_H */