LoongArch: Add vector implementation for _dl_runtime_resolve.

This commit is contained in:
caiyinyu 2023-07-05 16:38:05 +08:00
parent 0d341d09f2
commit 7f079fdc16
8 changed files with 246 additions and 71 deletions

View File

@ -270,6 +270,10 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* If using PLTs, fill in the first two entries of .got.plt. */ /* If using PLTs, fill in the first two entries of .got.plt. */
if (l->l_info[DT_JMPREL]) if (l->l_info[DT_JMPREL])
{ {
#if HAVE_LOONGARCH_VEC_ASM
extern void _dl_runtime_resolve_lasx (void) attribute_hidden;
extern void _dl_runtime_resolve_lsx (void) attribute_hidden;
#endif
extern void _dl_runtime_resolve (void) attribute_hidden; extern void _dl_runtime_resolve (void) attribute_hidden;
extern void _dl_runtime_profile (void) attribute_hidden; extern void _dl_runtime_profile (void) attribute_hidden;
@ -296,7 +300,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* This function will get called to fix up the GOT entry /* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to indicated by the offset on the stack, and then jump to
the resolved address. */ the resolved address. */
gotplt[0] = (ElfW (Addr)) & _dl_runtime_resolve; #if HAVE_LOONGARCH_VEC_ASM
if (SUPPORT_LASX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
else if (SUPPORT_LSX)
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
else
#endif
gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve;
} }
gotplt[1] = (ElfW (Addr)) l; gotplt[1] = (ElfW (Addr)) l;
} }

View File

@ -19,78 +19,26 @@
#include <sysdep.h> #include <sysdep.h>
#include <sys/asm.h> #include <sys/asm.h>
/* Instantiate _dl_runtime_resolve several times from the same template.
   dl-trampoline.h emits one ENTRY (_dl_runtime_resolve) ... END pair per
   inclusion; the USE_LASX / USE_LSX switches select which registers it
   saves, and redefining _dl_runtime_resolve renames the emitted symbol. */
#if HAVE_LOONGARCH_VEC_ASM
/* First instance: LASX variant, emitted as _dl_runtime_resolve_lasx. */
#define USE_LASX
#define _dl_runtime_resolve _dl_runtime_resolve_lasx
#include "dl-trampoline.h"
/* dl-trampoline.h defines FRAME_SIZE; drop it, the variant switch and the
   rename so that the next inclusion starts from a clean state. */
#undef FRAME_SIZE
#undef USE_LASX
#undef _dl_runtime_resolve
/* Second instance: LSX variant, emitted as _dl_runtime_resolve_lsx. */
#define USE_LSX
#define _dl_runtime_resolve _dl_runtime_resolve_lsx
#include "dl-trampoline.h"
#undef FRAME_SIZE
#undef USE_LSX
#undef _dl_runtime_resolve
#endif
/* Final instance: neither USE_LASX nor USE_LSX is defined here, so this
   inclusion emits the base variant under the plain _dl_runtime_resolve
   name. */
#include "dl-trampoline.h"
#include "dl-link.h" #include "dl-link.h"
/* Assembler veneer called from the PLT header code for lazy loading.
The PLT header passes its own args in t0-t2. */
/* Stack frame: ten SZREG-sized slots, plus eight SZFREG-sized slots when
   hardware floating point is in use.  The total is masked with ALMASK
   (NOTE(review): presumably this rounds the size up to the stack
   alignment; ALMASK is defined elsewhere -- confirm). */
#ifdef __loongarch_soft_float
#define FRAME_SIZE (-((-10 * SZREG) & ALMASK))
#else
#define FRAME_SIZE (-((-10 * SZREG - 8 * SZFREG) & ALMASK))
#endif
ENTRY (_dl_runtime_resolve)
/* Save arguments to stack. */
ADDI sp, sp, -FRAME_SIZE
/* ra goes in slot 9 and a0-a7 in slots 1-8; slot 0 is not written. */
REG_S ra, sp, 9*SZREG
REG_S a0, sp, 1*SZREG
REG_S a1, sp, 2*SZREG
REG_S a2, sp, 3*SZREG
REG_S a3, sp, 4*SZREG
REG_S a4, sp, 5*SZREG
REG_S a5, sp, 6*SZREG
REG_S a6, sp, 7*SZREG
REG_S a7, sp, 8*SZREG
#ifndef __loongarch_soft_float
/* fa0-fa7 are stored directly above the ten integer slots. */
FREG_S fa0, sp, 10*SZREG + 0*SZFREG
FREG_S fa1, sp, 10*SZREG + 1*SZFREG
FREG_S fa2, sp, 10*SZREG + 2*SZFREG
FREG_S fa3, sp, 10*SZREG + 3*SZFREG
FREG_S fa4, sp, 10*SZREG + 4*SZFREG
FREG_S fa5, sp, 10*SZREG + 5*SZFREG
FREG_S fa6, sp, 10*SZREG + 6*SZFREG
FREG_S fa7, sp, 10*SZREG + 7*SZFREG
#endif
/* Update .got.plt and obtain runtime address of callee */
/* a0 = t0 and a1 = t1 * 3 (shift left by one, then add t1 once more);
   these are the two arguments handed to _dl_fixup. */
SLLI a1, t1, 1
or a0, t0, zero
ADD a1, a1, t1
la a2, _dl_fixup
jirl ra, a2, 0
/* Keep the value returned in v0 in t1 so that it survives the reload of
   the argument registers below. */
or t1, v0, zero
/* Restore arguments from stack. */
REG_L ra, sp, 9*SZREG
REG_L a0, sp, 1*SZREG
REG_L a1, sp, 2*SZREG
REG_L a2, sp, 3*SZREG
REG_L a3, sp, 4*SZREG
REG_L a4, sp, 5*SZREG
REG_L a5, sp, 6*SZREG
REG_L a6, sp, 7*SZREG
REG_L a7, sp, 8*SZREG
#ifndef __loongarch_soft_float
FREG_L fa0, sp, 10*SZREG + 0*SZFREG
FREG_L fa1, sp, 10*SZREG + 1*SZFREG
FREG_L fa2, sp, 10*SZREG + 2*SZFREG
FREG_L fa3, sp, 10*SZREG + 3*SZFREG
FREG_L fa4, sp, 10*SZREG + 4*SZFREG
FREG_L fa5, sp, 10*SZREG + 5*SZFREG
FREG_L fa6, sp, 10*SZREG + 6*SZFREG
FREG_L fa7, sp, 10*SZREG + 7*SZFREG
#endif
ADDI sp, sp, FRAME_SIZE
/* Invoke the callee. */
jirl zero, t1, 0
END (_dl_runtime_resolve)
ENTRY (_dl_runtime_profile) ENTRY (_dl_runtime_profile)
/* LoongArch we get called with: /* LoongArch we get called with:
t0 link_map pointer t0 link_map pointer

View File

@ -0,0 +1,129 @@
/* PLT trampolines.
Copyright (C) 2022-2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
/* Assembler veneer called from the PLT header code for lazy loading.
The PLT header passes its own args in t0-t2. */
/* Stack frame size for the variant selected by the including file.
   Every layout starts with nine SZREG-sized slots (ra and a0-a7).  The
   hard-float layouts add eight SZFREG-sized slots, and the USE_LASX /
   USE_LSX layouts add a further eight SZXREG- or SZVREG-sized slots.
   The total is masked with ALMASK (NOTE(review): presumably this rounds
   the size up to the stack alignment; ALMASK is defined elsewhere --
   confirm).
   NOTE(review): the vector layouts keep the 8 * SZFREG area, but the
   save/restore code below never writes it in those variants; the vector
   stores simply start above it. */
#ifndef __loongarch_soft_float
# ifdef USE_LASX
# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZXREG) & ALMASK))
# elif defined USE_LSX
# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG - 8 * SZVREG) & ALMASK))
# else
# define FRAME_SIZE (-((-9 * SZREG - 8 * SZFREG) & ALMASK))
# endif
#else
# define FRAME_SIZE (-((-9 * SZREG) & ALMASK))
#endif
/* Lazy-binding trampoline template.  It saves ra, a0-a7 and the
   floating-point or vector argument registers, calls _dl_fixup with
   a0 = t0 and a1 = t1 * 3, restores everything, and jumps to the address
   _dl_fixup returned.  The including file selects the register set with
   USE_LASX / USE_LSX and may rename the entry point by defining
   _dl_runtime_resolve as a macro. */
ENTRY (_dl_runtime_resolve)
/* Save arguments to stack. */
ADDI sp, sp, -FRAME_SIZE
/* Slot 0 holds ra, slots 1-8 hold a0-a7. */
REG_S ra, sp, 0*SZREG
REG_S a0, sp, 1*SZREG
REG_S a1, sp, 2*SZREG
REG_S a2, sp, 3*SZREG
REG_S a3, sp, 4*SZREG
REG_S a4, sp, 5*SZREG
REG_S a5, sp, 6*SZREG
REG_S a6, sp, 7*SZREG
REG_S a7, sp, 8*SZREG
/* Exactly one of the three register sets below is saved.  In the vector
   variants fa0-fa7 are not stored separately (NOTE(review): presumably
   because they alias the low part of vr0-vr7 / xr0-xr7 -- confirm against
   the LoongArch manual). */
#ifdef USE_LASX
/* xr0-xr7, placed above the nine integer slots and the 8*SZFREG area. */
xvst xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
xvst xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
xvst xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
xvst xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
xvst xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
xvst xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
xvst xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
xvst xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
#elif defined USE_LSX
/* vr0-vr7, at the same base offset as the LASX variant uses. */
vst vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
vst vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
vst vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
vst vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
vst vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
vst vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
vst vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
vst vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
#elif !defined __loongarch_soft_float
/* fa0-fa7, directly above the nine integer slots. */
FREG_S fa0, sp, 9*SZREG + 0*SZFREG
FREG_S fa1, sp, 9*SZREG + 1*SZFREG
FREG_S fa2, sp, 9*SZREG + 2*SZFREG
FREG_S fa3, sp, 9*SZREG + 3*SZFREG
FREG_S fa4, sp, 9*SZREG + 4*SZFREG
FREG_S fa5, sp, 9*SZREG + 5*SZFREG
FREG_S fa6, sp, 9*SZREG + 6*SZFREG
FREG_S fa7, sp, 9*SZREG + 7*SZFREG
#endif
/* Update .got.plt and obtain runtime address of callee */
/* a0 = t0 and a1 = t1 * 3 (shift left by one, then add t1 once more);
   these are the two arguments handed to _dl_fixup. */
SLLI a1, t1, 1
or a0, t0, zero
ADD a1, a1, t1
la a2, _dl_fixup
jirl ra, a2, 0
/* Keep the value returned in v0 in t1 so that it survives the reload of
   the argument registers below. */
or t1, v0, zero
/* Restore arguments from stack. */
REG_L ra, sp, 0*SZREG
REG_L a0, sp, 1*SZREG
REG_L a1, sp, 2*SZREG
REG_L a2, sp, 3*SZREG
REG_L a3, sp, 4*SZREG
REG_L a4, sp, 5*SZREG
REG_L a5, sp, 6*SZREG
REG_L a6, sp, 7*SZREG
REG_L a7, sp, 8*SZREG
/* Reload the same register set, from the same offsets, as saved above. */
#ifdef USE_LASX
xvld xr0, sp, 9*SZREG + 8*SZFREG + 0*SZXREG
xvld xr1, sp, 9*SZREG + 8*SZFREG + 1*SZXREG
xvld xr2, sp, 9*SZREG + 8*SZFREG + 2*SZXREG
xvld xr3, sp, 9*SZREG + 8*SZFREG + 3*SZXREG
xvld xr4, sp, 9*SZREG + 8*SZFREG + 4*SZXREG
xvld xr5, sp, 9*SZREG + 8*SZFREG + 5*SZXREG
xvld xr6, sp, 9*SZREG + 8*SZFREG + 6*SZXREG
xvld xr7, sp, 9*SZREG + 8*SZFREG + 7*SZXREG
#elif defined USE_LSX
vld vr0, sp, 9*SZREG + 8*SZFREG + 0*SZVREG
vld vr1, sp, 9*SZREG + 8*SZFREG + 1*SZVREG
vld vr2, sp, 9*SZREG + 8*SZFREG + 2*SZVREG
vld vr3, sp, 9*SZREG + 8*SZFREG + 3*SZVREG
vld vr4, sp, 9*SZREG + 8*SZFREG + 4*SZVREG
vld vr5, sp, 9*SZREG + 8*SZFREG + 5*SZVREG
vld vr6, sp, 9*SZREG + 8*SZFREG + 6*SZVREG
vld vr7, sp, 9*SZREG + 8*SZFREG + 7*SZVREG
#elif !defined __loongarch_soft_float
FREG_L fa0, sp, 9*SZREG + 0*SZFREG
FREG_L fa1, sp, 9*SZREG + 1*SZFREG
FREG_L fa2, sp, 9*SZREG + 2*SZFREG
FREG_L fa3, sp, 9*SZREG + 3*SZFREG
FREG_L fa4, sp, 9*SZREG + 4*SZFREG
FREG_L fa5, sp, 9*SZREG + 5*SZFREG
FREG_L fa6, sp, 9*SZREG + 6*SZFREG
FREG_L fa7, sp, 9*SZREG + 7*SZFREG
#endif
/* Release the frame before leaving. */
ADDI sp, sp, FRAME_SIZE
/* Invoke the callee. */
jirl zero, t1, 0
END (_dl_runtime_resolve)

View File

@ -20,6 +20,7 @@
#define _LOONGARCH_LDSODEFS_H 1 #define _LOONGARCH_LDSODEFS_H 1
#include <elf.h> #include <elf.h>
#include <cpu-features.h>
struct La_loongarch_regs; struct La_loongarch_regs;
struct La_loongarch_retval; struct La_loongarch_retval;

View File

@ -25,6 +25,8 @@
/* Macros to handle different pointer/register sizes for 32/64-bit code. */ /* Macros to handle different pointer/register sizes for 32/64-bit code. */
#define SZREG 8 #define SZREG 8
#define SZFREG 8 #define SZFREG 8
/* Stack slot sizes in bytes used when saving one vr (LSX) or one xr
   (LASX) vector register. */
#define SZVREG 16
#define SZXREG 32
#define REG_L ld.d #define REG_L ld.d
#define REG_S st.d #define REG_S st.d
#define SRLI srli.d #define SRLI srli.d

View File

@ -90,4 +90,22 @@
#define fs6 $f30 #define fs6 $f30
#define fs7 $f31 #define fs7 $f31
/* Bare names for the first eight vr registers, used by the LSX
   vst/vld instructions. */
#define vr0 $vr0
#define vr1 $vr1
#define vr2 $vr2
#define vr3 $vr3
#define vr4 $vr4
#define vr5 $vr5
#define vr6 $vr6
#define vr7 $vr7
/* Bare names for the first eight xr registers, used by the LASX
   xvst/xvld instructions. */
#define xr0 $xr0
#define xr1 $xr1
#define xr2 $xr2
#define xr3 $xr3
#define xr4 $xr4
#define xr5 $xr5
#define xr6 $xr6
#define xr7 $xr7
#endif /* _SYS_REGDEF_H */ #endif /* _SYS_REGDEF_H */

View File

@ -0,0 +1,37 @@
/* Defines for bits in AT_HWCAP. LoongArch64 Linux version.
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#if !defined (_SYS_AUXV_H)
# error "Never include <bits/hwcap.h> directly; use <sys/auxv.h> instead."
#endif
/* The following must match the kernel's <asm/hwcap.h>. */
/* HWCAP flags */
/* One mask per AT_HWCAP bit, lowest bit first, spelled as hexadecimal
   constants.  The values are the same as 1 << 0 through 1 << 12. */
#define HWCAP_LOONGARCH_CPUCFG 0x0001 /* bit 0 */
#define HWCAP_LOONGARCH_LAM 0x0002 /* bit 1 */
#define HWCAP_LOONGARCH_UAL 0x0004 /* bit 2 */
#define HWCAP_LOONGARCH_FPU 0x0008 /* bit 3 */
#define HWCAP_LOONGARCH_LSX 0x0010 /* bit 4 */
#define HWCAP_LOONGARCH_LASX 0x0020 /* bit 5 */
#define HWCAP_LOONGARCH_CRC32 0x0040 /* bit 6 */
#define HWCAP_LOONGARCH_COMPLEX 0x0080 /* bit 7 */
#define HWCAP_LOONGARCH_CRYPTO 0x0100 /* bit 8 */
#define HWCAP_LOONGARCH_LVZ 0x0200 /* bit 9 */
#define HWCAP_LOONGARCH_LBT_X86 0x0400 /* bit 10 */
#define HWCAP_LOONGARCH_LBT_ARM 0x0800 /* bit 11 */
#define HWCAP_LOONGARCH_LBT_MIPS 0x1000 /* bit 12 */

View File

@ -0,0 +1,29 @@
/* Initialize CPU feature data. LoongArch64 version.
This file is part of the GNU C Library.
Copyright (C) 2022 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef _CPU_FEATURES_LOONGARCH64_H
#define _CPU_FEATURES_LOONGARCH64_H
/* Provides the HWCAP_LOONGARCH_* bit definitions used below. */
#include <sys/auxv.h>
/* Run-time feature tests against GLRO (dl_hwcap).  Each macro evaluates
   to the masked bit itself, i.e. a nonzero value (not necessarily 1) when
   the corresponding HWCAP_LOONGARCH_* bit is set and zero otherwise.
   GLRO is not defined in this header; the includer must provide it. */
#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
#endif /* _CPU_FEATURES_LOONGARCH64_H */