mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-13 14:50:17 +00:00
* sysdeps/powerpc/elf/libc-start.c
(__cache_line_size): Declare. (__aux_init_cache): New. (__libc_start_main): Change type of `auxvec' parameter to `ElfW(auxv_t) *'. Correct walking of aux vector. Call __aux_init_cache. * sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c (__cache_line_size): Declare. (__aux_init_cache): New. (DL_PLATFORM_INIT): Define. * sysdeps/powerpc/memset.S: Define __cache_line_size and use its value to select the correct stride for dcbz.
This commit is contained in:
parent
4343130e1d
commit
adb89120bb
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 1998, 2000, 2001 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -26,6 +26,10 @@ extern void __libc_init_first (int argc, char **argv, char **envp);
|
||||
|
||||
extern int _dl_starting_up;
|
||||
weak_extern (_dl_starting_up)
|
||||
|
||||
extern int __cache_line_size;
|
||||
weak_extern (__cache_line_size)
|
||||
|
||||
extern int __libc_multiple_libcs;
|
||||
extern void *__libc_stack_end;
|
||||
|
||||
@ -37,12 +41,33 @@ struct startup_info
|
||||
void (*fini) (void);
|
||||
};
|
||||
|
||||
/* Scan the Aux Vector for the "Data Cache Block Size" entry. If found
|
||||
verify that the static extern __cache_line_size is defined by checking
|
||||
for not NULL. If it is defined then assign the cache block size
|
||||
value to __cache_line_size. */
|
||||
static inline void
|
||||
__aux_init_cache (ElfW(auxv_t) *av)
|
||||
{
|
||||
for (; av->a_type != AT_NULL; ++av)
|
||||
switch (av->a_type)
|
||||
{
|
||||
case AT_DCACHEBSIZE:
|
||||
{
|
||||
int *cls = & __cache_line_size;
|
||||
if (cls != NULL)
|
||||
*cls = av->a_un.a_val;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
/* GKM FIXME: GCC: this should get __BP_ prefix by virtue of the
|
||||
BPs in the arglist of startup_info.main and startup_info.init. */
|
||||
BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
|
||||
char *__unbounded *__unbounded ubp_ev,
|
||||
void *__unbounded auxvec, void (*rtld_fini) (void),
|
||||
ElfW(auxv_t) *__unbounded auxvec, void (*rtld_fini) (void),
|
||||
struct startup_info *__unbounded stinfo,
|
||||
char *__unbounded *__unbounded stack_on_entry)
|
||||
{
|
||||
@ -66,16 +91,19 @@ BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
|
||||
as a statically-linked program by Linux... */
|
||||
if (*stack_on_entry != NULL)
|
||||
{
|
||||
char *__unbounded *__unbounded temp;
|
||||
/* ...in which case, we have argc as the top thing on the
|
||||
stack, followed by argv (NULL-terminated), envp (likewise),
|
||||
and the auxiliary vector. */
|
||||
argc = *(int *__unbounded) stack_on_entry;
|
||||
ubp_av = stack_on_entry + 1;
|
||||
ubp_ev = ubp_av + argc + 1;
|
||||
auxvec = ubp_ev;
|
||||
while (*(char *__unbounded *__unbounded) auxvec != NULL)
|
||||
++auxvec;
|
||||
++auxvec;
|
||||
temp = ubp_ev;
|
||||
while (*temp != NULL)
|
||||
++temp;
|
||||
auxvec = (ElfW(auxv_t) *)++temp;
|
||||
|
||||
|
||||
#ifndef SHARED
|
||||
_dl_aux_init ((ElfW(auxv_t) *) auxvec);
|
||||
#endif
|
||||
@ -84,6 +112,9 @@ BP_SYM (__libc_start_main) (int argc, char *__unbounded *__unbounded ubp_av,
|
||||
|
||||
INIT_ARGV_and_ENVIRON;
|
||||
|
||||
/* Initialize the __cache_line_size variable from the aux vector. */
|
||||
__aux_init_cache((ElfW(auxv_t) *) auxvec);
|
||||
|
||||
/* Store something that has some relationship to the end of the
|
||||
stack, for backtraces. This variable should be thread-specific. */
|
||||
__libc_stack_end = stack_on_entry + 4;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Optimized memset implementation for PowerPC.
|
||||
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
|
||||
Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -21,12 +21,26 @@
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* Define a global static that can hold the cache line size. The
|
||||
assumption is that startup code will access the "aux vector" to
|
||||
obtain the value set by the kernel and store it into this
|
||||
variable. */
|
||||
|
||||
.globl __cache_line_size
|
||||
.section ".data","aw"
|
||||
.align 2
|
||||
.type __cache_line_size,@object
|
||||
.size __cache_line_size,4
|
||||
__cache_line_size:
|
||||
.long 0
|
||||
.section ".text"
|
||||
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
|
||||
Returns 's'.
|
||||
|
||||
The memset is done in three sizes: byte (8 bits), word (32 bits),
|
||||
cache line (256 bits). There is a special case for setting cache lines
|
||||
to 0, to take advantage of the dcbz instruction. */
|
||||
The memset is done in four sizes: byte (8 bits), word (32 bits),
|
||||
32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
|
||||
There is a special case for setting whole cache lines to 0, which
|
||||
takes advantage of the dcbz instruction. */
|
||||
|
||||
EALIGN (BP_SYM (memset), 5, 1)
|
||||
|
||||
@ -50,6 +64,10 @@ EALIGN (BP_SYM (memset), 5, 1)
|
||||
#define rNEG64 r8 /* constant -64 for clearing with dcbz */
|
||||
#define rNEG32 r9 /* constant -32 for clearing with dcbz */
|
||||
|
||||
#define rGOT r9 /* Address of the Global Offset Table. */
|
||||
#define rCLS r8 /* Cache line size obtained from static. */
|
||||
#define rCLM r9 /* Cache line size mask to check for cache alignment. */
|
||||
|
||||
#if __BOUNDED_POINTERS__
|
||||
cmplwi cr1, rRTN, 0
|
||||
CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN)
|
||||
@ -105,7 +123,17 @@ L(caligned):
|
||||
cmplwi cr1, rCHR, 0
|
||||
clrrwi. rALIGN, rLEN, 5
|
||||
mtcrf 0x01, rLEN /* 40th instruction from .align */
|
||||
beq cr1, L(zloopstart) /* special case for clearing memory using dcbz */
|
||||
|
||||
/* Check if we can use the special case for clearing memory using dcbz.
|
||||
This requires that we know the correct cache line size for this
|
||||
processor. Getting the __cache_line_size may require establishing GOT
|
||||
addressability, so branch out of line to set this up. */
|
||||
beq cr1, L(checklinesize)
|
||||
|
||||
/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary.
|
||||
Can't assume that rCHR is zero or that the cache line size is either
|
||||
32-bytes or even known. */
|
||||
L(nondcbz):
|
||||
srwi rTMP, rALIGN, 5
|
||||
mtctr rTMP
|
||||
beq L(medium) /* we may not actually get to do a full line */
|
||||
@ -114,7 +142,9 @@ L(caligned):
|
||||
li rNEG64, -0x40
|
||||
bdz L(cloopdone) /* 48th instruction from .align */
|
||||
|
||||
L(c3): dcbz rNEG64, rMEMP
|
||||
/* We can't use dcbz here as we don't know the cache line size. We can
|
||||
use "data cache block touch for store", which is safe. */
|
||||
L(c3): dcbtst rNEG64, rMEMP
|
||||
stw rCHR, -4(rMEMP)
|
||||
stw rCHR, -8(rMEMP)
|
||||
stw rCHR, -12(rMEMP)
|
||||
@ -142,7 +172,10 @@ L(cloopdone):
|
||||
|
||||
.align 5
|
||||
nop
|
||||
/* Clear lines of memory in 128-byte chunks. */
|
||||
/* Clear cache lines of memory in 128-byte chunks.
|
||||
This code is optimized for processors with 32-byte cache lines.
|
||||
It is further optimized for the 601 processor, which requires
|
||||
some care in how the code is aligned in the i-cache. */
|
||||
L(zloopstart):
|
||||
clrlwi rLEN, rLEN, 27
|
||||
mtcrf 0x02, rALIGN
|
||||
@ -226,4 +259,80 @@ L(medium_28t):
|
||||
stw rCHR, -4(rMEMP)
|
||||
stw rCHR, -8(rMEMP)
|
||||
blr
|
||||
|
||||
L(checklinesize):
|
||||
#ifdef SHARED
|
||||
mflr rTMP
|
||||
/* If the remaining length is less than 32 bytes then don't bother getting
|
||||
the cache line size. */
|
||||
beq L(medium)
|
||||
/* Establishes GOT addressability so we can load __cache_line_size
|
||||
from static. This value was set from the aux vector during startup. */
|
||||
bl _GLOBAL_OFFSET_TABLE_@local-4
|
||||
mflr rGOT
|
||||
lwz rGOT,__cache_line_size@got(rGOT)
|
||||
lwz rCLS,0(rGOT)
|
||||
mtlr rTMP
|
||||
#else
|
||||
/* Load __cache_line_size from static. This value was set from the
|
||||
aux vector during startup. */
|
||||
lis rCLS,__cache_line_size@ha
|
||||
/* If the remaining length is less than 32 bytes then don't bother getting
|
||||
the cache line size. */
|
||||
beq L(medium)
|
||||
lwz rCLS,__cache_line_size@l(rCLS)
|
||||
#endif
|
||||
|
||||
/* If the cache line size was not set then go to L(nondcbz), which is
|
||||
safe for any cache line size. */
|
||||
cmplwi cr1,rCLS,0
|
||||
beq cr1,L(nondcbz)
|
||||
|
||||
/* If the cache line size is 32 bytes then go to L(zloopstart),
|
||||
which is coded specifically for 32-byte lines (and 601). */
|
||||
cmplwi cr1,rCLS,32
|
||||
beq cr1,L(zloopstart)
|
||||
|
||||
/* Now we know the cache line size and it is not 32-bytes. However
|
||||
we may not yet be aligned to the cache line and may have a partial
|
||||
line to fill. Touch it 1st to fetch the cache line. */
|
||||
dcbtst 0,rMEMP
|
||||
|
||||
addi rCLM,rCLS,-1
|
||||
L(getCacheAligned):
|
||||
cmplwi cr1,rLEN,32
|
||||
and. rTMP,rCLM,rMEMP
|
||||
blt cr1,L(handletail32)
|
||||
beq L(cacheAligned)
|
||||
/* We are not aligned to start of a cache line yet. Store 32-byte
|
||||
of data and test again. */
|
||||
addi rMEMP,rMEMP,32
|
||||
addi rLEN,rLEN,-32
|
||||
stw rCHR,-32(rMEMP)
|
||||
stw rCHR,-28(rMEMP)
|
||||
stw rCHR,-24(rMEMP)
|
||||
stw rCHR,-20(rMEMP)
|
||||
stw rCHR,-16(rMEMP)
|
||||
stw rCHR,-12(rMEMP)
|
||||
stw rCHR,-8(rMEMP)
|
||||
stw rCHR,-4(rMEMP)
|
||||
b L(getCacheAligned)
|
||||
|
||||
/* Now we are aligned to the cache line and can use dcbz. */
|
||||
L(cacheAligned):
|
||||
cmplw cr1,rLEN,rCLS
|
||||
blt cr1,L(handletail32)
|
||||
dcbz 0,rMEMP
|
||||
subf rLEN,rCLS,rLEN
|
||||
add rMEMP,rMEMP,rCLS
|
||||
b L(cacheAligned)
|
||||
|
||||
/* We are here because: the cache line size was set, it was not
|
||||
32-bytes, and the remainder (rLEN) is now less than the actual cache
|
||||
line size. Set up the preconditions for L(nondcbz) and go there to
|
||||
store the remaining bytes. */
|
||||
L(handletail32):
|
||||
clrrwi. rALIGN, rLEN, 5
|
||||
b L(nondcbz)
|
||||
|
||||
END (BP_SYM (memset))
|
||||
|
@ -20,6 +20,32 @@
|
||||
|
||||
#include "config.h"
|
||||
#include "kernel-features.h"
|
||||
#include <ldsodefs.h>
|
||||
|
||||
extern int __cache_line_size;
|
||||
weak_extern (__cache_line_size)
|
||||
|
||||
#define DL_PLATFORM_INIT __aux_init_cache(_dl_auxv)
|
||||
|
||||
/* Scan the Aux Vector for the "Data Cache Block Size" entry. If found
|
||||
verify that the static extern __cache_line_size is defined by checking
|
||||
for not NULL. If it is defined then assign the cache block size
|
||||
value to __cache_line_size. */
|
||||
static inline void
|
||||
__aux_init_cache (ElfW(auxv_t) *av)
|
||||
{
|
||||
for (; av->a_type != AT_NULL; ++av)
|
||||
switch (av->a_type)
|
||||
{
|
||||
case AT_DCACHEBSIZE:
|
||||
{
|
||||
int *cls = & __cache_line_size;
|
||||
if (cls != NULL)
|
||||
*cls = av->a_un.a_val;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __ASSUME_STD_AUXV
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user