2020-01-01 00:14:33 +00:00
|
|
|
/* Copyright (C) 2000-2020 Free Software Foundation, Inc.
|
2000-12-08 17:18:30 +00:00
|
|
|
Contributed by Richard Henderson (rth@tamu.edu)
|
|
|
|
EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
|
|
|
|
This file is part of the GNU C Library.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
2001-07-06 04:56:23 +00:00
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
2000-12-08 17:18:30 +00:00
|
|
|
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
2001-07-06 04:56:23 +00:00
|
|
|
Lesser General Public License for more details.
|
2000-12-08 17:18:30 +00:00
|
|
|
|
2001-07-06 04:56:23 +00:00
|
|
|
You should have received a copy of the GNU Lesser General Public
|
2012-03-09 23:56:38 +00:00
|
|
|
License along with the GNU C Library. If not, see
|
Prefer https to http for gnu.org and fsf.org URLs
Also, change sources.redhat.com to sourceware.org.
This patch was automatically generated by running the following shell
script, which uses GNU sed, and which avoids modifying files imported
from upstream:
sed -ri '
s,(http|ftp)(://(.*\.)?(gnu|fsf|sourceware)\.org($|[^.]|\.[^a-z])),https\2,g
s,(http|ftp)(://(.*\.)?)sources\.redhat\.com($|[^.]|\.[^a-z]),https\2sourceware.org\4,g
' \
$(find $(git ls-files) -prune -type f \
! -name '*.po' \
! -name 'ChangeLog*' \
! -path COPYING ! -path COPYING.LIB \
! -path manual/fdl-1.3.texi ! -path manual/lgpl-2.1.texi \
! -path manual/texinfo.tex ! -path scripts/config.guess \
! -path scripts/config.sub ! -path scripts/install-sh \
! -path scripts/mkinstalldirs ! -path scripts/move-if-change \
! -path INSTALL ! -path locale/programs/charmap-kw.h \
! -path po/libc.pot ! -path sysdeps/gnu/errlist.c \
! '(' -name configure \
-execdir test -f configure.ac -o -f configure.in ';' ')' \
! '(' -name preconfigure \
-execdir test -f preconfigure.ac ';' ')' \
-print)
and then by running 'make dist-prepare' to regenerate files built
from the altered files, and then executing the following to cleanup:
chmod a+x sysdeps/unix/sysv/linux/riscv/configure
# Omit irrelevant whitespace and comment-only changes,
# perhaps from a slightly-different Autoconf version.
git checkout -f \
sysdeps/csky/configure \
sysdeps/hppa/configure \
sysdeps/riscv/configure \
sysdeps/unix/sysv/linux/csky/configure
# Omit changes that caused a pre-commit check to fail like this:
# remote: *** error: sysdeps/powerpc/powerpc64/ppc-mcount.S: trailing lines
git checkout -f \
sysdeps/powerpc/powerpc64/ppc-mcount.S \
sysdeps/unix/sysv/linux/s390/s390-64/syscall.S
# Omit change that caused a pre-commit check to fail like this:
# remote: *** error: sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: last line does not end in newline
git checkout -f sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
2019-09-07 05:40:42 +00:00
|
|
|
<https://www.gnu.org/licenses/>. */
|
2000-12-08 17:18:30 +00:00
|
|
|
|
|
|
|
#include <sysdep.h>
|
|
|
|
|
|
|
|
/* Select the ev6 instruction set for the assembler.  */
.arch ev6
|
|
|
|
/* NOTE(review): per the GNU as Alpha documentation, noat keeps the
   assembler from using the AT register on its own; the PROF prologue
   of memset names AT explicitly.  */
.set noat
|
|
|
|
/* NOTE(review): presumably requests that instructions stay in the
   order written, since the code is hand-scheduled -- confirm how the
   assembler in use treats this setting.  */
.set noreorder
|
|
|
|
|
|
|
|
/*
 * memset -- fill a block of memory with a byte value.
 *
 * Register usage, as read from the code below:
 *   $16  in:  destination address (copied to $0 as the return value)
 *   $17  in:  fill value; only the low byte is used, and it is
 *             replicated into all eight bytes of $17
 *   $18  in:  byte count; a count <= 0 (signed compare) returns at once
 *   $0   out: the original destination address
 *   $1-$7     scratch
 *   $26       return address
 *
 * Outline:
 *   1. Replicate the fill byte across a quadword.
 *   2. If the start address and the end address (dest + count) lie in
 *      the same aligned quadword, do one read-modify-write
 *      ($within_quad).
 *   3. Otherwise patch the leading partial quadword (if misaligned),
 *      store whole quadwords, then patch the trailing partial
 *      quadword ($no_quad).  Runs of 16 or more quadwords are first
 *      brought to 64-byte alignment and then stored 64 bytes per
 *      trip with a wh64 write hint.
 *
 * NOTE(review): the E/U/L/L0/L1 tags in the trailing comments look
 * like EV6 issue-slot classes; confirm against the 21264 Compiler
 * Writer's Guide.
 */
ENTRY(memset)
|
|
|
|
#ifdef PROF
|
|
|
|
ldgp gp, 0(pv)
|
|
|
|
lda AT, _mcount
|
|
|
|
jsr AT, (AT), _mcount
|
|
|
|
.prologue 1
|
|
|
|
#else
|
|
|
|
.prologue 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Serious stalling happens. The only way to mitigate this is to
|
|
|
|
* undertake a major re-write to interleave the constant materialization
|
|
|
|
* with other parts of the fall-through code. This is important, even
|
|
|
|
* though it makes maintenance tougher.
|
|
|
|
* Do this later.
|
|
|
|
*/
|
|
|
|
and $17, 255, $1 # E : 00000000000000ch
|
|
|
|
insbl $17, 1, $2 # U : 000000000000ch00
|
|
|
|
mov $16, $0 # E : return value
|
|
|
|
ble $18, $end # U : nothing to do if count <= 0 (signed)
|
|
|
|
|
|
|
|
addq $18, $16, $6 # E : end address: dest + count (one past last byte)
|
|
|
|
or $1, $2, $17 # E : 000000000000chch
|
|
|
|
insbl $1, 2, $3 # U : 0000000000ch0000
|
|
|
|
insbl $1, 3, $4 # U : 00000000ch000000
|
|
|
|
|
|
|
|
or $3, $4, $3 # E : 00000000chch0000
|
|
|
|
inswl $17, 4, $5 # U : 0000chch00000000
|
|
|
|
xor $16, $6, $1 # E : bits that differ between start and end address
|
|
|
|
inswl $17, 6, $2 # U : chch000000000000
|
|
|
|
|
|
|
|
or $17, $3, $17 # E : 00000000chchchch
|
|
|
|
or $2, $5, $2 # E : chchchch00000000
|
|
|
|
bic $1, 7, $1 # E : zero iff start and end share one quadword
|
|
|
|
and $16, 7, $3 # E : Target addr misalignment
|
|
|
|
|
|
|
|
or $17, $2, $17 # E : chchchchchchchch
|
|
|
|
beq $1, $within_quad # U : whole write fits in a single quadword
|
|
|
|
nop # E :
|
|
|
|
beq $3, $aligned # U : target is 0mod8
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Target address is misaligned, and won't fit within a quadword.
|
|
|
|
*/
|
|
|
|
ldq_u $4, 0($16) # L : Fetch first partial
|
|
|
|
mov $16, $5 # E : Save the address
|
|
|
|
insql $17, $16, $2 # U : fill pattern in bytes at/above the start offset
|
|
|
|
subq $3, 8, $3 # E : $3 = misalignment - 8 = -(bytes up to next quad)
|
|
|
|
|
|
|
|
addq $18, $3, $18 # E : $18 is new count ($3 is negative)
|
|
|
|
mskql $4, $16, $4 # U : keep only the old bytes below the start offset
|
|
|
|
subq $16, $3, $16 # E : $16 is new aligned destination
|
|
|
|
or $2, $4, $1 # E : Final bytes
|
|
|
|
|
|
|
|
nop
|
|
|
|
stq_u $1,0($5) # L : Store result
|
|
|
|
nop
|
|
|
|
nop
|
|
|
|
|
|
|
|
.align 4
|
|
|
|
$aligned:
|
|
|
|
/*
|
|
|
|
* We are now guaranteed to be quad aligned, with at least
|
|
|
|
* one partial quad to write.
|
|
|
|
*/
|
|
|
|
|
|
|
|
sra $18, 3, $3 # U : Number of remaining quads to write
|
|
|
|
and $18, 7, $18 # E : Number of trailing bytes to write
|
|
|
|
mov $16, $5 # E : Save dest address
|
|
|
|
beq $3, $no_quad # U : tail stuff only
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It's worth the effort to unroll this and use wh64 if possible.
|
|
|
|
* At this point, entry values are:
|
|
|
|
* $16 Current destination address
|
|
|
|
* $5 A copy of $16
|
|
|
|
* $6 End address (original dest + count), one past the last byte
|
|
|
|
* $18 Number of trailing bytes
|
|
|
|
* $3 Number of quads to write
|
|
|
|
*/
|
|
|
|
|
|
|
|
and $16, 0x3f, $2 # E : offset within 64-byte block (used only by unrolled path)
|
|
|
|
subq $3, 16, $4 # E : unroll only if >= 16 quads (128 bytes)
|
|
|
|
subq $2, 0x40, $1 # E : $1 = offset - 64, in [-64, -8]; counts up to zero
|
|
|
|
blt $4, $loop # U : fewer than 16 quads, use the simple loop
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We know we've got at least 16 quads, minimum of one trip
|
|
|
|
* through unrolled loop. Do a quad at a time to get us 0mod64
|
|
|
|
* aligned.
|
|
|
|
*/
|
|
|
|
|
|
|
|
nop # E :
|
|
|
|
nop # E :
|
|
|
|
nop # E :
|
|
|
|
beq $1, $bigalign # U : NOTE(review): $1 looks nonzero here, so never taken?
|
|
|
|
|
|
|
|
$alignmod64:
|
|
|
|
stq $17, 0($5) # L :
|
|
|
|
subq $3, 1, $3 # E : one quad fewer left to store
|
|
|
|
addq $1, 8, $1 # E : Increment towards zero for alignment
|
|
|
|
addq $5, 8, $4 # E : Initial wh64 address (filler instruction)
|
|
|
|
|
|
|
|
nop
|
|
|
|
nop
|
|
|
|
addq $5, 8, $5 # E : Inc address
|
|
|
|
blt $1, $alignmod64 # U : loop until 0mod64
|
|
|
|
|
|
|
|
$bigalign:
|
|
|
|
/*
|
|
|
|
* $3 - number of quads left to go
|
|
|
|
* $5 - target address (aligned 0mod64)
|
|
|
|
* $17 - mask of stuff to store
|
|
|
|
* Scratch registers available: $7, $2, $4, $1
|
|
|
|
* We know that we'll be taking a minimum of one trip through.
|
2014-02-12 14:54:57 +00:00
|
|
|
* CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
|
2000-12-08 17:18:30 +00:00
|
|
|
* Assumes the wh64 needs to be issued two trips through the loop ahead.
|
|
|
|
* The wh64 is issued for the starting destination address of trip +2
|
|
|
|
* through the loop, and if there are fewer than two trips left, the target
|
|
|
|
* address will be for the current trip.
|
|
|
|
*/
|
|
|
|
|
|
|
|
$do_wh64:
|
|
|
|
wh64 ($4) # L1 : memory subsystem write hint
|
|
|
|
subq $3, 24, $2 # E : negative if fewer than 24 quads remain
|
|
|
|
stq $17, 0($5) # L :
|
|
|
|
nop # E :
|
|
|
|
|
|
|
|
addq $5, 128, $4 # E : speculative target of next wh64
|
|
|
|
stq $17, 8($5) # L :
|
|
|
|
stq $17, 16($5) # L :
|
|
|
|
addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr)
|
|
|
|
|
|
|
|
stq $17, 24($5) # L :
|
|
|
|
stq $17, 32($5) # L :
|
|
|
|
cmovlt $2, $7, $4 # E : use fallback if < 24 quads left (latency 2, extra mapping cycle)
|
|
|
|
nop
|
|
|
|
|
|
|
|
stq $17, 40($5) # L :
|
|
|
|
stq $17, 48($5) # L :
|
|
|
|
subq $3, 16, $2 # E : >= 0 if 8 or more quads remain after this trip
|
|
|
|
nop
|
|
|
|
|
|
|
|
stq $17, 56($5) # L :
|
|
|
|
addq $5, 64, $5 # E : advance to the next 64-byte block
|
|
|
|
subq $3, 8, $3 # E : eight quads stored this trip
|
|
|
|
bge $2, $do_wh64 # U : another full trip is available
|
|
|
|
|
|
|
|
nop
|
|
|
|
nop
|
|
|
|
nop
|
|
|
|
beq $3, $no_quad # U : Might have finished already
|
|
|
|
|
|
|
|
.align 4
|
|
|
|
/*
|
|
|
|
* Simple loop for trailing quadwords, or for small amounts
|
|
|
|
* of data (where we can't use an unrolled loop and wh64)
|
|
|
|
*/
|
|
|
|
$loop:
|
|
|
|
stq $17, 0($5) # L :
|
|
|
|
subq $3, 1, $3 # E : Decrement number of quads left
|
|
|
|
addq $5, 8, $5 # E : Inc address
|
|
|
|
bne $3, $loop # U : more?
|
|
|
|
|
|
|
|
$no_quad:
|
|
|
|
/*
|
|
|
|
* Write 0..7 trailing bytes.
|
|
|
|
*/
|
|
|
|
nop # E :
|
|
|
|
beq $18, $end # U : All done?
|
|
|
|
ldq $7, 0($5) # L : fetch the quad holding the tail
|
|
|
|
mskqh $7, $6, $2 # U : Mask final quad (keep old bytes at/above end offset)
|
|
|
|
|
|
|
|
insqh $17, $6, $4 # U : New bits (fill pattern below end offset)
|
|
|
|
or $2, $4, $1 # E : Put it all together
|
|
|
|
stq $1, 0($5) # L : And back to memory
|
|
|
|
ret $31,($26),1 # L0 :
|
|
|
|
|
|
|
|
/*
 * Start and end address share one quadword: build the result in two
 * steps, first merging the fill above the start offset, then
 * restoring the old bytes at and above the end offset.
 */
$within_quad:
|
|
|
|
ldq_u $1, 0($16) # L : fetch the existing quad
|
|
|
|
insql $17, $16, $2 # U : New bits
|
|
|
|
mskql $1, $16, $4 # U : Clear old
|
|
|
|
or $2, $4, $2 # E : New result
|
|
|
|
|
|
|
|
mskql $2, $6, $4 # U : keep merged bytes below the end offset
|
|
|
|
mskqh $1, $6, $2 # U : keep old bytes at/above the end offset
|
|
|
|
or $2, $4, $1 # E : final quad
|
|
|
|
stq_u $1, 0($16) # L : and back to memory
|
|
|
|
|
|
|
|
$end:
|
|
|
|
nop
|
|
|
|
nop
|
|
|
|
nop
|
|
|
|
ret $31,($26),1 # L0 :
|
|
|
|
|
|
|
|
END(memset)
|
2003-04-29 22:47:20 +00:00
|
|
|
/* NOTE(review): presumably provides the hidden alias used for calls
   to memset from inside the library; the macro is defined outside
   this file -- confirm.  */
libc_hidden_builtin_def (memset)
|