Remove sfi_* annotations from ARM assembly files.

This semi-mechanical patch removes all uses and definitions of the
sfi_breg, sfi_pld, and sfi_sp macros from various ARM-specific
assembly files.  These were only used by NaCl.

	* sysdeps/arm/sysdep.h
        (ARM_SFI_MACROS, sfi_breg, sfi_pld, sfi_sp): Delete definitions.

	* sysdeps/arm/__longjmp.S, sysdeps/arm/add_n.S
	* sysdeps/arm/addmul_1.S, sysdeps/arm/arm-mcount.S
	* sysdeps/arm/armv6/rawmemchr.S, sysdeps/arm/armv6/strchr.S
	* sysdeps/arm/armv6/strcpy.S, sysdeps/arm/armv6/strlen.S
	* sysdeps/arm/armv6/strrchr.S, sysdeps/arm/armv6t2/memchr.S
	* sysdeps/arm/armv6t2/strlen.S
	* sysdeps/arm/armv7/multiarch/memcpy_impl.S
	* sysdeps/arm/armv7/strcmp.S, sysdeps/arm/dl-tlsdesc.S
	* sysdeps/arm/memcpy.S, sysdeps/arm/memmove.S
	* sysdeps/arm/memset.S, sysdeps/arm/setjmp.S
	* sysdeps/arm/strlen.S, sysdeps/arm/submul_1.S:
        Remove all uses of sfi_breg, sfi_pld, and sfi_sp.
This commit is contained in:
Zack Weinberg 2017-05-11 20:36:15 -04:00
parent 42a844c6a2
commit 81cb7a0b2b
22 changed files with 459 additions and 881 deletions

View File

@ -1,3 +1,21 @@
2017-05-20 Zack Weinberg <zackw@panix.com>
* sysdeps/arm/sysdep.h
(ARM_SFI_MACROS, sfi_breg, sfi_pld, sfi_sp): Delete definitions.
* sysdeps/arm/__longjmp.S, sysdeps/arm/add_n.S
* sysdeps/arm/addmul_1.S, sysdeps/arm/arm-mcount.S
* sysdeps/arm/armv6/rawmemchr.S, sysdeps/arm/armv6/strchr.S
* sysdeps/arm/armv6/strcpy.S, sysdeps/arm/armv6/strlen.S
* sysdeps/arm/armv6/strrchr.S, sysdeps/arm/armv6t2/memchr.S
* sysdeps/arm/armv6t2/strlen.S
* sysdeps/arm/armv7/multiarch/memcpy_impl.S
* sysdeps/arm/armv7/strcmp.S, sysdeps/arm/dl-tlsdesc.S
* sysdeps/arm/memcpy.S, sysdeps/arm/memmove.S
* sysdeps/arm/memset.S, sysdeps/arm/setjmp.S
* sysdeps/arm/strlen.S, sysdeps/arm/submul_1.S:
Remove all uses of sfi_breg, sfi_pld, and sfi_sp.
2017-05-20 Zack Weinberg <zackw@panix.com> 2017-05-20 Zack Weinberg <zackw@panix.com>
Remove the NaCl port. Remove the NaCl port.

View File

@ -28,8 +28,7 @@ ENTRY (__longjmp)
mov ip, r0 mov ip, r0
#ifdef CHECK_SP #ifdef CHECK_SP
sfi_breg ip, \ ldr r4, [ip] /* jmpbuf's sp */
ldr r4, [\B] /* jmpbuf's sp */
cfi_undefined (r4) cfi_undefined (r4)
#ifdef PTR_DEMANGLE #ifdef PTR_DEMANGLE
PTR_DEMANGLE (r4, r4, a3, a4) PTR_DEMANGLE (r4, r4, a3, a4)
@ -38,28 +37,22 @@ ENTRY (__longjmp)
#endif #endif
#ifdef PTR_DEMANGLE #ifdef PTR_DEMANGLE
sfi_breg ip, \ ldr a4, [ip], #4
ldr a4, [\B], #4
PTR_DEMANGLE (a4, a4, a3, r4) PTR_DEMANGLE (a4, a4, a3, r4)
cfi_undefined (r4) cfi_undefined (r4)
sfi_breg ip, \ ldr r4, [ip], #4
ldr r4, [\B], #4
PTR_DEMANGLE2 (r4, r4, a3) PTR_DEMANGLE2 (r4, r4, a3)
#else #else
sfi_breg ip, \ ldr a4, [ip], #4
ldr a4, [\B], #4 ldr r4, [ip], #4
sfi_breg ip, \
ldr r4, [\B], #4
cfi_undefined (r4) cfi_undefined (r4)
#endif #endif
/* longjmp probe expects longjmp first argument (4@r0), second /* longjmp probe expects longjmp first argument (4@r0), second
argument (-4@r1), and target address (4@r4), respectively. */ argument (-4@r1), and target address (4@r4), respectively. */
LIBC_PROBE (longjmp, 3, 4@r0, -4@r1, 4@r4) LIBC_PROBE (longjmp, 3, 4@r0, -4@r1, 4@r4)
sfi_sp \
mov sp, a4 mov sp, a4
mov lr, r4 mov lr, r4
sfi_breg ip, \ ldmia ip!, JMP_BUF_REGLIST
ldmia \B!, JMP_BUF_REGLIST
cfi_restore (v1) cfi_restore (v1)
cfi_restore (v2) cfi_restore (v2)
cfi_restore (v3) cfi_restore (v3)
@ -97,8 +90,7 @@ ENTRY (__longjmp)
/* Restore the VFP registers. */ /* Restore the VFP registers. */
/* Following instruction is vldmia ip!, {d8-d15}. */ /* Following instruction is vldmia ip!, {d8-d15}. */
sfi_breg r12, \ ldc p11, cr8, [r12], #64
ldc p11, cr8, [\B], #64
.Lno_vfp: .Lno_vfp:
#ifndef ARM_ASSUME_NO_IWMMXT #ifndef ARM_ASSUME_NO_IWMMXT
@ -107,18 +99,12 @@ ENTRY (__longjmp)
/* Restore the call-preserved iWMMXt registers. */ /* Restore the call-preserved iWMMXt registers. */
/* Following instructions are wldrd wr10, [ip], #8 (etc.) */ /* Following instructions are wldrd wr10, [ip], #8 (etc.) */
sfi_breg r12, \ ldcl p1, cr10, [r12], #8
ldcl p1, cr10, [\B], #8 ldcl p1, cr11, [r12], #8
sfi_breg r12, \ ldcl p1, cr12, [r12], #8
ldcl p1, cr11, [\B], #8 ldcl p1, cr13, [r12], #8
sfi_breg r12, \ ldcl p1, cr14, [r12], #8
ldcl p1, cr12, [\B], #8 ldcl p1, cr15, [r12], #8
sfi_breg r12, \
ldcl p1, cr13, [\B], #8
sfi_breg r12, \
ldcl p1, cr14, [\B], #8
sfi_breg r12, \
ldcl p1, cr15, [\B], #8
.Lno_iwmmxt: .Lno_iwmmxt:
#endif #endif

View File

@ -52,40 +52,31 @@ ENTRY (FUNC)
add lr, r1, r3, lsl #2 /* compute end src1 */ add lr, r1, r3, lsl #2 /* compute end src1 */
beq 1f beq 1f
sfi_breg r1, \ ldr r4, [r1], #4 /* do one to make count even */
ldr r4, [\B], #4 /* do one to make count even */ ldr r5, [r2], #4
sfi_breg r2, \
ldr r5, [\B], #4
OPC r4, r4, r5 OPC r4, r4, r5
teq r1, lr /* end of count? (preserve carry) */ teq r1, lr /* end of count? (preserve carry) */
sfi_breg r0, \ str r4, [r0], #4
str r4, [\B], #4
beq 9f beq 9f
1: 1:
tst r3, #2 /* count & 2 == 2? */ tst r3, #2 /* count & 2 == 2? */
beq 2f beq 2f
sfi_breg r1, \ ldm r1!, { r4, r5 } /* do two to make count 0 mod 4 */
ldm \B!, { r4, r5 } /* do two to make count 0 mod 4 */ ldm r2!, { r6, r7 }
sfi_breg r2, \
ldm \B!, { r6, r7 }
OPC r4, r4, r6 OPC r4, r4, r6
OPC r5, r5, r7 OPC r5, r5, r7
teq r1, lr /* end of count? */ teq r1, lr /* end of count? */
sfi_breg r0, \ stm r0!, { r4, r5 }
stm \B!, { r4, r5 }
beq 9f beq 9f
2: 2:
sfi_breg r1, \ ldm r1!, { r3, r5, r7, r10 } /* do four each loop */
ldm \B!, { r3, r5, r7, r10 } /* do four each loop */ ldm r2!, { r4, r6, r8, ip }
sfi_breg r2, \
ldm \B!, { r4, r6, r8, ip }
OPC r3, r3, r4 OPC r3, r3, r4
OPC r5, r5, r6 OPC r5, r5, r6
OPC r7, r7, r8 OPC r7, r7, r8
OPC r10, r10, ip OPC r10, r10, ip
teq r1, lr teq r1, lr
sfi_breg r0, \ stm r0!, { r3, r5, r7, r10 }
stm \B!, { r3, r5, r7, r10 }
bne 2b bne 2b
9: 9:

View File

@ -37,21 +37,16 @@ ENTRY (__mpn_addmul_1)
cfi_rel_offset (r6, 8) cfi_rel_offset (r6, 8)
cfi_rel_offset (r7, 12) cfi_rel_offset (r7, 12)
sfi_breg r1, \ ldr r6, [r1], #4
ldr r6, [\B], #4 ldr r5, [r0]
sfi_breg r0, \
ldr r5, [\B]
mov r4, #0 /* init carry in */ mov r4, #0 /* init carry in */
b 1f b 1f
0: 0:
sfi_breg r1, \ ldr r6, [r1], #4 /* load next ul */
ldr r6, [\B], #4 /* load next ul */
adds r7, r4, r5 /* (out, c) = cl + lpl */ adds r7, r4, r5 /* (out, c) = cl + lpl */
sfi_breg r0, \ ldr r5, [r0, #4] /* load next rl */
ldr r5, [\B, #4] /* load next rl */
adc r4, ip, #0 /* cl = hpl + c */ adc r4, ip, #0 /* cl = hpl + c */
sfi_breg r0, \ str r7, [r0], #4
str r7, [\B], #4
1: 1:
mov ip, #0 /* zero-extend rl */ mov ip, #0 /* zero-extend rl */
umlal r5, ip, r6, r3 /* (hpl, lpl) = ul * vl + rl */ umlal r5, ip, r6, r3 /* (hpl, lpl) = ul * vl + rl */
@ -59,8 +54,7 @@ ENTRY (__mpn_addmul_1)
bne 0b bne 0b
adds r4, r4, r5 /* (out, c) = cl + llpl */ adds r4, r4, r5 /* (out, c) = cl + llpl */
sfi_breg r0, \ str r4, [r0]
str r4, [\B]
adc r0, ip, #0 /* return hpl + c */ adc r0, ip, #0 /* return hpl + c */
pop { r4, r5, r6, r7 } pop { r4, r5, r6, r7 }

View File

@ -90,8 +90,7 @@ ENTRY(__mcount_arm_compat)
cfi_rel_offset (lr, 20) cfi_rel_offset (lr, 20)
movs r0, fp movs r0, fp
ittt ne ittt ne
sfi_breg r0, \ ldrne r0, [r0, #-4]
ldrne r0, [\B, #-4]
movsne r1, lr movsne r1, lr
blne __mcount_internal blne __mcount_internal
# if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__) # if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)

View File

@ -25,8 +25,7 @@ ENTRY (__rawmemchr)
@ r0 = start of string @ r0 = start of string
@ r1 = character to match @ r1 = character to match
@ returns a pointer to the match, which must be present. @ returns a pointer to the match, which must be present.
sfi_breg r0, \ ldrb r2, [r0] @ load first byte asap
ldrb r2, [\B] @ load first byte asap
@ To cater to long strings, we want to search through a few @ To cater to long strings, we want to search through a few
@ characters until we reach an aligned pointer. To cater to @ characters until we reach an aligned pointer. To cater to
@ -42,8 +41,7 @@ ENTRY (__rawmemchr)
bxeq lr bxeq lr
@ Loop until we find ... @ Loop until we find ...
1: sfi_breg r0, \ 1: ldrb r2, [r0, #1]!
ldrb r2, [\B, #1]!
subs r3, r3, #1 @ ... the alignment point subs r3, r3, #1 @ ... the alignment point
it ne it ne
cmpne r2, r1 @ ... or C cmpne r2, r1 @ ... or C
@ -56,16 +54,15 @@ ENTRY (__rawmemchr)
add r0, r0, #1 add r0, r0, #1
@ So now we're aligned. @ So now we're aligned.
sfi_breg r0, \ ldrd r2, r3, [r0], #8
ldrd r2, r3, [\B], #8
orr r1, r1, r1, lsl #8 @ Replicate C to all bytes orr r1, r1, r1, lsl #8 @ Replicate C to all bytes
#ifdef ARCH_HAS_T2 #ifdef ARCH_HAS_T2
movw ip, #0x0101 movw ip, #0x0101
sfi_pld r0, #64 pld [r0, #64]
movt ip, #0x0101 movt ip, #0x0101
#else #else
ldr ip, =0x01010101 ldr ip, =0x01010101
sfi_pld r0, #64 pld [r0, #64]
#endif #endif
orr r1, r1, r1, lsl #16 orr r1, r1, r1, lsl #16
@ -77,11 +74,10 @@ ENTRY (__rawmemchr)
eor r3, r3, r1 eor r3, r3, r1
uqsub8 r2, ip, r2 @ Find C uqsub8 r2, ip, r2 @ Find C
uqsub8 r3, ip, r3 uqsub8 r3, ip, r3
sfi_pld r0, #128 pld [r0, #128]
orrs r3, r3, r2 @ Test both words for found orrs r3, r3, r2 @ Test both words for found
it eq it eq
sfi_breg r0, \ ldrdeq r2, r3, [r0], #8
ldrdeq r2, r3, [\B], #8
beq 2b beq 2b
@ Found something. Disambiguate between first and second words. @ Found something. Disambiguate between first and second words.

View File

@ -25,8 +25,7 @@ ENTRY (strchr)
@ r0 = start of string @ r0 = start of string
@ r1 = character to match @ r1 = character to match
@ returns NULL for no match, or a pointer to the match @ returns NULL for no match, or a pointer to the match
sfi_breg r0, \ ldrb r2, [r0] @ load the first byte asap
ldrb r2, [\B] @ load the first byte asap
uxtb r1, r1 uxtb r1, r1
@ To cater to long strings, we want to search through a few @ To cater to long strings, we want to search through a few
@ -43,8 +42,7 @@ ENTRY (strchr)
beq 99f beq 99f
@ Loop until we find ... @ Loop until we find ...
1: sfi_breg r0, \ 1: ldrb r2, [r0, #1]!
ldrb r2, [\B, #1]!
subs r3, r3, #1 @ ... the alignment point subs r3, r3, #1 @ ... the alignment point
it ne it ne
cmpne r2, r1 @ ... or the character cmpne r2, r1 @ ... or the character
@ -67,16 +65,15 @@ ENTRY (strchr)
cfi_rel_offset (r6, 8) cfi_rel_offset (r6, 8)
cfi_rel_offset (r7, 12) cfi_rel_offset (r7, 12)
sfi_breg r0, \ ldrd r2, r3, [r0], #8
ldrd r2, r3, [\B], #8
orr r1, r1, r1, lsl #8 @ Replicate C to all bytes orr r1, r1, r1, lsl #8 @ Replicate C to all bytes
#ifdef ARCH_HAS_T2 #ifdef ARCH_HAS_T2
movw ip, #0x0101 movw ip, #0x0101
sfi_pld r0, #64 pld [r0, #64]
movt ip, #0x0101 movt ip, #0x0101
#else #else
ldr ip, =0x01010101 ldr ip, =0x01010101
sfi_pld r0, #64 pld [r0, #64]
#endif #endif
orr r1, r1, r1, lsl #16 orr r1, r1, r1, lsl #16
@ -90,14 +87,13 @@ ENTRY (strchr)
uqsub8 r5, ip, r3 uqsub8 r5, ip, r3
eor r7, r3, r1 eor r7, r3, r1
uqsub8 r6, ip, r6 @ Find C uqsub8 r6, ip, r6 @ Find C
sfi_pld r0, #128 @ Prefetch 2 lines ahead pld [r0, #128] @ Prefetch 2 lines ahead
uqsub8 r7, ip, r7 uqsub8 r7, ip, r7
orr r4, r4, r6 @ Combine found for EOS and C orr r4, r4, r6 @ Combine found for EOS and C
orr r5, r5, r7 orr r5, r5, r7
orrs r6, r4, r5 @ Combine the two words orrs r6, r4, r5 @ Combine the two words
it eq it eq
sfi_breg r0, \ ldrdeq r2, r3, [r0], #8
ldrdeq r2, r3, [\B], #8
beq 2b beq 2b
@ Found something. Disambiguate between first and second words. @ Found something. Disambiguate between first and second words.

View File

@ -44,8 +44,8 @@ ENTRY (strcpy)
@ Signal strcpy with DEST in IP. @ Signal strcpy with DEST in IP.
mov ip, r0 mov ip, r0
0: 0:
sfi_pld r0 pld [r0, #0]
sfi_pld r1 pld [r1, #0]
@ To cater to long strings, we want 8 byte alignment in the source. @ To cater to long strings, we want 8 byte alignment in the source.
@ To cater to small strings, we don't want to start that right away. @ To cater to small strings, we don't want to start that right away.
@ -54,11 +54,9 @@ ENTRY (strcpy)
rsb r3, r3, #16 rsb r3, r3, #16
@ Loop until we find ... @ Loop until we find ...
1: sfi_breg r1, \ 1: ldrb r2, [r1], #1
ldrb r2, [\B], #1
subs r3, r3, #1 @ ... the alignment point subs r3, r3, #1 @ ... the alignment point
sfi_breg r0, \ strb r2, [r0], #1
strb r2, [\B], #1
it ne it ne
cmpne r2, #0 @ ... or EOS cmpne r2, #0 @ ... or EOS
bne 1b bne 1b
@ -68,10 +66,9 @@ ENTRY (strcpy)
beq .Lreturn beq .Lreturn
@ Load the next two words asap @ Load the next two words asap
sfi_breg r1, \ ldrd r2, r3, [r1], #8
ldrd r2, r3, [\B], #8 pld [r0, #64]
sfi_pld r0, #64 pld [r1, #64]
sfi_pld r1, #64
@ For longer strings, we actually need a stack frame. @ For longer strings, we actually need a stack frame.
push { r4, r5, r6, r7 } push { r4, r5, r6, r7 }
@ -99,18 +96,15 @@ ENTRY (strcpy)
.balign 16 .balign 16
2: uqsub8 r4, r7, r2 @ Find EOS 2: uqsub8 r4, r7, r2 @ Find EOS
uqsub8 r5, r7, r3 uqsub8 r5, r7, r3
sfi_pld r1, #128 pld [r1, #128]
cmp r4, #0 @ EOS in first word? cmp r4, #0 @ EOS in first word?
sfi_pld r0, #128 pld [r0, #128]
bne 3f bne 3f
sfi_breg r0, \ str r2, [r0], #4
str r2, [\B], #4
cmp r5, #0 @ EOS in second word? cmp r5, #0 @ EOS in second word?
bne 4f bne 4f
sfi_breg r0, \ str r3, [r0], #4
str r3, [\B], #4 ldrd r2, r3, [r1], #8
sfi_breg r1, \
ldrd r2, r3, [\B], #8
b 2b b 2b
3: sub r1, r1, #4 @ backup to first word 3: sub r1, r1, #4 @ backup to first word
@ -120,11 +114,9 @@ ENTRY (strcpy)
@ Note that we generally back up and re-read source bytes, @ Note that we generally back up and re-read source bytes,
@ but we'll not re-write dest bytes. @ but we'll not re-write dest bytes.
.Lbyte_loop: .Lbyte_loop:
sfi_breg r1, \ ldrb r2, [r1], #1
ldrb r2, [\B], #1
cmp r2, #0 cmp r2, #0
sfi_breg r0, \ strb r2, [r0], #1
strb r2, [\B], #1
bne .Lbyte_loop bne .Lbyte_loop
pop { r4, r5, r6, r7 } pop { r4, r5, r6, r7 }
@ -169,8 +161,7 @@ ENTRY (strcpy)
@ Store a few bytes from the first word. @ Store a few bytes from the first word.
@ At the same time we align r0 and shift out bytes from r2. @ At the same time we align r0 and shift out bytes from r2.
.rept 4-\unalign .rept 4-\unalign
sfi_breg r0, \ strb r2, [r0], #1
strb r2, [\B], #1
lsr r2, r2, #8 lsr r2, r2, #8
.endr .endr
#ifdef __ARMEB__ #ifdef __ARMEB__
@ -185,23 +176,20 @@ ENTRY (strcpy)
orr r2, r2, r3, lsh_gt #(\unalign*8) orr r2, r2, r3, lsh_gt #(\unalign*8)
@ Save leftover bytes from the two words @ Save leftover bytes from the two words
lsh_ls r6, r3, #((4-\unalign)*8) lsh_ls r6, r3, #((4-\unalign)*8)
sfi_breg r0, \ str r2, [r0], #4
str r2, [\B], #4
@ The "real" start of the unaligned copy loop. @ The "real" start of the unaligned copy loop.
sfi_breg r1, \ ldrd r2, r3, [r1], #8 @ Load 8 more bytes
ldrd r2, r3, [\B], #8 @ Load 8 more bytes
uqsub8 r4, r7, r2 @ Find EOS uqsub8 r4, r7, r2 @ Find EOS
sfi_pld r1, #128 pld [r1, #128]
uqsub8 r5, r7, r3 uqsub8 r5, r7, r3
sfi_pld r0, #128 pld [r0, #128]
cmp r4, #0 @ EOS in first word? cmp r4, #0 @ EOS in first word?
bne 3f bne 3f
@ Combine the leftover and the first word @ Combine the leftover and the first word
orr r6, r6, r2, lsh_gt #(\unalign*8) orr r6, r6, r2, lsh_gt #(\unalign*8)
@ Discard used bytes from the first word. @ Discard used bytes from the first word.
lsh_ls r2, r2, #((4-\unalign)*8) lsh_ls r2, r2, #((4-\unalign)*8)
sfi_breg r0, \ str r6, [r0], #4
str r6, [\B], #4
b 1b b 1b
@ Found EOS in one of the words; adjust backward @ Found EOS in one of the words; adjust backward
3: sub r1, r1, #4 3: sub r1, r1, #4
@ -212,8 +200,7 @@ ENTRY (strcpy)
rev r2, r2 rev r2, r2
#endif #endif
.rept \unalign .rept \unalign
sfi_breg r0, \ strb r2, [r0], #1
strb r2, [\B], #1
lsr r2, r2, #8 lsr r2, r2, #8
.endr .endr
b .Lbyte_loop b .Lbyte_loop

View File

@ -23,8 +23,7 @@
ENTRY (strlen) ENTRY (strlen)
@ r0 = start of string @ r0 = start of string
sfi_breg r0, \ ldrb r2, [r0] @ load the first byte asap
ldrb r2, [\B] @ load the first byte asap
@ To cater to long strings, we want to search through a few @ To cater to long strings, we want to search through a few
@ characters until we reach an aligned pointer. To cater to @ characters until we reach an aligned pointer. To cater to
@ -39,8 +38,7 @@ ENTRY (strlen)
beq 99f beq 99f
@ Loop until we find ... @ Loop until we find ...
1: sfi_breg r0, \ 1: ldrb r2, [r0, #1]!
ldrb r2, [\B, #1]!
subs r3, r3, #1 @ ... the alignment point subs r3, r3, #1 @ ... the alignment point
it ne it ne
cmpne r2, #0 @ ... or EOS cmpne r2, #0 @ ... or EOS
@ -52,15 +50,14 @@ ENTRY (strlen)
add r0, r0, #1 add r0, r0, #1
@ So now we're aligned. @ So now we're aligned.
sfi_breg r0, \ ldrd r2, r3, [r0], #8
ldrd r2, r3, [\B], #8
#ifdef ARCH_HAS_T2 #ifdef ARCH_HAS_T2
movw ip, #0x0101 movw ip, #0x0101
sfi_pld r0, #64 pld [r0, #64]
movt ip, #0x0101 movt ip, #0x0101
#else #else
ldr ip, =0x01010101 ldr ip, =0x01010101
sfi_pld r0, #64 pld [r0, #64]
#endif #endif
@ Loop searching for EOS, 8 bytes at a time. @ Loop searching for EOS, 8 bytes at a time.
@ -70,11 +67,10 @@ ENTRY (strlen)
.balign 16 .balign 16
2: uqsub8 r2, ip, r2 @ Find EOS 2: uqsub8 r2, ip, r2 @ Find EOS
uqsub8 r3, ip, r3 uqsub8 r3, ip, r3
sfi_pld r0, #128 @ Prefetch 2 lines ahead pld [r0, #128] @ Prefetch 2 lines ahead
orrs r3, r3, r2 @ Combine the two words orrs r3, r3, r2 @ Combine the two words
it eq it eq
sfi_breg r0, \ ldrdeq r2, r3, [r0], #8
ldrdeq r2, r3, [\B], #8
beq 2b beq 2b
@ Found something. Disambiguate between first and second words. @ Found something. Disambiguate between first and second words.

View File

@ -33,8 +33,7 @@ ENTRY (strrchr)
@ Loop a few times until we're aligned. @ Loop a few times until we're aligned.
tst r3, #7 tst r3, #7
beq 2f beq 2f
1: sfi_breg r3, \ 1: ldrb r2, [r3], #1
ldrb r2, [\B], #1
cmp r2, r1 @ Find the character cmp r2, r1 @ Find the character
it eq it eq
subeq r0, r3, #1 subeq r0, r3, #1
@ -65,8 +64,7 @@ ENTRY (strrchr)
@ Loop searching for EOS and C, 8 bytes at a time. @ Loop searching for EOS and C, 8 bytes at a time.
@ Any time we find a match in a word, we copy the address of @ Any time we find a match in a word, we copy the address of
@ the word to r0, and the found bits to r2. @ the word to r0, and the found bits to r2.
3: sfi_breg r3, \ 3: ldrd r4, r5, [r3], #8
ldrd r4, r5, [\B], #8
@ Subtracting (unsigned saturating) from 1 means result of 1 for @ Subtracting (unsigned saturating) from 1 means result of 1 for
@ any byte that was originally zero and 0 otherwise. Therefore @ any byte that was originally zero and 0 otherwise. Therefore
@ we consider the lsb of each byte the "found" bit. @ we consider the lsb of each byte the "found" bit.

View File

@ -65,8 +65,7 @@ ENTRY(memchr)
@ Work up to an aligned point @ Work up to an aligned point
5: 5:
sfi_breg r0, \ ldrb r3, [r0],#1
ldrb r3, [\B],#1
subs r2, r2, #1 subs r2, r2, #1
cmp r3, r1 cmp r3, r1
beq 50f @ If it matches exit found beq 50f @ If it matches exit found
@ -91,8 +90,7 @@ ENTRY(memchr)
movs r3, #0 movs r3, #0
15: 15:
sfi_breg r0, \ ldrd r4,r5, [r0],#8
ldrd r4,r5, [\B],#8
#ifndef NO_THUMB #ifndef NO_THUMB
subs r6, r6, #8 subs r6, r6, #8
#endif #endif
@ -130,8 +128,7 @@ ENTRY(memchr)
#endif #endif
21: @ Post aligned section, or just a short call 21: @ Post aligned section, or just a short call
sfi_breg r0, \ ldrb r3,[r0],#1
ldrb r3,[\B],#1
#ifndef NO_THUMB #ifndef NO_THUMB
subs r2,r2,#1 subs r2,r2,#1
eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub

View File

@ -67,7 +67,7 @@
.text .text
.p2align 6 .p2align 6
ENTRY(strlen) ENTRY(strlen)
sfi_pld srcin, #0 pld [srcin, #0]
strd r4, r5, [sp, #-8]! strd r4, r5, [sp, #-8]!
cfi_adjust_cfa_offset (8) cfi_adjust_cfa_offset (8)
cfi_rel_offset (r4, 0) cfi_rel_offset (r4, 0)
@ -76,15 +76,14 @@ ENTRY(strlen)
bic src, srcin, #7 bic src, srcin, #7
mvn const_m1, #0 mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */ ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
sfi_pld src, #32 pld [src, #32]
bne.w .Lmisaligned8 bne.w .Lmisaligned8
mov const_0, #0 mov const_0, #0
mov result, #-8 mov result, #-8
.Lloop_aligned: .Lloop_aligned:
/* Bytes 0-7. */ /* Bytes 0-7. */
sfi_breg src, \ ldrd data1a, data1b, [src]
ldrd data1a, data1b, [\B] pld [src, #64]
sfi_pld src, #64
add result, result, #8 add result, result, #8
.Lstart_realigned: .Lstart_realigned:
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
@ -94,8 +93,7 @@ ENTRY(strlen)
cbnz data1b, .Lnull_found cbnz data1b, .Lnull_found
/* Bytes 8-15. */ /* Bytes 8-15. */
sfi_breg src, \ ldrd data1a, data1b, [src, #8]
ldrd data1a, data1b, [\B, #8]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8 add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
@ -104,8 +102,7 @@ ENTRY(strlen)
cbnz data1b, .Lnull_found cbnz data1b, .Lnull_found
/* Bytes 16-23. */ /* Bytes 16-23. */
sfi_breg src, \ ldrd data1a, data1b, [src, #16]
ldrd data1a, data1b, [\B, #16]
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8 add result, result, #8
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
@ -114,8 +111,7 @@ ENTRY(strlen)
cbnz data1b, .Lnull_found cbnz data1b, .Lnull_found
/* Bytes 24-31. */ /* Bytes 24-31. */
sfi_breg src, \ ldrd data1a, data1b, [src, #24]
ldrd data1a, data1b, [\B, #24]
add src, src, #32 add src, src, #32
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
add result, result, #8 add result, result, #8
@ -143,13 +139,12 @@ ENTRY(strlen)
.Lmisaligned8: .Lmisaligned8:
cfi_restore_state cfi_restore_state
sfi_breg src, \ ldrd data1a, data1b, [src]
ldrd data1a, data1b, [\B]
and tmp2, tmp1, #3 and tmp2, tmp1, #3
rsb result, tmp1, #0 rsb result, tmp1, #0
lsl tmp2, tmp2, #3 /* Bytes -> bits. */ lsl tmp2, tmp2, #3 /* Bytes -> bits. */
tst tmp1, #4 tst tmp1, #4
sfi_pld src, #64 pld [src, #64]
S2HI tmp2, const_m1, tmp2 S2HI tmp2, const_m1, tmp2
#ifdef NO_THUMB #ifdef NO_THUMB
mvn tmp1, tmp2 mvn tmp1, tmp2

View File

@ -226,71 +226,40 @@
#ifdef USE_VFP #ifdef USE_VFP
.macro cpy_line_vfp vreg, base .macro cpy_line_vfp vreg, base
sfi_breg dst, \ vstr \vreg, [dst, #\base]
vstr \vreg, [\B, #\base] vldr \vreg, [src, #\base]
sfi_breg src, \ vstr d0, [dst, #\base + 8]
vldr \vreg, [\B, #\base] vldr d0, [src, #\base + 8]
sfi_breg dst, \ vstr d1, [dst, #\base + 16]
vstr d0, [\B, #\base + 8] vldr d1, [src, #\base + 16]
sfi_breg src, \ vstr d2, [dst, #\base + 24]
vldr d0, [\B, #\base + 8] vldr d2, [src, #\base + 24]
sfi_breg dst, \ vstr \vreg, [dst, #\base + 32]
vstr d1, [\B, #\base + 16] vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
sfi_breg src, \ vstr d0, [dst, #\base + 40]
vldr d1, [\B, #\base + 16] vldr d0, [src, #\base + 40]
sfi_breg dst, \ vstr d1, [dst, #\base + 48]
vstr d2, [\B, #\base + 24] vldr d1, [src, #\base + 48]
sfi_breg src, \ vstr d2, [dst, #\base + 56]
vldr d2, [\B, #\base + 24] vldr d2, [src, #\base + 56]
sfi_breg dst, \
vstr \vreg, [\B, #\base + 32]
sfi_breg src, \
vldr \vreg, [\B, #\base + prefetch_lines * 64 - 32]
sfi_breg dst, \
vstr d0, [\B, #\base + 40]
sfi_breg src, \
vldr d0, [\B, #\base + 40]
sfi_breg dst, \
vstr d1, [\B, #\base + 48]
sfi_breg src, \
vldr d1, [\B, #\base + 48]
sfi_breg dst, \
vstr d2, [\B, #\base + 56]
sfi_breg src, \
vldr d2, [\B, #\base + 56]
.endm .endm
.macro cpy_tail_vfp vreg, base .macro cpy_tail_vfp vreg, base
sfi_breg dst, \ vstr \vreg, [dst, #\base]
vstr \vreg, [\B, #\base] vldr \vreg, [src, #\base]
sfi_breg src, \ vstr d0, [dst, #\base + 8]
vldr \vreg, [\B, #\base] vldr d0, [src, #\base + 8]
sfi_breg dst, \ vstr d1, [dst, #\base + 16]
vstr d0, [\B, #\base + 8] vldr d1, [src, #\base + 16]
sfi_breg src, \ vstr d2, [dst, #\base + 24]
vldr d0, [\B, #\base + 8] vldr d2, [src, #\base + 24]
sfi_breg dst, \ vstr \vreg, [dst, #\base + 32]
vstr d1, [\B, #\base + 16] vstr d0, [dst, #\base + 40]
sfi_breg src, \ vldr d0, [src, #\base + 40]
vldr d1, [\B, #\base + 16] vstr d1, [dst, #\base + 48]
sfi_breg dst, \ vldr d1, [src, #\base + 48]
vstr d2, [\B, #\base + 24] vstr d2, [dst, #\base + 56]
sfi_breg src, \ vldr d2, [src, #\base + 56]
vldr d2, [\B, #\base + 24]
sfi_breg dst, \
vstr \vreg, [\B, #\base + 32]
sfi_breg dst, \
vstr d0, [\B, #\base + 40]
sfi_breg src, \
vldr d0, [\B, #\base + 40]
sfi_breg dst, \
vstr d1, [\B, #\base + 48]
sfi_breg src, \
vldr d1, [\B, #\base + 48]
sfi_breg dst, \
vstr d2, [\B, #\base + 56]
sfi_breg src, \
vldr d2, [\B, #\base + 56]
.endm .endm
#endif #endif
@ -316,26 +285,16 @@ ENTRY(memcpy)
vst1.8 {d0}, [\reg]! vst1.8 {d0}, [\reg]!
.endm .endm
/* These are used by the NaCl sfi_breg macro. */
.macro _sfi_breg_dmask_neon_load_d0 reg
_sfi_dmask \reg
.endm
.macro _sfi_breg_dmask_neon_store_d0 reg
_sfi_dmask \reg
.endm
and tmp1, count, #0x38 and tmp1, count, #0x38
.macro dispatch_step i .macro dispatch_step i
sfi_breg src, neon_load_d0 \B neon_load_d0 src
sfi_breg dst, neon_store_d0 \B neon_store_d0 dst
.endm .endm
dispatch_7_dword dispatch_7_dword
tst count, #4 tst count, #4
sfi_breg src, \ ldrne tmp1, [src], #4
ldrne tmp1, [\B], #4 strne tmp1, [dst], #4
sfi_breg dst, \
strne tmp1, [\B], #4
#else #else
/* Copy up to 15 full words of data. May not be aligned. */ /* Copy up to 15 full words of data. May not be aligned. */
/* Cannot use VFP for unaligned data. */ /* Cannot use VFP for unaligned data. */
@ -344,23 +303,17 @@ ENTRY(memcpy)
add src, src, tmp1 add src, src, tmp1
/* Jump directly into the sequence below at the correct offset. */ /* Jump directly into the sequence below at the correct offset. */
.macro dispatch_step i .macro dispatch_step i
sfi_breg src, \ ldr tmp1, [src, #-(\i * 4)]
ldr tmp1, [\B, #-(\i * 4)] str tmp1, [dst, #-(\i * 4)]
sfi_breg dst, \
str tmp1, [\B, #-(\i * 4)]
.endm .endm
dispatch_15_word dispatch_15_word
#endif #endif
lsls count, count, #31 lsls count, count, #31
sfi_breg src, \ ldrhcs tmp1, [src], #2
ldrhcs tmp1, [\B], #2 ldrbne src, [src] /* Src is dead, use as a scratch. */
sfi_breg src, \ strhcs tmp1, [dst], #2
ldrbne src, [\B] /* Src is dead, use as a scratch. */ strbne src, [dst]
sfi_breg dst, \
strhcs tmp1, [\B], #2
sfi_breg dst, \
strbne src, [\B]
bx lr bx lr
.Lcpy_not_short: .Lcpy_not_short:
@ -388,19 +341,13 @@ ENTRY(memcpy)
beq 1f beq 1f
rsbs tmp2, tmp2, #0 rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29 sub count, count, tmp2, lsr #29
sfi_breg src, \ ldrmi tmp1, [src], #4
ldrmi tmp1, [\B], #4 strmi tmp1, [dst], #4
sfi_breg dst, \
strmi tmp1, [\B], #4
lsls tmp2, tmp2, #2 lsls tmp2, tmp2, #2
sfi_breg src, \ ldrhcs tmp1, [src], #2
ldrhcs tmp1, [\B], #2 ldrbne tmp2, [src], #1
sfi_breg src, \ strhcs tmp1, [dst], #2
ldrbne tmp2, [\B], #1 strbne tmp2, [dst], #1
sfi_breg dst, \
strhcs tmp1, [\B], #2
sfi_breg dst, \
strbne tmp2, [\B], #1
1: 1:
subs tmp2, count, #64 /* Use tmp2 for count. */ subs tmp2, count, #64 /* Use tmp2 for count. */
@ -412,40 +359,24 @@ ENTRY(memcpy)
.Lcpy_body_medium: /* Count in tmp2. */ .Lcpy_body_medium: /* Count in tmp2. */
#ifdef USE_VFP #ifdef USE_VFP
1: 1:
sfi_breg src, \ vldr d0, [src, #0]
vldr d0, [\B, #0]
subs tmp2, tmp2, #64 subs tmp2, tmp2, #64
sfi_breg src, \ vldr d1, [src, #8]
vldr d1, [\B, #8] vstr d0, [dst, #0]
sfi_breg dst, \ vldr d0, [src, #16]
vstr d0, [\B, #0] vstr d1, [dst, #8]
sfi_breg src, \ vldr d1, [src, #24]
vldr d0, [\B, #16] vstr d0, [dst, #16]
sfi_breg dst, \ vldr d0, [src, #32]
vstr d1, [\B, #8] vstr d1, [dst, #24]
sfi_breg src, \ vldr d1, [src, #40]
vldr d1, [\B, #24] vstr d0, [dst, #32]
sfi_breg dst, \ vldr d0, [src, #48]
vstr d0, [\B, #16] vstr d1, [dst, #40]
sfi_breg src, \ vldr d1, [src, #56]
vldr d0, [\B, #32] vstr d0, [dst, #48]
sfi_breg dst, \
vstr d1, [\B, #24]
sfi_breg src, \
vldr d1, [\B, #40]
sfi_breg dst, \
vstr d0, [\B, #32]
sfi_breg src, \
vldr d0, [\B, #48]
sfi_breg dst, \
vstr d1, [\B, #40]
sfi_breg src, \
vldr d1, [\B, #56]
sfi_breg dst, \
vstr d0, [\B, #48]
add src, src, #64 add src, src, #64
sfi_breg dst, \ vstr d1, [dst, #56]
vstr d1, [\B, #56]
add dst, dst, #64 add dst, dst, #64
bge 1b bge 1b
tst tmp2, #0x3f tst tmp2, #0x3f
@ -456,48 +387,30 @@ ENTRY(memcpy)
add dst, dst, tmp1 add dst, dst, tmp1
add src, src, tmp1 add src, src, tmp1
.macro dispatch_step i .macro dispatch_step i
sfi_breg src, \ vldr d0, [src, #-(\i * 8)]
vldr d0, [\B, #-(\i * 8)] vstr d0, [dst, #-(\i * 8)]
sfi_breg dst, \
vstr d0, [\B, #-(\i * 8)]
.endm .endm
dispatch_7_dword dispatch_7_dword
#else #else
sub src, src, #8 sub src, src, #8
sub dst, dst, #8 sub dst, dst, #8
1: 1:
sfi_breg src, \ ldrd A_l, A_h, [src, #8]
ldrd A_l, A_h, [\B, #8] strd A_l, A_h, [dst, #8]
sfi_breg dst, \ ldrd A_l, A_h, [src, #16]
strd A_l, A_h, [\B, #8] strd A_l, A_h, [dst, #16]
sfi_breg src, \ ldrd A_l, A_h, [src, #24]
ldrd A_l, A_h, [\B, #16] strd A_l, A_h, [dst, #24]
sfi_breg dst, \ ldrd A_l, A_h, [src, #32]
strd A_l, A_h, [\B, #16] strd A_l, A_h, [dst, #32]
sfi_breg src, \ ldrd A_l, A_h, [src, #40]
ldrd A_l, A_h, [\B, #24] strd A_l, A_h, [dst, #40]
sfi_breg dst, \ ldrd A_l, A_h, [src, #48]
strd A_l, A_h, [\B, #24] strd A_l, A_h, [dst, #48]
sfi_breg src, \ ldrd A_l, A_h, [src, #56]
ldrd A_l, A_h, [\B, #32] strd A_l, A_h, [dst, #56]
sfi_breg dst, \ ldrd A_l, A_h, [src, #64]!
strd A_l, A_h, [\B, #32] strd A_l, A_h, [dst, #64]!
sfi_breg src, \
ldrd A_l, A_h, [\B, #40]
sfi_breg dst, \
strd A_l, A_h, [\B, #40]
sfi_breg src, \
ldrd A_l, A_h, [\B, #48]
sfi_breg dst, \
strd A_l, A_h, [\B, #48]
sfi_breg src, \
ldrd A_l, A_h, [\B, #56]
sfi_breg dst, \
strd A_l, A_h, [\B, #56]
sfi_breg src, \
ldrd A_l, A_h, [\B, #64]!
sfi_breg dst, \
strd A_l, A_h, [\B, #64]!
subs tmp2, tmp2, #64 subs tmp2, tmp2, #64
bge 1b bge 1b
tst tmp2, #0x3f tst tmp2, #0x3f
@ -524,28 +437,20 @@ ENTRY(memcpy)
add dst, dst, tmp1 add dst, dst, tmp1
add src, src, tmp1 add src, src, tmp1
.macro dispatch_step i .macro dispatch_step i
sfi_breg src, \ ldrd A_l, A_h, [src, #-(\i * 8)]
ldrd A_l, A_h, [\B, #-(\i * 8)] strd A_l, A_h, [dst, #-(\i * 8)]
sfi_breg dst, \
strd A_l, A_h, [\B, #-(\i * 8)]
.endm .endm
dispatch_7_dword dispatch_7_dword
#endif #endif
tst tmp2, #4 tst tmp2, #4
sfi_breg src, \ ldrne tmp1, [src], #4
ldrne tmp1, [\B], #4 strne tmp1, [dst], #4
sfi_breg dst, \
strne tmp1, [\B], #4
lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */ lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
sfi_breg src, \ ldrhcs tmp1, [src], #2
ldrhcs tmp1, [\B], #2 ldrbne tmp2, [src]
sfi_breg src, \ strhcs tmp1, [dst], #2
ldrbne tmp2, [\B] strbne tmp2, [dst]
sfi_breg dst, \
strhcs tmp1, [\B], #2
sfi_breg dst, \
strbne tmp2, [\B]
.Ldone: .Ldone:
ldr tmp2, [sp], #FRAME_SIZE ldr tmp2, [sp], #FRAME_SIZE
@ -565,23 +470,15 @@ ENTRY(memcpy)
copy position into a register. This should act like a PLD copy position into a register. This should act like a PLD
operation but we won't have to repeat the transfer. */ operation but we won't have to repeat the transfer. */
sfi_breg src, \ vldr d3, [src, #0]
vldr d3, [\B, #0] vldr d4, [src, #64]
sfi_breg src, \ vldr d5, [src, #128]
vldr d4, [\B, #64] vldr d6, [src, #192]
sfi_breg src, \ vldr d7, [src, #256]
vldr d5, [\B, #128]
sfi_breg src, \
vldr d6, [\B, #192]
sfi_breg src, \
vldr d7, [\B, #256]
sfi_breg src, \ vldr d0, [src, #8]
vldr d0, [\B, #8] vldr d1, [src, #16]
sfi_breg src, \ vldr d2, [src, #24]
vldr d1, [\B, #16]
sfi_breg src, \
vldr d2, [\B, #24]
add src, src, #32 add src, src, #32
subs tmp2, tmp2, #prefetch_lines * 64 * 2 subs tmp2, tmp2, #prefetch_lines * 64 * 2
@ -606,31 +503,19 @@ ENTRY(memcpy)
add src, src, #3 * 64 add src, src, #3 * 64
add dst, dst, #3 * 64 add dst, dst, #3 * 64
cpy_tail_vfp d6, 0 cpy_tail_vfp d6, 0
sfi_breg dst, \ vstr d7, [dst, #64]
vstr d7, [\B, #64] vldr d7, [src, #64]
sfi_breg src, \ vstr d0, [dst, #64 + 8]
vldr d7, [\B, #64] vldr d0, [src, #64 + 8]
sfi_breg dst, \ vstr d1, [dst, #64 + 16]
vstr d0, [\B, #64 + 8] vldr d1, [src, #64 + 16]
sfi_breg src, \ vstr d2, [dst, #64 + 24]
vldr d0, [\B, #64 + 8] vldr d2, [src, #64 + 24]
sfi_breg dst, \ vstr d7, [dst, #64 + 32]
vstr d1, [\B, #64 + 16]
sfi_breg src, \
vldr d1, [\B, #64 + 16]
sfi_breg dst, \
vstr d2, [\B, #64 + 24]
sfi_breg src, \
vldr d2, [\B, #64 + 24]
sfi_breg dst, \
vstr d7, [\B, #64 + 32]
add src, src, #96 add src, src, #96
sfi_breg dst, \ vstr d0, [dst, #64 + 40]
vstr d0, [\B, #64 + 40] vstr d1, [dst, #64 + 48]
sfi_breg dst, \ vstr d2, [dst, #64 + 56]
vstr d1, [\B, #64 + 48]
sfi_breg dst, \
vstr d2, [\B, #64 + 56]
add dst, dst, #128 add dst, dst, #128
add tmp2, tmp2, #prefetch_lines * 64 add tmp2, tmp2, #prefetch_lines * 64
b .Lcpy_body_medium b .Lcpy_body_medium
@ -641,83 +526,59 @@ ENTRY(memcpy)
/* Pre-bias src and dst. */ /* Pre-bias src and dst. */
sub src, src, #8 sub src, src, #8
sub dst, dst, #8 sub dst, dst, #8
sfi_pld src, #8 pld [src, #8]
sfi_pld src, #72 pld [src, #72]
subs tmp2, tmp2, #64 subs tmp2, tmp2, #64
sfi_pld src, #136 pld [src, #136]
sfi_breg src, \ ldrd A_l, A_h, [src, #8]
ldrd A_l, A_h, [\B, #8]
strd B_l, B_h, [sp, #8] strd B_l, B_h, [sp, #8]
cfi_rel_offset (B_l, 8) cfi_rel_offset (B_l, 8)
cfi_rel_offset (B_h, 12) cfi_rel_offset (B_h, 12)
sfi_breg src, \ ldrd B_l, B_h, [src, #16]
ldrd B_l, B_h, [\B, #16]
strd C_l, C_h, [sp, #16] strd C_l, C_h, [sp, #16]
cfi_rel_offset (C_l, 16) cfi_rel_offset (C_l, 16)
cfi_rel_offset (C_h, 20) cfi_rel_offset (C_h, 20)
sfi_breg src, \ ldrd C_l, C_h, [src, #24]
ldrd C_l, C_h, [\B, #24]
strd D_l, D_h, [sp, #24] strd D_l, D_h, [sp, #24]
cfi_rel_offset (D_l, 24) cfi_rel_offset (D_l, 24)
cfi_rel_offset (D_h, 28) cfi_rel_offset (D_h, 28)
sfi_pld src, #200 pld [src, #200]
sfi_breg src, \ ldrd D_l, D_h, [src, #32]!
ldrd D_l, D_h, [\B, #32]!
b 1f b 1f
.p2align 6 .p2align 6
2: 2:
sfi_pld src, #232 pld [src, #232]
sfi_breg dst, \ strd A_l, A_h, [dst, #40]
strd A_l, A_h, [\B, #40] ldrd A_l, A_h, [src, #40]
sfi_breg src, \ strd B_l, B_h, [dst, #48]
ldrd A_l, A_h, [\B, #40] ldrd B_l, B_h, [src, #48]
sfi_breg dst, \ strd C_l, C_h, [dst, #56]
strd B_l, B_h, [\B, #48] ldrd C_l, C_h, [src, #56]
sfi_breg src, \ strd D_l, D_h, [dst, #64]!
ldrd B_l, B_h, [\B, #48] ldrd D_l, D_h, [src, #64]!
sfi_breg dst, \
strd C_l, C_h, [\B, #56]
sfi_breg src, \
ldrd C_l, C_h, [\B, #56]
sfi_breg dst, \
strd D_l, D_h, [\B, #64]!
sfi_breg src, \
ldrd D_l, D_h, [\B, #64]!
subs tmp2, tmp2, #64 subs tmp2, tmp2, #64
1: 1:
sfi_breg dst, \ strd A_l, A_h, [dst, #8]
strd A_l, A_h, [\B, #8] ldrd A_l, A_h, [src, #8]
sfi_breg src, \ strd B_l, B_h, [dst, #16]
ldrd A_l, A_h, [\B, #8] ldrd B_l, B_h, [src, #16]
sfi_breg dst, \ strd C_l, C_h, [dst, #24]
strd B_l, B_h, [\B, #16] ldrd C_l, C_h, [src, #24]
sfi_breg src, \ strd D_l, D_h, [dst, #32]
ldrd B_l, B_h, [\B, #16] ldrd D_l, D_h, [src, #32]
sfi_breg dst, \
strd C_l, C_h, [\B, #24]
sfi_breg src, \
ldrd C_l, C_h, [\B, #24]
sfi_breg dst, \
strd D_l, D_h, [\B, #32]
sfi_breg src, \
ldrd D_l, D_h, [\B, #32]
bcs 2b bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */ /* Save the remaining bytes and restore the callee-saved regs. */
sfi_breg dst, \ strd A_l, A_h, [dst, #40]
strd A_l, A_h, [\B, #40]
add src, src, #40 add src, src, #40
sfi_breg dst, \ strd B_l, B_h, [dst, #48]
strd B_l, B_h, [\B, #48]
ldrd B_l, B_h, [sp, #8] ldrd B_l, B_h, [sp, #8]
cfi_restore (B_l) cfi_restore (B_l)
cfi_restore (B_h) cfi_restore (B_h)
sfi_breg dst, \ strd C_l, C_h, [dst, #56]
strd C_l, C_h, [\B, #56]
ldrd C_l, C_h, [sp, #16] ldrd C_l, C_h, [sp, #16]
cfi_restore (C_l) cfi_restore (C_l)
cfi_restore (C_h) cfi_restore (C_h)
sfi_breg dst, \ strd D_l, D_h, [dst, #64]
strd D_l, D_h, [\B, #64]
ldrd D_l, D_h, [sp, #24] ldrd D_l, D_h, [sp, #24]
cfi_restore (D_l) cfi_restore (D_l)
cfi_restore (D_h) cfi_restore (D_h)
@ -734,35 +595,29 @@ ENTRY(memcpy)
cfi_remember_state cfi_remember_state
.Lcpy_notaligned: .Lcpy_notaligned:
sfi_pld src pld [src, #0]
sfi_pld src, #64 pld [src, #64]
/* There's at least 64 bytes to copy, but there is no mutual /* There's at least 64 bytes to copy, but there is no mutual
alignment. */ alignment. */
/* Bring DST to 64-bit alignment. */ /* Bring DST to 64-bit alignment. */
lsls tmp2, dst, #29 lsls tmp2, dst, #29
sfi_pld src, #(2 * 64) pld [src, #(2 * 64)]
beq 1f beq 1f
rsbs tmp2, tmp2, #0 rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29 sub count, count, tmp2, lsr #29
sfi_breg src, \ ldrmi tmp1, [src], #4
ldrmi tmp1, [\B], #4 strmi tmp1, [dst], #4
sfi_breg dst, \
strmi tmp1, [\B], #4
lsls tmp2, tmp2, #2 lsls tmp2, tmp2, #2
sfi_breg src, \ ldrbne tmp1, [src], #1
ldrbne tmp1, [\B], #1 ldrhcs tmp2, [src], #2
sfi_breg src, \ strbne tmp1, [dst], #1
ldrhcs tmp2, [\B], #2 strhcs tmp2, [dst], #2
sfi_breg dst, \
strbne tmp1, [\B], #1
sfi_breg dst, \
strhcs tmp2, [\B], #2
1: 1:
sfi_pld src, #(3 * 64) pld [src, #(3 * 64)]
subs count, count, #64 subs count, count, #64
ldrmi tmp2, [sp], #FRAME_SIZE ldrmi tmp2, [sp], #FRAME_SIZE
bmi .Ltail63unaligned bmi .Ltail63unaligned
sfi_pld src, #(4 * 64) pld [src, #(4 * 64)]
#ifdef USE_NEON #ifdef USE_NEON
/* These need an extra layer of macro just to work around a /* These need an extra layer of macro just to work around a
@ -775,132 +630,88 @@ ENTRY(memcpy)
vst1.8 {\reglist}, [ALIGN (\basereg, 64)]! vst1.8 {\reglist}, [ALIGN (\basereg, 64)]!
.endm .endm
/* These are used by the NaCl sfi_breg macro. */ neon_load_multi d0-d3, src
.macro _sfi_breg_dmask_neon_load_multi reg neon_load_multi d4-d7, src
_sfi_dmask \reg
.endm
.macro _sfi_breg_dmask_neon_store_multi reg
_sfi_dmask \reg
.endm
sfi_breg src, neon_load_multi d0-d3, \B
sfi_breg src, neon_load_multi d4-d7, \B
subs count, count, #64 subs count, count, #64
bmi 2f bmi 2f
1: 1:
sfi_pld src, #(4 * 64) pld [src, #(4 * 64)]
sfi_breg dst, neon_store_multi d0-d3, \B neon_store_multi d0-d3, dst
sfi_breg src, neon_load_multi d0-d3, \B neon_load_multi d0-d3, src
sfi_breg dst, neon_store_multi d4-d7, \B neon_store_multi d4-d7, dst
sfi_breg src, neon_load_multi d4-d7, \B neon_load_multi d4-d7, src
subs count, count, #64 subs count, count, #64
bpl 1b bpl 1b
2: 2:
sfi_breg dst, neon_store_multi d0-d3, \B neon_store_multi d0-d3, dst
sfi_breg dst, neon_store_multi d4-d7, \B neon_store_multi d4-d7, dst
ands count, count, #0x3f ands count, count, #0x3f
#else #else
/* Use an SMS style loop to maximize the I/O bandwidth. */ /* Use an SMS style loop to maximize the I/O bandwidth. */
sub src, src, #4 sub src, src, #4
sub dst, dst, #8 sub dst, dst, #8
subs tmp2, count, #64 /* Use tmp2 for count. */ subs tmp2, count, #64 /* Use tmp2 for count. */
sfi_breg src, \ ldr A_l, [src, #4]
ldr A_l, [\B, #4] ldr A_h, [src, #8]
sfi_breg src, \
ldr A_h, [\B, #8]
strd B_l, B_h, [sp, #8] strd B_l, B_h, [sp, #8]
cfi_rel_offset (B_l, 8) cfi_rel_offset (B_l, 8)
cfi_rel_offset (B_h, 12) cfi_rel_offset (B_h, 12)
sfi_breg src, \ ldr B_l, [src, #12]
ldr B_l, [\B, #12] ldr B_h, [src, #16]
sfi_breg src, \
ldr B_h, [\B, #16]
strd C_l, C_h, [sp, #16] strd C_l, C_h, [sp, #16]
cfi_rel_offset (C_l, 16) cfi_rel_offset (C_l, 16)
cfi_rel_offset (C_h, 20) cfi_rel_offset (C_h, 20)
sfi_breg src, \ ldr C_l, [src, #20]
ldr C_l, [\B, #20] ldr C_h, [src, #24]
sfi_breg src, \
ldr C_h, [\B, #24]
strd D_l, D_h, [sp, #24] strd D_l, D_h, [sp, #24]
cfi_rel_offset (D_l, 24) cfi_rel_offset (D_l, 24)
cfi_rel_offset (D_h, 28) cfi_rel_offset (D_h, 28)
sfi_breg src, \ ldr D_l, [src, #28]
ldr D_l, [\B, #28] ldr D_h, [src, #32]!
sfi_breg src, \
ldr D_h, [\B, #32]!
b 1f b 1f
.p2align 6 .p2align 6
2: 2:
sfi_pld src, #(5 * 64) - (32 - 4) pld [src, #(5 * 64) - (32 - 4)]
sfi_breg dst, \ strd A_l, A_h, [dst, #40]
strd A_l, A_h, [\B, #40] ldr A_l, [src, #36]
sfi_breg src, \ ldr A_h, [src, #40]
ldr A_l, [\B, #36] strd B_l, B_h, [dst, #48]
sfi_breg src, \ ldr B_l, [src, #44]
ldr A_h, [\B, #40] ldr B_h, [src, #48]
sfi_breg dst, \ strd C_l, C_h, [dst, #56]
strd B_l, B_h, [\B, #48] ldr C_l, [src, #52]
sfi_breg src, \ ldr C_h, [src, #56]
ldr B_l, [\B, #44] strd D_l, D_h, [dst, #64]!
sfi_breg src, \ ldr D_l, [src, #60]
ldr B_h, [\B, #48] ldr D_h, [src, #64]!
sfi_breg dst, \
strd C_l, C_h, [\B, #56]
sfi_breg src, \
ldr C_l, [\B, #52]
sfi_breg src, \
ldr C_h, [\B, #56]
sfi_breg dst, \
strd D_l, D_h, [\B, #64]!
sfi_breg src, \
ldr D_l, [\B, #60]
sfi_breg src, \
ldr D_h, [\B, #64]!
subs tmp2, tmp2, #64 subs tmp2, tmp2, #64
1: 1:
sfi_breg dst, \ strd A_l, A_h, [dst, #8]
strd A_l, A_h, [\B, #8] ldr A_l, [src, #4]
sfi_breg src, \ ldr A_h, [src, #8]
ldr A_l, [\B, #4] strd B_l, B_h, [dst, #16]
sfi_breg src, \ ldr B_l, [src, #12]
ldr A_h, [\B, #8] ldr B_h, [src, #16]
sfi_breg dst, \ strd C_l, C_h, [dst, #24]
strd B_l, B_h, [\B, #16] ldr C_l, [src, #20]
sfi_breg src, \ ldr C_h, [src, #24]
ldr B_l, [\B, #12] strd D_l, D_h, [dst, #32]
sfi_breg src, \ ldr D_l, [src, #28]
ldr B_h, [\B, #16] ldr D_h, [src, #32]
sfi_breg dst, \
strd C_l, C_h, [\B, #24]
sfi_breg src, \
ldr C_l, [\B, #20]
sfi_breg src, \
ldr C_h, [\B, #24]
sfi_breg dst, \
strd D_l, D_h, [\B, #32]
sfi_breg src, \
ldr D_l, [\B, #28]
sfi_breg src, \
ldr D_h, [\B, #32]
bcs 2b bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */ /* Save the remaining bytes and restore the callee-saved regs. */
sfi_breg dst, \ strd A_l, A_h, [dst, #40]
strd A_l, A_h, [\B, #40]
add src, src, #36 add src, src, #36
sfi_breg dst, \ strd B_l, B_h, [dst, #48]
strd B_l, B_h, [\B, #48]
ldrd B_l, B_h, [sp, #8] ldrd B_l, B_h, [sp, #8]
cfi_restore (B_l) cfi_restore (B_l)
cfi_restore (B_h) cfi_restore (B_h)
sfi_breg dst, \ strd C_l, C_h, [dst, #56]
strd C_l, C_h, [\B, #56]
ldrd C_l, C_h, [sp, #16] ldrd C_l, C_h, [sp, #16]
cfi_restore (C_l) cfi_restore (C_l)
cfi_restore (C_h) cfi_restore (C_h)
sfi_breg dst, \ strd D_l, D_h, [dst, #64]
strd D_l, D_h, [\B, #64]
ldrd D_l, D_h, [sp, #24] ldrd D_l, D_h, [sp, #24]
cfi_restore (D_l) cfi_restore (D_l)
cfi_restore (D_h) cfi_restore (D_h)

View File

@ -178,10 +178,8 @@
#endif #endif
ENTRY (strcmp) ENTRY (strcmp)
#if STRCMP_PRECHECK == 1 #if STRCMP_PRECHECK == 1
sfi_breg src1, \ ldrb r2, [src1]
ldrb r2, [\B] ldrb r3, [src2]
sfi_breg src2, \
ldrb r3, [\B]
cmp r2, #1 cmp r2, #1
it cs it cs
cmpcs r2, r3 cmpcs r2, r3
@ -211,11 +209,9 @@ ENTRY (strcmp)
and tmp2, tmp1, #3 and tmp2, tmp1, #3
bic src2, src2, #7 bic src2, src2, #7
lsl tmp2, tmp2, #3 /* Bytes -> bits. */ lsl tmp2, tmp2, #3 /* Bytes -> bits. */
sfi_breg src1, \ ldrd data1a, data1b, [src1], #16
ldrd data1a, data1b, [\B], #16
tst tmp1, #4 tst tmp1, #4
sfi_breg src2, \ ldrd data2a, data2b, [src2], #16
ldrd data2a, data2b, [\B], #16
prepare_mask tmp1, tmp2 prepare_mask tmp1, tmp2
apply_mask data1a, tmp1 apply_mask data1a, tmp1
apply_mask data2a, tmp1 apply_mask data2a, tmp1
@ -231,10 +227,8 @@ ENTRY (strcmp)
.p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */
.p2align 2 /* Always word aligned. */ .p2align 2 /* Always word aligned. */
.Lloop_aligned8: .Lloop_aligned8:
sfi_breg src1, \ ldrd data1a, data1b, [src1], #16
ldrd data1a, data1b, [\B], #16 ldrd data2a, data2b, [src2], #16
sfi_breg src2, \
ldrd data2a, data2b, [\B], #16
.Lstart_realigned8: .Lstart_realigned8:
uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
eor syndrome_a, data1a, data2a eor syndrome_a, data1a, data2a
@ -245,10 +239,8 @@ ENTRY (strcmp)
sel syndrome_b, syndrome_b, const_m1 sel syndrome_b, syndrome_b, const_m1
cbnz syndrome_b, .Ldiff_in_b cbnz syndrome_b, .Ldiff_in_b
sfi_breg src1, \ ldrd data1a, data1b, [src1, #-8]
ldrd data1a, data1b, [\B, #-8] ldrd data2a, data2b, [src2, #-8]
sfi_breg src2, \
ldrd data2a, data2b, [\B, #-8]
uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
eor syndrome_a, data1a, data2a eor syndrome_a, data1a, data2a
sel syndrome_a, syndrome_a, const_m1 sel syndrome_a, syndrome_a, const_m1
@ -279,19 +271,15 @@ ENTRY (strcmp)
/* Unrolled by a factor of 2, to reduce the number of post-increment /* Unrolled by a factor of 2, to reduce the number of post-increment
operations. */ operations. */
.Lloop_aligned4: .Lloop_aligned4:
sfi_breg src1, \ ldr data1, [src1], #8
ldr data1, [\B], #8 ldr data2, [src2], #8
sfi_breg src2, \
ldr data2, [\B], #8
.Lstart_realigned4: .Lstart_realigned4:
uadd8 syndrome, data1, const_m1 /* Only need GE bits. */ uadd8 syndrome, data1, const_m1 /* Only need GE bits. */
eor syndrome, data1, data2 eor syndrome, data1, data2
sel syndrome, syndrome, const_m1 sel syndrome, syndrome, const_m1
cbnz syndrome, .Laligned4_done cbnz syndrome, .Laligned4_done
sfi_breg src1, \ ldr data1, [src1, #-4]
ldr data1, [\B, #-4] ldr data2, [src2, #-4]
sfi_breg src2, \
ldr data2, [\B, #-4]
uadd8 syndrome, data1, const_m1 uadd8 syndrome, data1, const_m1
eor syndrome, data1, data2 eor syndrome, data1, data2
sel syndrome, syndrome, const_m1 sel syndrome, syndrome, const_m1
@ -307,11 +295,9 @@ ENTRY (strcmp)
masking off the unwanted loaded data to prevent a difference. */ masking off the unwanted loaded data to prevent a difference. */
lsl tmp1, tmp1, #3 /* Bytes -> bits. */ lsl tmp1, tmp1, #3 /* Bytes -> bits. */
bic src1, src1, #3 bic src1, src1, #3
sfi_breg src1, \ ldr data1, [src1], #8
ldr data1, [\B], #8
bic src2, src2, #3 bic src2, src2, #3
sfi_breg src2, \ ldr data2, [src2], #8
ldr data2, [\B], #8
prepare_mask tmp1, tmp1 prepare_mask tmp1, tmp1
apply_mask data1, tmp1 apply_mask data1, tmp1
@ -324,30 +310,26 @@ ENTRY (strcmp)
sub src2, src2, tmp1 sub src2, src2, tmp1
bic src1, src1, #3 bic src1, src1, #3
lsls tmp1, tmp1, #31 lsls tmp1, tmp1, #31
sfi_breg src1, \ ldr data1, [src1], #4
ldr data1, [\B], #4
beq .Laligned_m2 beq .Laligned_m2
bcs .Laligned_m1 bcs .Laligned_m1
#if STRCMP_PRECHECK == 0 #if STRCMP_PRECHECK == 0
sfi_breg src2, \ ldrb data2, [src2, #1]
ldrb data2, [\B, #1]
uxtb tmp1, data1, ror #BYTE1_OFFSET uxtb tmp1, data1, ror #BYTE1_OFFSET
subs tmp1, tmp1, data2 subs tmp1, tmp1, data2
bne .Lmisaligned_exit bne .Lmisaligned_exit
cbz data2, .Lmisaligned_exit cbz data2, .Lmisaligned_exit
.Laligned_m2: .Laligned_m2:
sfi_breg src2, \ ldrb data2, [src2, #2]
ldrb data2, [\B, #2]
uxtb tmp1, data1, ror #BYTE2_OFFSET uxtb tmp1, data1, ror #BYTE2_OFFSET
subs tmp1, tmp1, data2 subs tmp1, tmp1, data2
bne .Lmisaligned_exit bne .Lmisaligned_exit
cbz data2, .Lmisaligned_exit cbz data2, .Lmisaligned_exit
.Laligned_m1: .Laligned_m1:
sfi_breg src2, \ ldrb data2, [src2, #3]
ldrb data2, [\B, #3]
uxtb tmp1, data1, ror #BYTE3_OFFSET uxtb tmp1, data1, ror #BYTE3_OFFSET
subs tmp1, tmp1, data2 subs tmp1, tmp1, data2
bne .Lmisaligned_exit bne .Lmisaligned_exit
@ -356,16 +338,14 @@ ENTRY (strcmp)
#else /* STRCMP_PRECHECK */ #else /* STRCMP_PRECHECK */
/* If we've done the pre-check, then we don't need to check the /* If we've done the pre-check, then we don't need to check the
first byte again here. */ first byte again here. */
sfi_breg src2, \ ldrb data2, [src2, #2]
ldrb data2, [\B, #2]
uxtb tmp1, data1, ror #BYTE2_OFFSET uxtb tmp1, data1, ror #BYTE2_OFFSET
subs tmp1, tmp1, data2 subs tmp1, tmp1, data2
bne .Lmisaligned_exit bne .Lmisaligned_exit
cbz data2, .Lmisaligned_exit cbz data2, .Lmisaligned_exit
.Laligned_m2: .Laligned_m2:
sfi_breg src2, \ ldrb data2, [src2, #3]
ldrb data2, [\B, #3]
uxtb tmp1, data1, ror #BYTE3_OFFSET uxtb tmp1, data1, ror #BYTE3_OFFSET
subs tmp1, tmp1, data2 subs tmp1, tmp1, data2
bne .Lmisaligned_exit bne .Lmisaligned_exit
@ -391,13 +371,11 @@ ENTRY (strcmp)
cfi_restore_state cfi_restore_state
/* src1 is word aligned, but src2 has no common alignment /* src1 is word aligned, but src2 has no common alignment
with it. */ with it. */
sfi_breg src1, \ ldr data1, [src1], #4
ldr data1, [\B], #4
lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */
bic src2, src2, #3 bic src2, src2, #3
sfi_breg src2, \ ldr data2, [src2], #4
ldr data2, [\B], #4
bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */
bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */ bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */
@ -409,13 +387,11 @@ ENTRY (strcmp)
sel syndrome, syndrome, const_m1 sel syndrome, syndrome, const_m1
bne 4f bne 4f
cbnz syndrome, 5f cbnz syndrome, 5f
sfi_breg src2, \ ldr data2, [src2], #4
ldr data2, [\B], #4
eor tmp1, tmp1, data1 eor tmp1, tmp1, data1
cmp tmp1, data2, S2HI #24 cmp tmp1, data2, S2HI #24
bne 6f bne 6f
sfi_breg src1, \ ldr data1, [src1], #4
ldr data1, [\B], #4
b .Loverlap3 b .Loverlap3
4: 4:
S2LO data2, data2, #8 S2LO data2, data2, #8
@ -427,8 +403,7 @@ ENTRY (strcmp)
/* We can only get here if the MSB of data1 contains 0, so /* We can only get here if the MSB of data1 contains 0, so
fast-path the exit. */ fast-path the exit. */
sfi_breg src2, \ ldrb result, [src2]
ldrb result, [\B]
ldrd r4, r5, [sp], #16 ldrd r4, r5, [sp], #16
cfi_remember_state cfi_remember_state
cfi_def_cfa_offset (0) cfi_def_cfa_offset (0)
@ -454,13 +429,11 @@ ENTRY (strcmp)
sel syndrome, syndrome, const_m1 sel syndrome, syndrome, const_m1
bne 4f bne 4f
cbnz syndrome, 5f cbnz syndrome, 5f
sfi_breg src2, \ ldr data2, [src2], #4
ldr data2, [\B], #4
eor tmp1, tmp1, data1 eor tmp1, tmp1, data1
cmp tmp1, data2, S2HI #16 cmp tmp1, data2, S2HI #16
bne 6f bne 6f
sfi_breg src1, \ ldr data1, [src1], #4
ldr data1, [\B], #4
b .Loverlap2 b .Loverlap2
4: 4:
S2LO data2, data2, #16 S2LO data2, data2, #16
@ -469,8 +442,7 @@ ENTRY (strcmp)
ands syndrome, syndrome, const_m1, S2LO #16 ands syndrome, syndrome, const_m1, S2LO #16
bne .Lstrcmp_done_equal bne .Lstrcmp_done_equal
sfi_breg src2, \ ldrh data2, [src2]
ldrh data2, [\B]
S2LO data1, data1, #16 S2LO data1, data1, #16
#ifdef __ARM_BIG_ENDIAN #ifdef __ARM_BIG_ENDIAN
lsl data2, data2, #16 lsl data2, data2, #16
@ -490,13 +462,11 @@ ENTRY (strcmp)
sel syndrome, syndrome, const_m1 sel syndrome, syndrome, const_m1
bne 4f bne 4f
cbnz syndrome, 5f cbnz syndrome, 5f
sfi_breg src2, \ ldr data2, [src2], #4
ldr data2, [\B], #4
eor tmp1, tmp1, data1 eor tmp1, tmp1, data1
cmp tmp1, data2, S2HI #8 cmp tmp1, data2, S2HI #8
bne 6f bne 6f
sfi_breg src1, \ ldr data1, [src1], #4
ldr data1, [\B], #4
b .Loverlap1 b .Loverlap1
4: 4:
S2LO data2, data2, #24 S2LO data2, data2, #24
@ -504,8 +474,7 @@ ENTRY (strcmp)
5: 5:
tst syndrome, #LSB tst syndrome, #LSB
bne .Lstrcmp_done_equal bne .Lstrcmp_done_equal
sfi_breg src2, \ ldr data2, [src2]
ldr data2, [\B]
6: 6:
S2LO data1, data1, #8 S2LO data1, data1, #8
bic data2, data2, #MSB bic data2, data2, #MSB

View File

@ -32,8 +32,7 @@
eabi_fnstart eabi_fnstart
.align 2 .align 2
_dl_tlsdesc_return: _dl_tlsdesc_return:
sfi_breg r0, \ ldr r0, [r0]
ldr r0, [\B]
BX (lr) BX (lr)
eabi_fnend eabi_fnend
cfi_endproc cfi_endproc
@ -92,30 +91,23 @@ _dl_tlsdesc_dynamic:
cfi_rel_offset (r3,4) cfi_rel_offset (r3,4)
cfi_rel_offset (r4,8) cfi_rel_offset (r4,8)
cfi_rel_offset (lr,12) cfi_rel_offset (lr,12)
sfi_breg r0, \ ldr r1, [r0] /* td */
ldr r1, [\B] /* td */
GET_TLS (lr) GET_TLS (lr)
mov r4, r0 /* r4 = tp */ mov r4, r0 /* r4 = tp */
sfi_breg r0, \ ldr r0, [r0]
ldr r0, [\B] ldr r2, [r1, #8] /* gen_count */
sfi_breg r1, \ ldr r3, [r0]
ldr r2, [\B, #8] /* gen_count */
sfi_breg r0, \
ldr r3, [\B]
cmp r2, r3 cmp r2, r3
bhi 1f bhi 1f
sfi_breg r1, \ ldr r3, [r1]
ldr r3, [\B]
#ifndef ARM_NO_INDEX_REGISTER #ifndef ARM_NO_INDEX_REGISTER
ldr r2, [r0, r3, lsl #3] ldr r2, [r0, r3, lsl #3]
#else #else
add lr, r0, r3, lsl #3 add lr, r0, r3, lsl #3
sfi_breg lr, \ ldr r2, [lr]
ldr r2, [\B]
#endif #endif
cmn r2, #1 cmn r2, #1
ittt ne ittt ne
sfi_breg r1, \
ldrne r3, [r1, #4] ldrne r3, [r1, #4]
addne r3, r2, r3 addne r3, r2, r3
rsbne r0, r4, r3 rsbne r0, r4, r3
@ -178,8 +170,7 @@ _dl_tlsdesc_lazy_resolver:
pop {r2} pop {r2}
cfi_adjust_cfa_offset (-4) cfi_adjust_cfa_offset (-4)
cfi_restore (r2) cfi_restore (r2)
sfi_breg r0, \ ldr r1, [r0, #4]
ldr r1, [\B, #4]
BX (r1) BX (r1)
eabi_fnend eabi_fnend
cfi_endproc cfi_endproc
@ -220,8 +211,7 @@ _dl_tlsdesc_resolve_hold:
cfi_restore (r2) cfi_restore (r2)
cfi_restore (r1) cfi_restore (r1)
cfi_restore (r0) cfi_restore (r0)
sfi_breg r0, \ ldr r1, [r0, #4]
ldr r1, [\B, #4]
BX (r1) BX (r1)
eabi_fnend eabi_fnend
cfi_endproc cfi_endproc

View File

@ -70,7 +70,7 @@ ENTRY(memcpy)
subs r2, r2, #4 subs r2, r2, #4
blt 8f blt 8f
ands ip, r0, #3 ands ip, r0, #3
PLD( sfi_pld r1, #0 ) PLD( pld [r1, #0] )
bne 9f bne 9f
ands ip, r1, #3 ands ip, r1, #3
bne 10f bne 10f
@ -97,19 +97,17 @@ ENTRY(memcpy)
CALGN( bx r4 ) CALGN( bx r4 )
#endif #endif
PLD( sfi_pld r1, #0 ) PLD( pld [r1, #0] )
2: PLD( subs r2, r2, #96 ) 2: PLD( subs r2, r2, #96 )
PLD( sfi_pld r1, #28 ) PLD( pld [r1, #28] )
PLD( blt 4f ) PLD( blt 4f )
PLD( sfi_pld r1, #60 ) PLD( pld [r1, #60] )
PLD( sfi_pld r1, #92 ) PLD( pld [r1, #92] )
3: PLD( sfi_pld r1, #124 ) 3: PLD( pld [r1, #124] )
4: sfi_breg r1, \ 4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
ldmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
subs r2, r2, #32 subs r2, r2, #32
sfi_breg r0, \ stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
stmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
bge 3b bge 3b
PLD( cmn r2, #96 ) PLD( cmn r2, #96 )
PLD( bge 4b ) PLD( bge 4b )
@ -136,26 +134,19 @@ ENTRY(memcpy)
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
6: nop 6: nop
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r3, [r1], #4
ldr r3, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r4, [r1], #4
ldr r4, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r5, [r1], #4
ldr r5, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r6, [r1], #4
ldr r6, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r7, [r1], #4
ldr r7, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r8, [r1], #4
ldr r8, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr lr, [r1], #4
ldr lr, [\B], #4
#ifndef ARM_ALWAYS_BX #ifndef ARM_ALWAYS_BX
add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
@ -172,26 +163,19 @@ ENTRY(memcpy)
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
66: nop 66: nop
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r3, [r0], #4
str r3, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r4, [r0], #4
str r4, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r5, [r0], #4
str r5, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r6, [r0], #4
str r6, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r7, [r0], #4
str r7, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r8, [r0], #4
str r8, [\B], #4
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str lr, [r0], #4
str lr, [\B], #4
#ifdef ARM_ALWAYS_BX #ifdef ARM_ALWAYS_BX
pop {r10} pop {r10}
@ -209,18 +193,12 @@ ENTRY(memcpy)
cfi_restore (r8) cfi_restore (r8)
8: movs r2, r2, lsl #31 8: movs r2, r2, lsl #31
sfi_breg r1, \ ldrbne r3, [r1], #1
ldrbne r3, [\B], #1 ldrbcs r4, [r1], #1
sfi_breg r1, \ ldrbcs ip, [r1]
ldrbcs r4, [\B], #1 strbne r3, [r0], #1
sfi_breg r1, \ strbcs r4, [r0], #1
ldrbcs ip, [\B] strbcs ip, [r0]
sfi_breg r0, \
strbne r3, [\B], #1
sfi_breg r0, \
strbcs r4, [\B], #1
sfi_breg r0, \
strbcs ip, [\B]
#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \ #if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
|| defined (ARM_ALWAYS_BX)) || defined (ARM_ALWAYS_BX))
@ -237,27 +215,20 @@ ENTRY(memcpy)
9: rsb ip, ip, #4 9: rsb ip, ip, #4
cmp ip, #2 cmp ip, #2
sfi_breg r1, \ ldrbgt r3, [r1], #1
ldrbgt r3, [\B], #1 ldrbge r4, [r1], #1
sfi_breg r1, \ ldrb lr, [r1], #1
ldrbge r4, [\B], #1 strbgt r3, [r0], #1
sfi_breg r1, \ strbge r4, [r0], #1
ldrb lr, [\B], #1
sfi_breg r0, \
strbgt r3, [\B], #1
sfi_breg r0, \
strbge r4, [\B], #1
subs r2, r2, ip subs r2, r2, ip
sfi_breg r0, \ strb lr, [r0], #1
strb lr, [\B], #1
blt 8b blt 8b
ands ip, r1, #3 ands ip, r1, #3
beq 1b beq 1b
10: bic r1, r1, #3 10: bic r1, r1, #3
cmp ip, #2 cmp ip, #2
sfi_breg r1, \ ldr lr, [r1], #4
ldr lr, [\B], #4
beq 17f beq 17f
bgt 18f bgt 18f
@ -281,20 +252,18 @@ ENTRY(memcpy)
cfi_rel_offset (r8, 12) cfi_rel_offset (r8, 12)
cfi_rel_offset (r10, 16) cfi_rel_offset (r10, 16)
PLD( sfi_pld r1, #0 ) PLD( pld [r1, #0] )
PLD( subs r2, r2, #96 ) PLD( subs r2, r2, #96 )
PLD( sfi_pld r1, #28 ) PLD( pld [r1, #28] )
PLD( blt 13f ) PLD( blt 13f )
PLD( sfi_pld r1, #60 ) PLD( pld [r1, #60] )
PLD( sfi_pld r1, #92 ) PLD( pld [r1, #92] )
12: PLD( sfi_pld r1, #124 ) 12: PLD( pld [r1, #124] )
13: sfi_breg r1, \ 13: ldmia r1!, {r4, r5, r6, r7}
ldmia \B!, {r4, r5, r6, r7}
mov r3, lr, PULL #\pull mov r3, lr, PULL #\pull
subs r2, r2, #32 subs r2, r2, #32
sfi_breg r1, \ ldmia r1!, {r8, r10, ip, lr}
ldmia \B!, {r8, r10, ip, lr}
orr r3, r3, r4, PUSH #\push orr r3, r3, r4, PUSH #\push
mov r4, r4, PULL #\pull mov r4, r4, PULL #\pull
orr r4, r4, r5, PUSH #\push orr r4, r4, r5, PUSH #\push
@ -310,8 +279,7 @@ ENTRY(memcpy)
orr r10, r10, ip, PUSH #\push orr r10, r10, ip, PUSH #\push
mov ip, ip, PULL #\pull mov ip, ip, PULL #\pull
orr ip, ip, lr, PUSH #\push orr ip, ip, lr, PUSH #\push
sfi_breg r0, \ stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
stmia \B!, {r3, r4, r5, r6, r7, r8, r10, ip}
bge 12b bge 12b
PLD( cmn r2, #96 ) PLD( cmn r2, #96 )
PLD( bge 13b ) PLD( bge 13b )
@ -328,12 +296,10 @@ ENTRY(memcpy)
beq 16f beq 16f
15: mov r3, lr, PULL #\pull 15: mov r3, lr, PULL #\pull
sfi_breg r1, \ ldr lr, [r1], #4
ldr lr, [\B], #4
subs ip, ip, #4 subs ip, ip, #4
orr r3, r3, lr, PUSH #\push orr r3, r3, lr, PUSH #\push
sfi_breg r0, \ str r3, [r0], #4
str r3, [\B], #4
bgt 15b bgt 15b
CALGN( cmp r2, #0 ) CALGN( cmp r2, #0 )
CALGN( bge 11b ) CALGN( bge 11b )

View File

@ -87,7 +87,7 @@ ENTRY(memmove)
subs r2, r2, #4 subs r2, r2, #4
blt 8f blt 8f
ands ip, r0, #3 ands ip, r0, #3
PLD( sfi_pld r1, #-4 ) PLD( pld [r1, #-4] )
bne 9f bne 9f
ands ip, r1, #3 ands ip, r1, #3
bne 10f bne 10f
@ -113,19 +113,17 @@ ENTRY(memmove)
CALGN( bx r4 ) CALGN( bx r4 )
#endif #endif
PLD( sfi_pld r1, #-4 ) PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 ) 2: PLD( subs r2, r2, #96 )
PLD( sfi_pld r1, #-32 ) PLD( pld [r1, #-32] )
PLD( blt 4f ) PLD( blt 4f )
PLD( sfi_pld r1, #-64 ) PLD( pld [r1, #-64] )
PLD( sfi_pld r1, #-96 ) PLD( pld [r1, #-96] )
3: PLD( sfi_pld r1, #-128 ) 3: PLD( pld [r1, #-128] )
4: sfi_breg r1, \ 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
ldmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
subs r2, r2, #32 subs r2, r2, #32
sfi_breg r0, \ stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
stmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
bge 3b bge 3b
PLD( cmn r2, #96 ) PLD( cmn r2, #96 )
PLD( bge 4b ) PLD( bge 4b )
@ -152,26 +150,19 @@ ENTRY(memmove)
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
6: nop 6: nop
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r3, [r1, #-4]!
ldr r3, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r4, [r1, #-4]!
ldr r4, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r5, [r1, #-4]!
ldr r5, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r6, [r1, #-4]!
ldr r6, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r7, [r1, #-4]!
ldr r7, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr r8, [r1, #-4]!
ldr r8, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r1, \ ldr lr, [r1, #-4]!
ldr lr, [\B, #-4]!
#ifndef ARM_ALWAYS_BX #ifndef ARM_ALWAYS_BX
add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
@ -188,26 +179,19 @@ ENTRY(memmove)
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
66: nop 66: nop
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r3, [r0, #-4]!
str r3, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r4, [r0, #-4]!
str r4, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r5, [r0, #-4]!
str r5, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r6, [r0, #-4]!
str r6, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r7, [r0, #-4]!
str r7, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str r8, [r0, #-4]!
str r8, [\B, #-4]!
.p2align ARM_BX_ALIGN_LOG2 .p2align ARM_BX_ALIGN_LOG2
sfi_breg r0, \ str lr, [r0, #-4]!
str lr, [\B, #-4]!
#ifdef ARM_ALWAYS_BX #ifdef ARM_ALWAYS_BX
pop {r10} pop {r10}
@ -225,18 +209,12 @@ ENTRY(memmove)
cfi_restore (r8) cfi_restore (r8)
8: movs r2, r2, lsl #31 8: movs r2, r2, lsl #31
sfi_breg r1, \ ldrbne r3, [r1, #-1]!
ldrbne r3, [\B, #-1]! ldrbcs r4, [r1, #-1]!
sfi_breg r1, \ ldrbcs ip, [r1, #-1]
ldrbcs r4, [\B, #-1]! strbne r3, [r0, #-1]!
sfi_breg r1, \ strbcs r4, [r0, #-1]!
ldrbcs ip, [\B, #-1] strbcs ip, [r0, #-1]
sfi_breg r0, \
strbne r3, [\B, #-1]!
sfi_breg r0, \
strbcs r4, [\B, #-1]!
sfi_breg r0, \
strbcs ip, [\B, #-1]
#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \ #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
|| defined (ARM_ALWAYS_BX)) || defined (ARM_ALWAYS_BX))
@ -252,27 +230,20 @@ ENTRY(memmove)
cfi_restore_state cfi_restore_state
9: cmp ip, #2 9: cmp ip, #2
sfi_breg r1, \ ldrbgt r3, [r1, #-1]!
ldrbgt r3, [\B, #-1]! ldrbge r4, [r1, #-1]!
sfi_breg r1, \ ldrb lr, [r1, #-1]!
ldrbge r4, [\B, #-1]! strbgt r3, [r0, #-1]!
sfi_breg r1, \ strbge r4, [r0, #-1]!
ldrb lr, [\B, #-1]!
sfi_breg r0, \
strbgt r3, [\B, #-1]!
sfi_breg r0, \
strbge r4, [\B, #-1]!
subs r2, r2, ip subs r2, r2, ip
sfi_breg r0, \ strb lr, [r0, #-1]!
strb lr, [\B, #-1]!
blt 8b blt 8b
ands ip, r1, #3 ands ip, r1, #3
beq 1b beq 1b
10: bic r1, r1, #3 10: bic r1, r1, #3
cmp ip, #2 cmp ip, #2
sfi_breg r1, \ ldr r3, [r1, #0]
ldr r3, [\B, #0]
beq 17f beq 17f
blt 18f blt 18f
@ -296,20 +267,18 @@ ENTRY(memmove)
cfi_rel_offset (r8, 12) cfi_rel_offset (r8, 12)
cfi_rel_offset (r10, 16) cfi_rel_offset (r10, 16)
PLD( sfi_pld r1, #-4 ) PLD( pld [r1, #-4] )
PLD( subs r2, r2, #96 ) PLD( subs r2, r2, #96 )
PLD( sfi_pld r1, #-32 ) PLD( pld [r1, #-32] )
PLD( blt 13f ) PLD( blt 13f )
PLD( sfi_pld r1, #-64 ) PLD( pld [r1, #-64] )
PLD( sfi_pld r1, #-96 ) PLD( pld [r1, #-96] )
12: PLD( sfi_pld r1, #-128 ) 12: PLD( pld [r1, #-128] )
13: sfi_breg r1, \ 13: ldmdb r1!, {r7, r8, r10, ip}
ldmdb \B!, {r7, r8, r10, ip}
mov lr, r3, PUSH #\push mov lr, r3, PUSH #\push
subs r2, r2, #32 subs r2, r2, #32
sfi_breg r1, \ ldmdb r1!, {r3, r4, r5, r6}
ldmdb \B!, {r3, r4, r5, r6}
orr lr, lr, ip, PULL #\pull orr lr, lr, ip, PULL #\pull
mov ip, ip, PUSH #\push mov ip, ip, PUSH #\push
orr ip, ip, r10, PULL #\pull orr ip, ip, r10, PULL #\pull
@ -325,8 +294,7 @@ ENTRY(memmove)
orr r5, r5, r4, PULL #\pull orr r5, r5, r4, PULL #\pull
mov r4, r4, PUSH #\push mov r4, r4, PUSH #\push
orr r4, r4, r3, PULL #\pull orr r4, r4, r3, PULL #\pull
sfi_breg r0, \ stmdb r0!, {r4 - r8, r10, ip, lr}
stmdb \B!, {r4 - r8, r10, ip, lr}
bge 12b bge 12b
PLD( cmn r2, #96 ) PLD( cmn r2, #96 )
PLD( bge 13b ) PLD( bge 13b )
@ -343,12 +311,10 @@ ENTRY(memmove)
beq 16f beq 16f
15: mov lr, r3, PUSH #\push 15: mov lr, r3, PUSH #\push
sfi_breg r1, \ ldr r3, [r1, #-4]!
ldr r3, [\B, #-4]!
subs ip, ip, #4 subs ip, ip, #4
orr lr, lr, r3, PULL #\pull orr lr, lr, r3, PULL #\pull
sfi_breg r0, \ str lr, [r0, #-4]!
str lr, [\B, #-4]!
bgt 15b bgt 15b
CALGN( cmp r2, #0 ) CALGN( cmp r2, #0 )
CALGN( bge 11b ) CALGN( bge 11b )

View File

@ -32,8 +32,7 @@ ENTRY(memset)
1: 1:
tst r3, #3 @ aligned yet? tst r3, #3 @ aligned yet?
sfi_breg r3, \ strbne r1, [r3], #1
strbne r1, [\B], #1
subne r2, r2, #1 subne r2, r2, #1
bne 1b bne 1b
@ -44,33 +43,25 @@ ENTRY(memset)
1: 1:
subs r2, r2, #8 subs r2, r2, #8
sfi_breg r3, \ stmiacs r3!, {r1, ip} @ store up to 32 bytes per loop iteration
stmiacs \B!, {r1, ip} @ store up to 32 bytes per loop iteration
subscs r2, r2, #8 subscs r2, r2, #8
sfi_breg r3, \ stmiacs r3!, {r1, ip}
stmiacs \B!, {r1, ip}
subscs r2, r2, #8 subscs r2, r2, #8
sfi_breg r3, \ stmiacs r3!, {r1, ip}
stmiacs \B!, {r1, ip}
subscs r2, r2, #8 subscs r2, r2, #8
sfi_breg r3, \ stmiacs r3!, {r1, ip}
stmiacs \B!, {r1, ip}
bcs 1b bcs 1b
and r2, r2, #7 and r2, r2, #7
2: 2:
subs r2, r2, #1 @ store up to 4 bytes per loop iteration subs r2, r2, #1 @ store up to 4 bytes per loop iteration
sfi_breg r3, \ strbcs r1, [r3], #1
strbcs r1, [\B], #1
subscs r2, r2, #1 subscs r2, r2, #1
sfi_breg r3, \ strbcs r1, [r3], #1
strbcs r1, [\B], #1
subscs r2, r2, #1 subscs r2, r2, #1
sfi_breg r3, \ strbcs r1, [r3], #1
strbcs r1, [\B], #1
subscs r2, r2, #1 subscs r2, r2, #1
sfi_breg r3, \ strbcs r1, [r3], #1
strbcs r1, [\B], #1
bcs 2b bcs 2b
DO_RET(lr) DO_RET(lr)

View File

@ -36,20 +36,15 @@ ENTRY (__sigsetjmp)
#ifdef PTR_MANGLE #ifdef PTR_MANGLE
mov a4, sp mov a4, sp
PTR_MANGLE2 (a4, a4, a3) PTR_MANGLE2 (a4, a4, a3)
sfi_breg ip, \ str a4, [ip], #4
str a4, [\B], #4
PTR_MANGLE2 (a4, lr, a3) PTR_MANGLE2 (a4, lr, a3)
sfi_breg ip, \ str a4, [ip], #4
str a4, [\B], #4
#else #else
sfi_breg ip, \ str sp, [ip], #4
str sp, [\B], #4 str lr, [ip], #4
sfi_breg ip, \
str lr, [\B], #4
#endif #endif
/* Save registers */ /* Save registers */
sfi_breg ip, \ stmia ip!, JMP_BUF_REGLIST
stmia \B!, JMP_BUF_REGLIST
#if !defined ARM_ASSUME_NO_IWMMXT || defined __SOFTFP__ #if !defined ARM_ASSUME_NO_IWMMXT || defined __SOFTFP__
# define NEED_HWCAP 1 # define NEED_HWCAP 1
@ -80,8 +75,7 @@ ENTRY (__sigsetjmp)
Don't use VFP instructions directly because this code Don't use VFP instructions directly because this code
is used in non-VFP multilibs. */ is used in non-VFP multilibs. */
/* Following instruction is vstmia ip!, {d8-d15}. */ /* Following instruction is vstmia ip!, {d8-d15}. */
sfi_breg ip, \ stc p11, cr8, [ip], #64
stc p11, cr8, [\B], #64
.Lno_vfp: .Lno_vfp:
#ifndef ARM_ASSUME_NO_IWMMXT #ifndef ARM_ASSUME_NO_IWMMXT
@ -90,18 +84,12 @@ ENTRY (__sigsetjmp)
/* Save the call-preserved iWMMXt registers. */ /* Save the call-preserved iWMMXt registers. */
/* Following instructions are wstrd wr10, [ip], #8 (etc.) */ /* Following instructions are wstrd wr10, [ip], #8 (etc.) */
sfi_breg r12, \ stcl p1, cr10, [r12], #8
stcl p1, cr10, [\B], #8 stcl p1, cr11, [r12], #8
sfi_breg r12, \ stcl p1, cr12, [r12], #8
stcl p1, cr11, [\B], #8 stcl p1, cr13, [r12], #8
sfi_breg r12, \ stcl p1, cr14, [r12], #8
stcl p1, cr12, [\B], #8 stcl p1, cr15, [r12], #8
sfi_breg r12, \
stcl p1, cr13, [\B], #8
sfi_breg r12, \
stcl p1, cr14, [\B], #8
sfi_breg r12, \
stcl p1, cr15, [\B], #8
.Lno_iwmmxt: .Lno_iwmmxt:
#endif #endif

View File

@ -30,8 +30,7 @@
ENTRY(strlen) ENTRY(strlen)
bic r1, r0, $3 @ addr of word containing first byte bic r1, r0, $3 @ addr of word containing first byte
sfi_breg r1, \ ldr r2, [r1], $4 @ get the first word
ldr r2, [\B], $4 @ get the first word
ands r3, r0, $3 @ how many bytes are duff? ands r3, r0, $3 @ how many bytes are duff?
rsb r0, r3, $0 @ get - that number into counter. rsb r0, r3, $0 @ get - that number into counter.
beq Laligned @ skip into main check routine if no beq Laligned @ skip into main check routine if no
@ -55,8 +54,7 @@ Laligned: @ here, we have a word in r2. Does it
tstne r2, $0x00ff0000 @ tstne r2, $0x00ff0000 @
tstne r2, $0xff000000 @ tstne r2, $0xff000000 @
addne r0, r0, $4 @ if not, the string is 4 bytes longer addne r0, r0, $4 @ if not, the string is 4 bytes longer
sfi_breg r1, \ ldrne r2, [r1], $4 @ and we continue to the next word
ldrne r2, [\B], $4 @ and we continue to the next word
bne Laligned @ bne Laligned @
Llastword: @ drop through to here once we find a Llastword: @ drop through to here once we find a
#ifdef __ARMEB__ #ifdef __ARMEB__

View File

@ -37,24 +37,19 @@ ENTRY (__mpn_submul_1)
cfi_rel_offset (r6, 8) cfi_rel_offset (r6, 8)
cfi_rel_offset (r7, 12) cfi_rel_offset (r7, 12)
sfi_breg r1, \ ldr r6, [r1], #4
ldr r6, [\B], #4 ldr r7, [r0]
sfi_breg r0, \
ldr r7, [\B]
mov r4, #0 /* init carry in */ mov r4, #0 /* init carry in */
b 1f b 1f
0: 0:
sfi_breg r1, \ ldr r6, [r1], #4 /* load next ul */
ldr r6, [\B], #4 /* load next ul */
adds r5, r5, r4 /* (lpl, c) = lpl + cl */ adds r5, r5, r4 /* (lpl, c) = lpl + cl */
adc r4, ip, #0 /* cl = hpl + c */ adc r4, ip, #0 /* cl = hpl + c */
subs r5, r7, r5 /* (lpl, !c) = rl - lpl */ subs r5, r7, r5 /* (lpl, !c) = rl - lpl */
sfi_breg r0, \ ldr r7, [r0, #4] /* load next rl */
ldr r7, [\B, #4] /* load next rl */
it cc it cc
addcc r4, r4, #1 /* cl += !c */ addcc r4, r4, #1 /* cl += !c */
sfi_breg r0, \ str r5, [r0], #4
str r5, [\B], #4
1: 1:
umull r5, ip, r6, r3 /* (hpl, lpl) = ul * vl */ umull r5, ip, r6, r3 /* (hpl, lpl) = ul * vl */
subs r2, r2, #1 subs r2, r2, #1
@ -63,8 +58,7 @@ ENTRY (__mpn_submul_1)
adds r5, r5, r4 /* (lpl, c) = lpl + cl */ adds r5, r5, r4 /* (lpl, c) = lpl + cl */
adc r4, ip, #0 /* cl = hpl + c */ adc r4, ip, #0 /* cl = hpl + c */
subs r5, r7, r5 /* (lpl, !c) = rl - lpl */ subs r5, r7, r5 /* (lpl, !c) = rl - lpl */
sfi_breg r0, \ str r5, [r0], #4
str r5, [\B], #4
it cc it cc
addcc r4, r4, #1 /* cl += !c */ addcc r4, r4, #1 /* cl += !c */
mov r0, r4 /* return carry */ mov r0, r4 /* return carry */

View File

@ -150,7 +150,6 @@
second version uses it. */ second version uses it. */
# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \ # define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \
add T, X, Y; \ add T, X, Y; \
sfi_breg T, \
OP R, [T] OP R, [T]
# define LDST_INDEXED_INDEX(OP, R, X, Y) \ # define LDST_INDEXED_INDEX(OP, R, X, Y) \
OP R, [X, Y] OP R, [X, Y]
@ -198,7 +197,7 @@
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ # define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
movw T, #:lower16:SYMBOL; \ movw T, #:lower16:SYMBOL; \
movt T, #:upper16:SYMBOL; \ movt T, #:upper16:SYMBOL; \
sfi_breg T, ldr R, [\B, $CONSTANT] ldr R, [T, $CONSTANT]
# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK # elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ # define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \ movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
@ -212,7 +211,7 @@
97: add R, R, pc; \ 97: add R, R, pc; \
98: LDST_PC_INDEXED (ldr, T, T, T); \ 98: LDST_PC_INDEXED (ldr, T, T, T); \
LDST_INDEXED (ldr, R, T, R, T); \ LDST_INDEXED (ldr, R, T, R, T); \
sfi_breg R, ldr R, [\B, $CONSTANT] ldr R, [R, $CONSTANT]
# else # else
# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ # define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
ldr T, 99f; \ ldr T, 99f; \
@ -277,53 +276,6 @@
cfi_restore_state cfi_restore_state
# endif /* ARCH_HAS_HARD_TP */ # endif /* ARCH_HAS_HARD_TP */
# ifndef ARM_SFI_MACROS
# define ARM_SFI_MACROS 1
/* This assembly macro is prepended to any load/store instruction,
pulling the base register out of the addressing mode syntax and
making it the first operand of the macro. For example:
ldr r0, [r1]
becomes:
sfi_breg r1, ldr r0, [\B]
The \B stands in for the base register that is the first operand
to the macro, so we can avoid error-prone repetition of the base
register in two places on the line.
This is used for all memory access through a base register other
than PC or SP. It's intended to support SFI schemes such as
Native Client, where the OS will enforce that all load/store
instructions use a special form. In any such configuration,
another sysdep.h file will have defined ARM_SFI_MACROS and
provided its own assembly macros with the same interface. */
.macro sfi_breg basereg, insn, operands:vararg
.macro _sfi_breg_doit B
\insn \operands
.endm
_sfi_breg_doit \basereg
.purgem _sfi_breg_doit
.endm
/* This assembly macro replaces the "pld" instruction.
The syntax:
sfi_pld REGISTER, #OFFSET
is exactly equivalent to:
sfi_breg REGISTER, pld [\B, #OFFSET]
(and ", #OFFSET" is optional). We have a separate macro
only to work around a bug in GAS versions prior to 2.23.2,
that misparses the sfi_breg macro expansion in this case. */
.macro sfi_pld basereg, offset=#0
pld [\basereg, \offset]
.endm
/* This macro precedes any instruction that directly changes the SP.
It's not needed for push/pop or for any kind of load or store that
implicitly changes the SP via the ! syntax. */
# define sfi_sp /* Nothing to do. */
# endif
/* These are the directives used for EABI unwind info. /* These are the directives used for EABI unwind info.
Wrap them in macros so another configuration's sysdep.h Wrap them in macros so another configuration's sysdep.h
file can define them away if it doesn't use EABI unwind info. */ file can define them away if it doesn't use EABI unwind info. */