powerpc: Avoid misaligned stores in memset

As per section "3.1.4.2 Alignment Interrupts" of the "POWER8 Processor
User's Manual for the Single-Chip Module", an alignment interrupt is reported
for misaligned stores to caching-inhibited storage.  As memset is used by
some drivers (such as xorg) on DMA memory, this patch avoids misaligned
stores for sizes less than 8 in memset.
This commit is contained in:
Rajalakshmi Srinivasaraghavan 2017-09-19 13:55:49 +05:30
parent 6d9b0b5a22
commit bd17ba29eb
2 changed files with 72 additions and 2 deletions

View File

@ -1,3 +1,7 @@
2017-09-19 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
* sysdeps/powerpc/powerpc64/power8/memset.S: Avoid misaligned stores.
2017-09-18 Joseph Myers <joseph@codesourcery.com>
* sysdeps/ieee754/ldbl-opt/w_exp10l_compat.c [LIBM_SVID_COMPAT &&

View File

@ -377,7 +377,10 @@ L(write_LT_32):
subf r5,r0,r5
2: bf 30,1f
sth r4,0(r10)
/* Use stb instead of sth because it doesn't generate
alignment interrupts on cache-inhibited storage. */
stb r4,0(r10)
stb r4,1(r10)
addi r10,r10,2
1: bf 31,L(end_4bytes_alignment)
@ -437,11 +440,74 @@ L(tail5):
/* Handles sets of 0~8 bytes.  r10 is the store address and r4 the value
   being stored.  cr6 holds a comparison whose "equal" result means the
   size is exactly 8 (presumably set before branching here -- TODO
   confirm); every other size is handed to L(LE7_tail4), which uses byte
   stores only.  */
.align 4
L(write_LE_8):
bne cr6,L(LE7_tail4)
/* Size is 8.  If the address is word aligned, two stw cover it.
   Otherwise go to L(8_unalign), which uses naturally aligned sth/stb so
   that no alignment interrupt is raised on caching-inhibited storage.  */
andi. r0,r10,3
bne L(8_unalign)
stw r4,0(r10)
stw r4,4(r10)
blr
/* Unaligned input and size is 8.  Reached from L(write_LE_8) when the
   store address in r10 is not word aligned.  Bit 0 of the address
   separates an odd address from a halfword-aligned one.  */
.align 4
L(8_unalign):
andi. r0,r10,1
beq L(8_hwalign)
/* Odd address: one byte store, then halfword stores at offsets 1, 3 and
   5 (all even addresses, so naturally aligned), then a final byte at
   offset 7 to complete the 8 bytes.  */
stb r4,0(r10)
sth r4,1(r10)
sth r4,3(r10)
sth r4,5(r10)
stb r4,7(r10)
blr
/* Halfword aligned input and size is 8.  Reached from L(8_unalign) when
   bit 0 of r10 is clear, i.e. the address is even but not word aligned.
   Four halfword stores at even offsets cover the 8 bytes, each one
   naturally aligned.  */
.align 4
L(8_hwalign):
sth r4,0(r10)
sth r4,2(r10)
sth r4,4(r10)
sth r4,6(r10)
blr
.align 4
/* Sets 4~7 bytes, or dispatches to the smaller tails.  As the stores
   below show, CR bits 29, 30 and 31 select 4, 2 and 1 bytes
   respectively; presumably they hold the low bits of the length, set
   before this point -- TODO confirm.  */
L(LE7_tail4):
/* Use only stb (no sth/stw) because byte stores do not generate
   alignment interrupts on cache-inhibited storage.  */
/* Bit 29 clear: fewer than 4 bytes to set.  */
bf 29,L(LE7_tail2)
stb r4,0(r10)
stb r4,1(r10)
stb r4,2(r10)
stb r4,3(r10)
/* Bit 30 clear: 4 or 5 bytes in total, finished in L(LE7_tail5).  */
bf 30,L(LE7_tail5)
stb r4,4(r10)
stb r4,5(r10)
/* Return after 6 bytes if bit 31 is clear, else store the 7th.  */
bflr 31
stb r4,6(r10)
blr
.align 4
/* Sets 2~3 bytes with byte stores only.  Reached from L(LE7_tail4) when
   CR bit 29 is clear.  If bit 30 is also clear, at most one byte is
   left and the local label 1 below handles it.  */
L(LE7_tail2):
bf 30,1f
stb r4,0(r10)
stb r4,1(r10)
/* Return after 2 bytes if bit 31 is clear, else store the 3rd.  */
bflr 31
stb r4,2(r10)
blr
.align 4
/* Reached from L(LE7_tail4) after bytes 0~3 have been stored and CR bit
   30 is clear.  Return if bit 31 is clear (4 bytes in total), else
   store the 5th byte at offset 4.  */
L(LE7_tail5):
bflr 31
stb r4,4(r10)
blr
.align 4
/* Sets 0~1 bytes.  Reached from L(LE7_tail2) when CR bits 29 and 30 are
   both clear.  Return without storing if bit 31 is clear, else store a
   single byte.  */
1: bflr 31
stb r4,0(r10)
blr
/* NOTE(review): both macros are defined outside this chunk.  Presumably
   END_GEN_TB closes the MEMSET function definition and
   libc_hidden_builtin_def declares the hidden internal alias for
   memset -- confirm against the macro definitions.  */
END_GEN_TB (MEMSET,TB_TOCLESS)
libc_hidden_builtin_def (memset)