Makes AArch64 assembly acceptable to clang

According to the ARMv8 Architecture Reference Manual, section C7.2.188, the
format of the SIMD MOV (to general) instruction is

  MOV <Xd>, <Vn>.D[<index>]

gas appears to accept "<Vn>.2D[<index>]" as well, but clang's integrated
assembler does not. Cf.
https://community.arm.com/developer/ip-products/processors/f/cortex-a-forum/5214/aarch64-assembly-syntax-for-armclang
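
For illustration, a minimal sketch of the two spellings (the choice of x0 and
v0 is arbitrary; MOV (to general) is an alias of UMOV):

  /* Accepted by both gas and clang's integrated assembler: */
  mov x0, v0.d[0]

  /* Accepted by gas but rejected by clang's integrated assembler: */
  mov x0, v0.2d[0]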
Authored by Shu-Chun Weng on 2019-04-19 14:47:59 -07:00; committed by Fangrui Song
parent 038be62f96
commit 83bede0cfc
4 changed files with 14 additions and 14 deletions


@@ -91,7 +91,7 @@ ENTRY (__memchr)
    and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
    addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
    addp vend.16b, vend.16b, vend.16b /* 128->64 */
-   mov synd, vend.2d[0]
+   mov synd, vend.d[0]
    /* Clear the soff*2 lower bits */
    lsl tmp, soff, #1
    lsr synd, synd, tmp
@@ -111,7 +111,7 @@ L(loop):
    /* Use a fast check for the termination condition */
    orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
    addp vend.2d, vend.2d, vend.2d
-   mov synd, vend.2d[0]
+   mov synd, vend.d[0]
    /* We're not out of data, loop if we haven't found the character */
    cbz synd, L(loop)
@@ -121,7 +121,7 @@ L(end):
    and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
    addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
    addp vend.16b, vend.16b, vend.16b /* 128->64 */
-   mov synd, vend.2d[0]
+   mov synd, vend.d[0]
    /* Only do the clear for the last possible block */
    b.hi L(tail)


@@ -94,7 +94,7 @@ ENTRY (strchr)
    addp vend1.16b, vend1.16b, vend2.16b // 128->64
    lsr tmp1, tmp3, tmp1
-   mov tmp3, vend1.2d[0]
+   mov tmp3, vend1.d[0]
    bic tmp1, tmp3, tmp1 // Mask padding bits.
    cbnz tmp1, L(tail)
@@ -109,7 +109,7 @@ L(loop):
    orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
    orr vend1.16b, vend1.16b, vend2.16b
    addp vend1.2d, vend1.2d, vend1.2d
-   mov tmp1, vend1.2d[0]
+   mov tmp1, vend1.d[0]
    cbz tmp1, L(loop)
    /* Termination condition found. Now need to establish exactly why
@@ -123,7 +123,7 @@ L(loop):
    addp vend1.16b, vend1.16b, vend2.16b // 256->128
    addp vend1.16b, vend1.16b, vend2.16b // 128->64
-   mov tmp1, vend1.2d[0]
+   mov tmp1, vend1.d[0]
 L(tail):
    sub src, src, #32
    rbit tmp1, tmp1


@@ -91,7 +91,7 @@ ENTRY (__strchrnul)
    addp vend1.16b, vend1.16b, vend1.16b // 128->64
    lsr tmp1, tmp3, tmp1
-   mov tmp3, vend1.2d[0]
+   mov tmp3, vend1.d[0]
    bic tmp1, tmp3, tmp1 // Mask padding bits.
    cbnz tmp1, L(tail)
@@ -106,7 +106,7 @@ L(loop):
    orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
    orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
    addp vend1.2d, vend1.2d, vend1.2d
-   mov tmp1, vend1.2d[0]
+   mov tmp1, vend1.d[0]
    cbz tmp1, L(loop)
    /* Termination condition found. Now need to establish exactly why
@@ -116,7 +116,7 @@ L(loop):
    addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
    addp vend1.16b, vend1.16b, vend1.16b // 128->64
-   mov tmp1, vend1.2d[0]
+   mov tmp1, vend1.d[0]
 L(tail):
    /* Count the trailing zeros, by bit reversing... */
    rbit tmp1, tmp1


@@ -101,10 +101,10 @@ ENTRY(strrchr)
    addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
    addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64
    addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
-   mov nul_match, vhas_nul1.2d[0]
+   mov nul_match, vhas_nul1.d[0]
    lsl tmp1, tmp1, #1
    mov const_m1, #~0
-   mov chr_match, vhas_chr1.2d[0]
+   mov chr_match, vhas_chr1.d[0]
    lsr tmp3, const_m1, tmp1
    bic nul_match, nul_match, tmp3 // Mask padding bits.
@@ -127,15 +127,15 @@ L(aligned):
    addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
    addp vend1.16b, vend1.16b, vend1.16b // 128->64
    addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
-   mov nul_match, vend1.2d[0]
-   mov chr_match, vhas_chr1.2d[0]
+   mov nul_match, vend1.d[0]
+   mov chr_match, vhas_chr1.d[0]
    cbz nul_match, L(loop)
    and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
    and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
    addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
    addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
-   mov nul_match, vhas_nul1.2d[0]
+   mov nul_match, vhas_nul1.d[0]
 L(tail):
    /* Work out exactly where the string ends. */
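
Every changed site above follows the same idiom: per-byte match masks are
narrowed with addp until the syndrome fits in one 64-bit lane, which is then
extracted into a general-purpose register. A minimal sketch of the idiom in
the clang-compatible spelling (register names arbitrary):

  addp v0.16b, v1.16b, v2.16b  // 256->128: combine two match vectors pairwise
  addp v0.16b, v0.16b, v0.16b  // 128->64: syndrome now in the low 64 bits
  mov  x0, v0.d[0]             // extract it; spelled .d[0], not .2d[0]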