x86_64: Fix svml_s_erff8_core_avx2.S code formatting

This commit contains the following formatting changes; a short sketch applying the rules follows the list.

1. Instructions preceded by a tab.
2. Instructions less than 8 characters in length have a tab
   between them and the first operand.
3. Instructions greater than 7 characters in length have a
   space between them and the first operand.
4. Tabs after `#define`d names and their values.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.
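
A sketch of rules 1-3, 8, and 9 applied to instructions that appear
in the diff below; the comments are illustrative and not part of the
patch:

	/* vmovd (5 characters) gets a tab before its first operand.  */
	vmovd	%xmm11, %edx
	/* vextractf128 (12 characters) gets a single space instead.  */
	vextractf128 $1, %ymm11, %xmm12
	/* Spaces follow commas; one space before a line comment.  */
	vmovq	(%rax, %rdx), %xmm13 /* illustrative line comment */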

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Author: Sunil K Pandey
Date:   2022-03-07 10:47:11 -08:00
Parent: a86ad0f03c
Commit: 94820193e0


@@ -45,19 +45,18 @@
 #include <sysdep.h>

-	.text
-	.section .text.avx2,"ax",@progbits
+	.section .text.avx2, "ax", @progbits
 ENTRY(_ZGVdN8v_erff_avx2)
 	lea	Table_Lookup_Bias+__svml_serf_data_internal(%rip), %rax

-/*
+	/*
 	 * vector gather:
 	 * erf(x0), exp(-x0*x0)*2.0/sqrt(pi)
 	 */
 	vmovups	_SRound+__svml_serf_data_internal(%rip), %ymm7
 	vandps	_AbsMask+__svml_serf_data_internal(%rip), %ymm0, %ymm6

-/*
+	/*
 	 * erf(x) rounds to 1.0 for x>_MaxThreshold (3.9375)
 	 * can compute all results in the main path
 	 */
@@ -69,13 +68,13 @@ ENTRY(_ZGVdN8v_erff_avx2)
 	vsubps	%ymm4, %ymm8, %ymm3
 	vandps	%ymm9, %ymm3, %ymm2

-/* NaN fixup */
+	/* NaN fixup */
 	vminps	%ymm6, %ymm3, %ymm3

-/* D2 = Diff^2 */
+	/* D2 = Diff^2 */
 	vmulps	%ymm2, %ymm2, %ymm2

-/* save sign */
+	/* save sign */
 	vxorps	%ymm0, %ymm6, %ymm5
 	vmovd	%xmm11, %edx
 	vextractf128 $1, %ymm11, %xmm12
@@ -83,8 +82,8 @@ ENTRY(_ZGVdN8v_erff_avx2)
 	movslq	%edx, %rdx
 	movslq	%esi, %rsi
 	vmovd	%xmm12, %r8d
-	vmovq (%rax,%rdx), %xmm13
-	vmovq (%rax,%rsi), %xmm14
+	vmovq	(%rax, %rdx), %xmm13
+	vmovq	(%rax, %rsi), %xmm14
 	vunpcklps %xmm14, %xmm13, %xmm10
 	vmovups	_poly3_0+__svml_serf_data_internal(%rip), %ymm14
 	vpextrd	$1, %xmm11, %ecx
@@ -93,7 +92,7 @@ ENTRY(_ZGVdN8v_erff_avx2)
 	vpextrd	$2, %xmm12, %r10d
 	vpextrd	$3, %xmm12, %r11d

-/*
+	/*
 	 * Start polynomial evaluation
 	 * P1
 	 */
@@ -104,18 +103,18 @@ ENTRY(_ZGVdN8v_erff_avx2)
 	movslq	%r9d, %r9
 	movslq	%r10d, %r10
 	movslq	%r11d, %r11
-	vmovq (%rax,%rcx), %xmm1
-	vmovq (%rax,%rdi), %xmm15
+	vmovq	(%rax, %rcx), %xmm1
+	vmovq	(%rax, %rdi), %xmm15

-/*
+	/*
 	 * branch-free
 	 * (exp_h(x0) * Diff) * (poly + 1.0)
 	 */
 	vfmadd213ps %ymm3, %ymm2, %ymm4
-	vmovq (%rax,%r8), %xmm7
-	vmovq (%rax,%r9), %xmm0
-	vmovq (%rax,%r10), %xmm8
-	vmovq (%rax,%r11), %xmm9
+	vmovq	(%rax, %r8), %xmm7
+	vmovq	(%rax, %r9), %xmm0
+	vmovq	(%rax, %r10), %xmm8
+	vmovq	(%rax, %r11), %xmm9
 	vunpcklps %xmm15, %xmm1, %xmm11
 	vunpcklps %xmm8, %xmm7, %xmm1
 	vunpcklps %xmm9, %xmm0, %xmm0
@@ -124,10 +123,10 @@ ENTRY(_ZGVdN8v_erff_avx2)
 	vunpcklps %ymm13, %ymm12, %ymm0
 	vunpckhps %ymm13, %ymm12, %ymm15

-/* Final result */
+	/* Final result */
 	vfmadd213ps %ymm0, %ymm15, %ymm4

-/* set sign */
+	/* set sign */
 	vorps	%ymm5, %ymm4, %ymm0

 	ret
@@ -138,8 +137,7 @@ END(_ZGVdN8v_erff_avx2)
 #ifdef __svml_serf_data_internal_typedef
 typedef unsigned int VUINT32;

-typedef struct
-{
+typedef struct {
 	__declspec(align(32)) VUINT32 _erf_tbl[1008][1];
 	__declspec(align(32)) VUINT32 _AbsMask[8][1];
 	__declspec(align(32)) VUINT32 _MaxThreshold[8][1];
@@ -149,7 +147,7 @@ typedef struct
 } __svml_serf_data_internal;
 #endif
 __svml_serf_data_internal:
-/*== _erf_tbl ==*/
+	/* _erf_tbl */
 	.long	0x00000000, 0x3f906ebb
 	.long	0x3c106dfa, 0x3f906c79
 	.long	0x3c906bb8, 0x3f9065b4
@@ -665,5 +663,5 @@ __svml_serf_data_internal:
 	.align	32
 	.long	0xbeaaaade, 0xbeaaaade, 0xbeaaaade, 0xbeaaaade, 0xbeaaaade, 0xbeaaaade, 0xbeaaaade, 0xbeaaaade /* _poly_3_0 */
 	.align	32
-	.type	__svml_serf_data_internal,@object
-	.size	__svml_serf_data_internal,.-__svml_serf_data_internal
+	.type	__svml_serf_data_internal, @object
+	.size	__svml_serf_data_internal, .-__svml_serf_data_internal