mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-25 22:40:05 +00:00
x86_64: Fix svml_s_atanf16_core_avx512.S code formatting
This commit contains the following formatting changes: 1. Instructions preceded by a tab. 2. Instructions less than 8 characters in length have a tab between them and the first operand. 3. Instructions greater than 7 characters in length have a space between them and the first operand. 4. Tabs after `#define`d names and their values. 5. 8 spaces at the beginning of a line replaced by a tab. 6. Indent comments with code. 7. Remove redundant .text section. 8. 1 space between line content and line comment. 9. Space after all commas. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
This commit is contained in:
parent
f285711506
commit
67a8f9b86f
@ -30,145 +30,144 @@
|
||||
|
||||
/* Offsets for data table __svml_satan_data_internal_avx512
|
||||
*/
|
||||
/* Byte offsets of each constant row, measured from the label
   __svml_satan_data_internal_avx512.  Consecutive offsets are 64 bytes
   apart, except Tbl_H (448) to Pi2 (576), which is 128 bytes.
   In this rendered diff the list appears twice (old spacing first, then
   new spacing); the names and values are identical in both copies.  */
#define AbsMask 0
|
||||
#define Shifter 64
|
||||
#define MaxThreshold 128
|
||||
#define MOne 192
|
||||
#define One 256
|
||||
#define LargeX 320
|
||||
#define Zero 384
|
||||
#define Tbl_H 448
|
||||
#define Pi2 576
|
||||
#define coeff_1 640
|
||||
#define coeff_2 704
|
||||
#define coeff_3 768
|
||||
#define AbsMask 0
|
||||
#define Shifter 64
|
||||
#define MaxThreshold 128
|
||||
#define MOne 192
|
||||
#define One 256
|
||||
#define LargeX 320
|
||||
#define Zero 384
|
||||
#define Tbl_H 448
|
||||
#define Pi2 576
|
||||
#define coeff_1 640
|
||||
#define coeff_2 704
|
||||
#define coeff_3 768
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.section .text.exex512,"ax",@progbits
|
||||
.section .text.exex512, "ax", @progbits
|
||||
/* _ZGVeN16v_atanf_skx: AVX-512 atanf over the packed single-precision
   lanes of %zmm0; the result is returned in %zmm0.  Straight-line code:
   no branches, no stack use, no calls.

   NOTE: this listing is a rendered diff, so every group of lines appears
   twice: first with the old formatting, then with the new formatting.
   Both copies contain the same instructions and operands.

   Data flow, as established by the instructions below:
     zmm7 = |x|                 (x AND AbsMask)
     zmm0 = x XOR |x|           (only the sign bit of x, kept to the end)
     zmm5 = vreduceps remainder of |x| (imm 40: 2 fraction bits kept)
     zmm4 = |x| - zmm5          (the rounded part of |x|)
     zmm8 = One + |x| * zmm4    (Y, the divisor)
     k1   = lanes where |x| >= MaxThreshold
     zmm9 = DiffX: zmm5, or MOne in k1 lanes
     zmm8 = min(LargeX, |x|) in k1 lanes
     zmm3 = Tbl_H entry picked by vpermt2ps, index register = |x| + Shifter
     zmm7 = R = DiffX / Y       (mantissas divided via a vrcp14ps estimate
                                 plus one FMA correction step, then
                                 rescaled by the exponent difference)
     zmm9 = table value, or Pi2 in k1 lanes (zmm9 is reused here)
     zmm8 = R + R^3 * ((coeff_1 * R^2 + coeff_2) * R^2 + coeff_3)
     zmm0 = sign bit XOR (zmm8 + zmm9).  */
ENTRY(_ZGVeN16v_atanf_skx)
|
||||
vandps __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7
|
||||
vmovups MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3
|
||||
vmovups One+__svml_satan_data_internal_avx512(%rip), %zmm8
|
||||
vandps __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7
|
||||
vmovups MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3
|
||||
vmovups One+__svml_satan_data_internal_avx512(%rip), %zmm8
|
||||
|
||||
/* round to 2 bits after binary point */
|
||||
vreduceps $40, {sae}, %zmm7, %zmm5
|
||||
/* round to 2 bits after binary point */
|
||||
vreduceps $40, {sae}, %zmm7, %zmm5
|
||||
|
||||
/* saturate X range */
|
||||
vmovups LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6
|
||||
vmovups Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2
|
||||
/* k1 = lanes where |x| (zmm7) >= MaxThreshold (zmm3); predicate 29 is GE_OQ */
vcmpps $29, {sae}, %zmm3, %zmm7, %k1
|
||||
/* saturate X range */
|
||||
vmovups LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6
|
||||
vmovups Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2
|
||||
vcmpps $29, {sae}, %zmm3, %zmm7, %k1
|
||||
|
||||
/* table lookup sequence */
|
||||
vmovups Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3
|
||||
vsubps {rn-sae}, %zmm5, %zmm7, %zmm4
|
||||
vaddps {rn-sae}, %zmm2, %zmm7, %zmm1
|
||||
/* zmm0 = x XOR |x|, i.e. only the sign bit of x survives */
vxorps %zmm0, %zmm7, %zmm0
|
||||
vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8
|
||||
vmovups coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4
|
||||
/* table lookup sequence */
|
||||
vmovups Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3
|
||||
vsubps {rn-sae}, %zmm5, %zmm7, %zmm4
|
||||
vaddps {rn-sae}, %zmm2, %zmm7, %zmm1
|
||||
vxorps %zmm0, %zmm7, %zmm0
|
||||
vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8
|
||||
vmovups coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4
|
||||
|
||||
/* if|X|>=MaxThreshold, set DiffX=-1 */
|
||||
vblendmps MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1}
|
||||
vmovups coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5
|
||||
/* if|X|>=MaxThreshold, set DiffX=-1 */
|
||||
vblendmps MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1}
|
||||
vmovups coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5
|
||||
|
||||
/* if|X|>=MaxThreshold, set Y=X */
|
||||
/* in k1 lanes zmm8 becomes min(LargeX (zmm6), |x| (zmm7)); other lanes keep Y */
vminps {sae}, %zmm7, %zmm6, %zmm8{%k1}
|
||||
/* if|X|>=MaxThreshold, set Y=X */
|
||||
vminps {sae}, %zmm7, %zmm6, %zmm8{%k1}
|
||||
|
||||
/* R+Rl = DiffX/Y */
|
||||
vgetmantps $0, {sae}, %zmm9, %zmm12
|
||||
vgetexpps {sae}, %zmm9, %zmm10
|
||||
vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3
|
||||
vgetmantps $0, {sae}, %zmm8, %zmm15
|
||||
vgetexpps {sae}, %zmm8, %zmm11
|
||||
vmovups coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1
|
||||
/* R+Rl = DiffX/Y */
|
||||
vgetmantps $0, {sae}, %zmm9, %zmm12
|
||||
vgetexpps {sae}, %zmm9, %zmm10
|
||||
vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3
|
||||
vgetmantps $0, {sae}, %zmm8, %zmm15
|
||||
vgetexpps {sae}, %zmm8, %zmm11
|
||||
vmovups coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1
|
||||
|
||||
/* set table value to Pi/2 for large X */
|
||||
vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1}
|
||||
vrcp14ps %zmm15, %zmm13
|
||||
vsubps {rn-sae}, %zmm11, %zmm10, %zmm2
|
||||
vmulps {rn-sae}, %zmm13, %zmm12, %zmm14
|
||||
vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15
|
||||
vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15
|
||||
vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7
|
||||
/* set table value to Pi/2 for large X */
|
||||
vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1}
|
||||
vrcp14ps %zmm15, %zmm13
|
||||
vsubps {rn-sae}, %zmm11, %zmm10, %zmm2
|
||||
vmulps {rn-sae}, %zmm13, %zmm12, %zmm14
|
||||
vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15
|
||||
vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15
|
||||
vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7
|
||||
|
||||
/* polynomial evaluation */
|
||||
vmulps {rn-sae}, %zmm7, %zmm7, %zmm8
|
||||
vmulps {rn-sae}, %zmm7, %zmm8, %zmm6
|
||||
vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4
|
||||
vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8
|
||||
vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8
|
||||
vaddps {rn-sae}, %zmm9, %zmm8, %zmm10
|
||||
/* XOR the saved sign bit (zmm0) back onto the magnitude result */
vxorps %zmm0, %zmm10, %zmm0
|
||||
ret
|
||||
/* polynomial evaluation */
|
||||
vmulps {rn-sae}, %zmm7, %zmm7, %zmm8
|
||||
vmulps {rn-sae}, %zmm7, %zmm8, %zmm6
|
||||
vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4
|
||||
vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8
|
||||
vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8
|
||||
vaddps {rn-sae}, %zmm9, %zmm8, %zmm10
|
||||
vxorps %zmm0, %zmm10, %zmm0
|
||||
ret
|
||||
|
||||
END(_ZGVeN16v_atanf_skx)
|
||||
|
||||
.section .rodata, "a"
|
||||
.align 64
|
||||
.section .rodata, "a"
|
||||
.align 64
|
||||
|
||||
#ifdef __svml_satan_data_internal_avx512_typedef
|
||||
typedef unsigned int VUINT32;
|
||||
typedef struct {
|
||||
__declspec(align(64)) VUINT32 AbsMask[16][1];
|
||||
__declspec(align(64)) VUINT32 Shifter[16][1];
|
||||
__declspec(align(64)) VUINT32 MaxThreshold[16][1];
|
||||
__declspec(align(64)) VUINT32 MOne[16][1];
|
||||
__declspec(align(64)) VUINT32 One[16][1];
|
||||
__declspec(align(64)) VUINT32 LargeX[16][1];
|
||||
__declspec(align(64)) VUINT32 Zero[16][1];
|
||||
__declspec(align(64)) VUINT32 Tbl_H[32][1];
|
||||
__declspec(align(64)) VUINT32 Pi2[16][1];
|
||||
__declspec(align(64)) VUINT32 coeff[3][16][1];
|
||||
} __svml_satan_data_internal_avx512;
|
||||
__declspec(align(64)) VUINT32 AbsMask[16][1];
|
||||
__declspec(align(64)) VUINT32 Shifter[16][1];
|
||||
__declspec(align(64)) VUINT32 MaxThreshold[16][1];
|
||||
__declspec(align(64)) VUINT32 MOne[16][1];
|
||||
__declspec(align(64)) VUINT32 One[16][1];
|
||||
__declspec(align(64)) VUINT32 LargeX[16][1];
|
||||
__declspec(align(64)) VUINT32 Zero[16][1];
|
||||
__declspec(align(64)) VUINT32 Tbl_H[32][1];
|
||||
__declspec(align(64)) VUINT32 Pi2[16][1];
|
||||
__declspec(align(64)) VUINT32 coeff[3][16][1];
|
||||
} __svml_satan_data_internal_avx512;
|
||||
#endif
|
||||
__svml_satan_data_internal_avx512:
|
||||
/*== AbsMask ==*/
|
||||
.long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
|
||||
/*== Shifter ==*/
|
||||
.align 64
|
||||
.long 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000
|
||||
/*== MaxThreshold ==*/
|
||||
.align 64
|
||||
.long 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000
|
||||
/*== MOne ==*/
|
||||
.align 64
|
||||
.long 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000
|
||||
/*== One ==*/
|
||||
.align 64
|
||||
.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
|
||||
/*== LargeX ==*/
|
||||
.align 64
|
||||
.long 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000
|
||||
/*== Zero ==*/
|
||||
.align 64
|
||||
.long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
|
||||
/*== Tbl_H ==*/
|
||||
.align 64
|
||||
.long 0x00000000, 0x3e7adbb0
|
||||
.long 0x3eed6338, 0x3f24bc7d
|
||||
.long 0x3f490fdb, 0x3f6563e3
|
||||
.long 0x3f7b985f, 0x3f869c79
|
||||
.long 0x3f8db70d, 0x3f93877b
|
||||
.long 0x3f985b6c, 0x3f9c6b53
|
||||
.long 0x3f9fe0bb, 0x3fa2daa4
|
||||
.long 0x3fa57088, 0x3fa7b46f
|
||||
.long 0x3fa9b465, 0x3fab7b7a
|
||||
.long 0x3fad1283, 0x3fae809e
|
||||
.long 0x3fafcb99, 0x3fb0f836
|
||||
.long 0x3fb20a6a, 0x3fb30581
|
||||
.long 0x3fb3ec43, 0x3fb4c10a
|
||||
.long 0x3fb585d7, 0x3fb63c64
|
||||
.long 0x3fb6e62c, 0x3fb78478
|
||||
.long 0x3fb81868, 0x3fb8a2f5
|
||||
/*== Pi2 ==*/
|
||||
.align 64
|
||||
.long 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB
|
||||
/*== coeff3: three rows, addressed via the coeff_1, coeff_2 and coeff_3 offsets ==*/
|
||||
.align 64
|
||||
.long 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de
|
||||
.long 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2
|
||||
.long 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa
|
||||
.align 64
|
||||
.type __svml_satan_data_internal_avx512,@object
|
||||
.size __svml_satan_data_internal_avx512,.-__svml_satan_data_internal_avx512
|
||||
/* AbsMask */
|
||||
.long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
|
||||
/* Shifter */
|
||||
.align 64
|
||||
.long 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000
|
||||
/* MaxThreshold */
|
||||
.align 64
|
||||
.long 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000
|
||||
/* MOne */
|
||||
.align 64
|
||||
.long 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000
|
||||
/* One */
|
||||
.align 64
|
||||
.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
|
||||
/* LargeX */
|
||||
.align 64
|
||||
.long 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000
|
||||
/* Zero */
|
||||
.align 64
|
||||
.long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
|
||||
/* Tbl_H */
|
||||
.align 64
|
||||
.long 0x00000000, 0x3e7adbb0
|
||||
.long 0x3eed6338, 0x3f24bc7d
|
||||
.long 0x3f490fdb, 0x3f6563e3
|
||||
.long 0x3f7b985f, 0x3f869c79
|
||||
.long 0x3f8db70d, 0x3f93877b
|
||||
.long 0x3f985b6c, 0x3f9c6b53
|
||||
.long 0x3f9fe0bb, 0x3fa2daa4
|
||||
.long 0x3fa57088, 0x3fa7b46f
|
||||
.long 0x3fa9b465, 0x3fab7b7a
|
||||
.long 0x3fad1283, 0x3fae809e
|
||||
.long 0x3fafcb99, 0x3fb0f836
|
||||
.long 0x3fb20a6a, 0x3fb30581
|
||||
.long 0x3fb3ec43, 0x3fb4c10a
|
||||
.long 0x3fb585d7, 0x3fb63c64
|
||||
.long 0x3fb6e62c, 0x3fb78478
|
||||
.long 0x3fb81868, 0x3fb8a2f5
|
||||
/* Pi2 */
|
||||
.align 64
|
||||
.long 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB
|
||||
/* coeff3: three rows, addressed via the coeff_1, coeff_2 and coeff_3 offsets */
|
||||
.align 64
|
||||
.long 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de
|
||||
.long 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2
|
||||
.long 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa
|
||||
.align 64
|
||||
.type __svml_satan_data_internal_avx512, @object
|
||||
.size __svml_satan_data_internal_avx512, .-__svml_satan_data_internal_avx512
|
||||
|
Loading…
Reference in New Issue
Block a user