x86_64: Fix svml_d_atan24_core_avx2.S code formatting

This commit contains the following formatting changes: 1. Instructions preceded by a tab. 2. Instructions less than 8 characters in length have a tab between them and the first operand. 3. Instructions greater than 7 characters in length have a space between them and the first operand. 4. Tabs after `#define`d names and their values. 5. 8 spaces at the beginning of a line replaced by a tab. 6. Indent comments with code. 7. Remove redundant .text section. 8. 1 space between line content and line comment. 9. Space after all commas. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
2025-01-14 21:10:19 +00:00 · 2022-03-07 10:47:10 -08:00 · 2022-03-07 10:47:10 -08:00 · 4f75454074
commit 4f75454074
parent 1447e84caf
1 changed files with 355 additions and 356 deletions
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_atan24_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_atan24_core_avx2.S
@ -60,8 +60,7 @@

 #include <sysdep.h>

-        .text
-	.section .text.avx2,"ax",@progbits
+	.section .text.avx2, "ax", @progbits
 ENTRY(_ZGVdN4vv_atan2_avx2)
 	pushq	%rbp
 	cfi_def_cfa_offset(16)
@ -72,7 +71,7 @@ ENTRY(_ZGVdN4vv_atan2_avx2)
 	subq	$128, %rsp
 	xorl	%edx, %edx

-/*
+	/*
 	 * #define NO_VECTOR_ZERO_ATAN2_ARGS
 	 *  Declarations
 	 * Variables
@ -85,7 +84,7 @@ ENTRY(_ZGVdN4vv_atan2_avx2)
 	 */
 	vmovupd	dABS_MASK+__svml_datan2_data_internal(%rip), %ymm5

-/* Argument signs */
+	/* Argument signs */
 	vmovupd	dSIGN_MASK+__svml_datan2_data_internal(%rip), %ymm4
 	vmovups	iCHK_WORK_SUB+__svml_datan2_data_internal(%rip), %xmm13
 	vmovupd	%ymm0, (%rsp)
@ -94,7 +93,7 @@ ENTRY(_ZGVdN4vv_atan2_avx2)
 	vandpd	%ymm5, %ymm0, %ymm1
 	vcmpnlt_uqpd %ymm2, %ymm1, %ymm15

-/*
+	/*
 	 * 1) If y<x then a= y, b=x, PIO2=0
 	 * 2) If y>x then a=-x, b=y, PIO2=Pi/2
 	 */
@ -108,7 +107,7 @@ ENTRY(_ZGVdN4vv_atan2_avx2)
 	vandpd	%ymm4, %ymm8, %ymm5
 	vandpd	dPIO2+__svml_datan2_data_internal(%rip), %ymm15, %ymm4

-/* Check if y and x are on main path. */
+	/* Check if y and x are on main path. */
 	vextractf128 $1, %ymm2, %xmm9
 	vextractf128 $1, %ymm1, %xmm10
 	vshufps	$221, %xmm9, %xmm2, %xmm11
@ -122,7 +121,7 @@ ENTRY(_ZGVdN4vv_atan2_avx2)
 	vpor	%xmm6, %xmm15, %xmm11
 	vpor	%xmm3, %xmm10, %xmm12

-/* Polynomial. */
+	/* Polynomial. */
 	vmulpd	%ymm14, %ymm14, %ymm10
 	vpor	%xmm12, %xmm11, %xmm3
 	vmovupd	dA18+__svml_datan2_data_internal(%rip), %ymm9
@ -130,7 +129,7 @@ ENTRY(_ZGVdN4vv_atan2_avx2)
 	vmovupd	dA16+__svml_datan2_data_internal(%rip), %ymm15
 	vmulpd	%ymm10, %ymm10, %ymm11

-/* if x<0, dPI = Pi, else dPI =0 */
+	/* if x<0, dPI = Pi, else dPI =0 */
 	vcmple_oqpd dZERO+__svml_datan2_data_internal(%rip), %ymm8, %ymm13
 	vmovmskps %xmm3, %eax
 	vmulpd	%ymm11, %ymm11, %ymm0
@ -152,13 +151,13 @@ ENTRY(_ZGVdN4vv_atan2_avx2)
 	vfmadd213pd dA01+__svml_datan2_data_internal(%rip), %ymm0, %ymm12
 	vfmadd213pd dA03+__svml_datan2_data_internal(%rip), %ymm0, %ymm13

-/* A00=1.0, account for it later  VQFMA(D, dP4, dP4, dR8, dA00); */
+	/* A00=1.0, account for it later  VQFMA(D, dP4, dP4, dR8, dA00); */
 	vmulpd	%ymm15, %ymm0, %ymm0
 	vfmadd213pd %ymm9, %ymm10, %ymm13
 	vfmadd213pd %ymm0, %ymm10, %ymm12
 	vfmadd213pd %ymm12, %ymm11, %ymm13

-/*
+	/*
 	 * Reconstruction.
 	 * dP=(R+R*dP) + dPIO2
 	 */
@ -168,29 +167,29 @@ ENTRY(_ZGVdN4vv_atan2_avx2)
 	vaddpd	%ymm0, %ymm6, %ymm9
 	vorpd	%ymm7, %ymm9, %ymm0

-/*  Special branch for fast (vector) processing of zero arguments  */
+	/*  Special branch for fast (vector) processing of zero arguments  */
 	testl	%eax, %eax

-/* Go to auxilary branch */
+	/* Go to auxilary branch */
 	jne	L(AUX_BRANCH)
 	# LOE rbx r12 r13 r14 r15 edx xmm3 ymm0 ymm1 ymm2 ymm4 ymm5 ymm6 ymm7 ymm8

-/* Return from auxilary branch
+	/* Return from auxilary branch
 	 * for out of main path inputs
 	 */

 L(AUX_BRANCH_RETURN):
-/*
+	/*
 	 *  Special branch for fast (vector) processing of zero arguments
 	 *  The end of implementation
 	 */
 	testl	%edx, %edx

-/* Go to special inputs processing branch */
+	/* Go to special inputs processing branch */
 	jne	L(SPECIAL_VALUES_BRANCH)
 	# LOE rbx r12 r13 r14 r15 edx ymm0 ymm8

-/* Restore registers
+	/* Restore registers
 	 * and exit the function
 	 */

@ -203,7 +202,7 @@ L(EXIT):
 	cfi_def_cfa(6, 16)
 	cfi_offset(6, -16)

-/* Branch to process
+	/* Branch to process
 	 * special inputs
 	 */

@ -231,18 +230,18 @@ L(SPECIAL_VALUES_BRANCH):
 	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
 	# LOE rbx r15 r12d r13d

-/* Range mask
+	/* Range mask
 	 * bits check
 	 */

 L(RANGEMASK_CHECK):
 	btl	%r12d, %r13d

-/* Call scalar math function */
+	/* Call scalar math function */
 	jc	L(SCALAR_MATH_CALL)
 	# LOE rbx r15 r12d r13d

-/* Special inputs
+	/* Special inputs
 	 * processing loop
 	 */

@ -250,7 +249,7 @@ L(SPECIAL_VALUES_LOOP):
 	incl	%r12d
 	cmpl	$4, %r12d

-/* Check bits in range mask */
+	/* Check bits in range mask */
 	jl	L(RANGEMASK_CHECK)
 	# LOE rbx r15 r12d r13d

@ -262,7 +261,7 @@ L(SPECIAL_VALUES_LOOP):
 	cfi_restore(14)
 	vmovupd	96(%rsp), %ymm0

-/* Go to exit */
+	/* Go to exit */
 	jmp	L(EXIT)
 	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -112; DW_OP_plus)  */
 	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x90, 0xff, 0xff, 0xff, 0x22
@ -272,37 +271,37 @@ L(SPECIAL_VALUES_LOOP):
 	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
 	# LOE rbx r12 r13 r14 r15 ymm0

-/* Scalar math fucntion call
+	/* Scalar math fucntion call
 	 * to process special input
 	 */

 L(SCALAR_MATH_CALL):
 	movl	%r12d, %r14d
-        movsd     32(%rsp,%r14,8), %xmm0
-        movsd     64(%rsp,%r14,8), %xmm1
+	movsd	32(%rsp, %r14, 8), %xmm0
+	movsd	64(%rsp, %r14, 8), %xmm1
 	call	atan2@PLT
 	# LOE rbx r14 r15 r12d r13d xmm0

-        movsd     %xmm0, 96(%rsp,%r14,8)
+	movsd	%xmm0, 96(%rsp, %r14, 8)

-/* Process special inputs in loop */
+	/* Process special inputs in loop */
 	jmp	L(SPECIAL_VALUES_LOOP)
 	cfi_restore(12)
 	cfi_restore(13)
 	cfi_restore(14)
 	# LOE rbx r15 r12d r13d

-/* Auxilary branch
+	/* Auxilary branch
 	 * for out of main path inputs
 	 */

 L(AUX_BRANCH):
 	vmovupd	(%rsp), %ymm11

-/* Check if at least on of Y or Y is zero: iAXAYZERO */
+	/* Check if at least on of Y or Y is zero: iAXAYZERO */
 	vmovupd	dZERO+__svml_datan2_data_internal(%rip), %ymm10

-/* Check if both X & Y are not NaNs:  iXYnotNAN */
+	/* Check if both X & Y are not NaNs:  iXYnotNAN */
 	vcmpordpd %ymm8, %ymm8, %ymm12
 	vcmpordpd %ymm11, %ymm11, %ymm13
 	vcmpeqpd %ymm10, %ymm2, %ymm2
@ -314,26 +313,26 @@ L(AUX_BRANCH):
 	vshufps	$221, %xmm15, %xmm14, %xmm9
 	vshufps	$221, %xmm11, %xmm2, %xmm12

-/*
+	/*
 	 *  Path for zero arguments (at least one of both)
 	 * Check if both args are zeros (den. is zero)
 	 */
 	vcmpeqpd 32(%rsp), %ymm10, %ymm2

-/* Check if at least on of Y or Y is zero and not NaN: iAXAYZEROnotNAN */
+	/* Check if at least on of Y or Y is zero and not NaN: iAXAYZEROnotNAN */
 	vpand	%xmm9, %xmm12, %xmm1

-/* Exclude from previous callout mask zero (and not NaN) arguments */
+	/* Exclude from previous callout mask zero (and not NaN) arguments */
 	vpandn	%xmm3, %xmm1, %xmm3

-/* Go to callout */
+	/* Go to callout */
 	vmovmskps %xmm3, %edx

-/* Set sPIO2 to zero if den. is zero */
+	/* Set sPIO2 to zero if den. is zero */
 	vblendvpd %ymm2, %ymm10, %ymm4, %ymm4
 	vorpd	%ymm5, %ymm4, %ymm5

-/* Res = sign(Y)*(X<0)?(PIO2+PI):PIO2 */
+	/* Res = sign(Y)*(X<0)?(PIO2+PI):PIO2 */
 	vextractf128 $1, %ymm10, %xmm2
 	vextractf128 $1, %ymm8, %xmm3
 	vshufps	$221, %xmm2, %xmm10, %xmm4
@ -346,13 +345,13 @@ L(AUX_BRANCH):
 	vaddpd	%ymm6, %ymm5, %ymm2
 	vorpd	%ymm7, %ymm2, %ymm2

-/* Merge results from main and spec path */
+	/* Merge results from main and spec path */
 	vpshufd	$80, %xmm1, %xmm7
 	vpshufd	$250, %xmm1, %xmm1
 	vinsertf128 $1, %xmm1, %ymm7, %ymm3
 	vblendvpd %ymm3, %ymm2, %ymm0, %ymm0

-/* Return to main vector processing path */
+	/* Return to main vector processing path */
 	jmp	L(AUX_BRANCH_RETURN)
 	# LOE rbx r12 r13 r14 r15 edx ymm0 ymm8
 END(_ZGVdN4vv_atan2_avx2)
@ -393,9 +392,9 @@ typedef struct {
 } __svml_datan2_data_internal;
 #endif
 __svml_datan2_data_internal:
-        .quad 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18 //dPI
+	.quad	0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18 // dPI
 	.align	32
-        .quad 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18 //dPIO2
+	.quad	0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18 // dPIO2
 	.align	32
 	.quad	0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3 // dA19
 	.align	32
@ -437,15 +436,15 @@ __svml_datan2_data_internal:
 	.align	32
 	.quad	0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000 // dA00
 	.align	32
-        .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 //dSIGN_MASK
+	.quad	0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 // dSIGN_MASK
 	.align	32
-        .long 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000 //iCHK_WORK_SUB
+	.long	0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000 // iCHK_WORK_SUB
 	.align	32
-        .long 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000 //iCHK_WORK_CMP
+	.long	0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000 // iCHK_WORK_CMP
 	.align	32
-        .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff //dABS_MASK
+	.quad	0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff // dABS_MASK
 	.align	32
-        .quad 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 //dZERO
+	.quad	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 // dZERO
 	.align	32
-        .type	__svml_datan2_data_internal,@object
-        .size	__svml_datan2_data_internal,.-__svml_datan2_data_internal
+	.type	__svml_datan2_data_internal, @object
+	.size	__svml_datan2_data_internal, .-__svml_datan2_data_internal