x86_64: Fix svml_s_tanhf8_core_avx2.S code formatting

This commit contains the following formatting changes (a brief
before/after sketch follows the list):

1. Instructions preceded by a tab.
2. Instructions less than 8 characters in length have a tab
   between the mnemonic and the first operand.
3. Instructions greater than 7 characters in length have a
   space between the mnemonic and the first operand.
4. Tabs between `#define`d names and their values.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant `.text` section.
8. 1 space between line content and a trailing line comment.
9. Space after all commas.
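
For illustration only, a hypothetical fragment showing rules 1, 2, 3,
5, 6, and 9 applied (the "before" spacing approximates the old style
and is not quoted from the patch; operands are simplified from the
file). `vmovups` is 7 characters, so a tab separates it from its
operand, while the 11-character `vinsertf128` takes a single space:

Before:
/* Constant loading */
        vmovups   _iMaxIdxMask(%rip),%ymm8
        vinsertf128 $1,(%rdi,%r10),%ymm1,%ymm5

After:
	/* Constant loading */
	vmovups	_iMaxIdxMask(%rip), %ymm8
	vinsertf128 $1, (%rdi, %r10), %ymm1, %ymm5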

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Sunil K Pandey 2022-03-07 10:47:15 -08:00
parent 2eeea98af0
commit 2c632117bf
1 changed file with 730 additions and 732 deletions

@@ -28,7 +28,7 @@
  * and to approximate tanh(.) with a polynomial on each of them.
  *
  * IEEE SPECIAL CONDITIONS:
- * x = [+,-]0, r = [+,-]0
+ * x = [+, -]0, r = [+, -]0
  * x = +Inf, r = +1
  * x = -Inf, r = -1
  * x = QNaN, r = QNaN
@@ -82,8 +82,7 @@
 #include <sysdep.h>
-	.text
-	.section .text.avx2,"ax",@progbits
+	.section .text.avx2, "ax", @progbits
 ENTRY(_ZGVdN8v_tanhf_avx2)
 	pushq	%rbp
 	cfi_def_cfa_offset(16)
@@ -96,17 +95,17 @@ ENTRY(_ZGVdN8v_tanhf_avx2)
 	lea	_dbP+16+__svml_stanh_data_internal(%rip), %r10
 	vmovaps	%ymm0, %ymm12
-/* Here huge arguments, INF and NaNs are filtered out to callout. */
+	/* Here huge arguments, INF and NaNs are filtered out to callout. */
 	vpand	_iExpMantMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm14
-/*
+	/*
 	 * small table specific variables *
 	 * Constant loading
 	 */
 	vmovups	_iMaxIdxMask+__svml_stanh_data_internal(%rip), %ymm8
 	vpsubd	_iMinIdxOfsMask+__svml_stanh_data_internal(%rip), %ymm14, %ymm9
-/* if VMIN, VMAX is defined for I type */
+	/* if VMIN, VMAX is defined for I type */
 	vxorps	%ymm15, %ymm15, %ymm15
 	vpcmpgtd %ymm15, %ymm9, %ymm0
 	vpand	%ymm0, %ymm9, %ymm7
@@ -136,34 +135,34 @@ ENTRY(_ZGVdN8v_tanhf_avx2)
 	movslq	%r12d, %r12
 	movslq	%esi, %rsi
 	movslq	%eax, %rax
-	vmovupd	-16(%r9,%r10), %xmm5
-	vmovupd	-16(%rdx,%r10), %xmm14
-	vmovupd	-16(%rcx,%r10), %xmm13
-	vmovupd	(%r9,%r10), %xmm1
-	vmovupd	(%r8,%r10), %xmm2
-	vmovupd	-16(%r8,%r10), %xmm4
-	vinsertf128 $1, -16(%rdi,%r10), %ymm5, %ymm15
-	vinsertf128 $1, -16(%r12,%r10), %ymm14, %ymm3
-	vinsertf128 $1, -16(%rax,%r10), %ymm13, %ymm6
-	vinsertf128 $1, (%rdi,%r10), %ymm1, %ymm5
-	vinsertf128 $1, (%rsi,%r10), %ymm2, %ymm14
+	vmovupd	-16(%r9, %r10), %xmm5
+	vmovupd	-16(%rdx, %r10), %xmm14
+	vmovupd	-16(%rcx, %r10), %xmm13
+	vmovupd	(%r9, %r10), %xmm1
+	vmovupd	(%r8, %r10), %xmm2
+	vmovupd	-16(%r8, %r10), %xmm4
+	vinsertf128 $1, -16(%rdi, %r10), %ymm5, %ymm15
+	vinsertf128 $1, -16(%r12, %r10), %ymm14, %ymm3
+	vinsertf128 $1, -16(%rax, %r10), %ymm13, %ymm6
+	vinsertf128 $1, (%rdi, %r10), %ymm1, %ymm5
+	vinsertf128 $1, (%rsi, %r10), %ymm2, %ymm14
 	vunpcklpd %ymm3, %ymm6, %ymm8
 	vunpckhpd %ymm3, %ymm6, %ymm6
 	vunpcklpd %ymm14, %ymm5, %ymm3
 	vunpckhpd %ymm14, %ymm5, %ymm2
-	vmovupd	(%rcx,%r10), %xmm13
+	vmovupd	(%rcx, %r10), %xmm13
 	vcvtps2pd %xmm10, %ymm5
 	vextractf128 $1, %ymm10, %xmm10
 	vfmadd213pd %ymm3, %ymm5, %ymm2
-	vinsertf128 $1, -16(%rsi,%r10), %ymm4, %ymm0
-	vmovupd	(%rdx,%r10), %xmm4
+	vinsertf128 $1, -16(%rsi, %r10), %ymm4, %ymm0
+	vmovupd	(%rdx, %r10), %xmm4
 	vunpcklpd %ymm0, %ymm15, %ymm9
 	vunpckhpd %ymm0, %ymm15, %ymm7
 	vfmadd213pd %ymm7, %ymm5, %ymm2
 	vfmadd213pd %ymm9, %ymm5, %ymm2
-	vinsertf128 $1, (%r12,%r10), %ymm4, %ymm0
+	vinsertf128 $1, (%r12, %r10), %ymm4, %ymm0
 	vcvtps2pd %xmm10, %ymm4
-	vinsertf128 $1, (%rax,%r10), %ymm13, %ymm15
+	vinsertf128 $1, (%rax, %r10), %ymm13, %ymm15
 	vunpcklpd %ymm0, %ymm15, %ymm1
 	vunpckhpd %ymm0, %ymm15, %ymm0
 	vfmadd213pd %ymm1, %ymm4, %ymm0
@@ -175,11 +174,11 @@ ENTRY(_ZGVdN8v_tanhf_avx2)
 	vorps	%ymm11, %ymm2, %ymm0
 	testl	%r11d, %r11d
-/* Go to special inputs processing branch */
+	/* Go to special inputs processing branch */
 	jne	L(SPECIAL_VALUES_BRANCH)
 	# LOE rbx r13 r14 r15 r11d ymm0 ymm12
-/* Restore registers
+	/* Restore registers
 	 * and exit the function
 	 */
@@ -197,7 +196,7 @@ L(EXIT):
 	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -8; DW_OP_plus) */
 	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22
-/* Branch to process
+	/* Branch to process
 	 * special inputs
 	 */
@@ -219,18 +218,18 @@ L(SPECIAL_VALUES_BRANCH):
 	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
 	# LOE rbx r15 r12d r13d
-/* Range mask
+	/* Range mask
 	 * bits check
 	 */
 L(RANGEMASK_CHECK):
 	btl	%r12d, %r13d
-/* Call scalar math function */
+	/* Call scalar math function */
 	jc	L(SCALAR_MATH_CALL)
 	# LOE rbx r15 r12d r13d
-/* Special inputs
+	/* Special inputs
 	 * processing loop
 	 */
@@ -238,7 +237,7 @@ L(SPECIAL_VALUES_LOOP):
 	incl	%r12d
 	cmpl	$8, %r12d
-/* Check bits in range mask */
+	/* Check bits in range mask */
 	jl	L(RANGEMASK_CHECK)
 	# LOE rbx r15 r12d r13d
@@ -248,7 +247,7 @@ L(SPECIAL_VALUES_LOOP):
 	cfi_restore(14)
 	vmovups	64(%rsp), %ymm0
-/* Go to exit */
+	/* Go to exit */
 	jmp	L(EXIT)
 	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus) */
 	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22
@@ -256,19 +255,19 @@ L(SPECIAL_VALUES_LOOP):
 	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
 	# LOE rbx r13 r14 r15 ymm0
-/* Scalar math fucntion call
+	/* Scalar math fucntion call
 	 * to process special input
 	 */
 L(SCALAR_MATH_CALL):
 	movl	%r12d, %r14d
-	movss	32(%rsp,%r14,4), %xmm0
+	movss	32(%rsp, %r14, 4), %xmm0
 	call	tanhf@PLT
 	# LOE rbx r14 r15 r12d r13d xmm0
-	movss	%xmm0, 64(%rsp,%r14,4)
+	movss	%xmm0, 64(%rsp, %r14, 4)
-/* Process special inputs in loop */
+	/* Process special inputs in loop */
 	jmp	L(SPECIAL_VALUES_LOOP)
 	# LOE rbx r15 r12d r13d
 END(_ZGVdN8v_tanhf_avx2)
@@ -278,8 +277,7 @@ END(_ZGVdN8v_tanhf_avx2)
 #ifdef __svml_stanh_data_internal_typedef
 typedef unsigned int VUINT32;
-typedef struct
-{
+typedef struct {
 	__declspec(align(32)) VUINT32 _dbP[(134*4)][2];
 	__declspec(align(32)) VUINT32 _sSignMask[8][1];
 	__declspec(align(32)) VUINT32 _sAbsMask[8][1];
@@ -840,5 +838,5 @@ __svml_stanh_data_internal:
 	.align	32
 	.long	0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */
 	.align	32
-	.type	__svml_stanh_data_internal,@object
-	.size	__svml_stanh_data_internal,.-__svml_stanh_data_internal
+	.type	__svml_stanh_data_internal, @object
+	.size	__svml_stanh_data_internal, .-__svml_stanh_data_internal