mirror of git://sourceware.org/git/glibc.git
x86_64: Fix svml_s_tanhf8_core_avx2.S code formatting
This commit contains the following formatting changes:

1. Instructions preceded by a tab.
2. Instructions less than 8 characters in length have a tab between them and the first operand.
3. Instructions greater than 7 characters in length have a space between them and the first operand.
4. Tabs after `#define`d names and their values.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
parent 2eeea98af0
commit 2c632117bf
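
The rules above can be illustrated with a small before/after sketch (a hypothetical fragment for illustration, not taken from the patch below; the _Mask symbol is invented). `vpand` is under 8 characters, so a tab separates it from its first operand (rule 2); `vinsertf128` is over 7 characters and takes a space (rule 3); the comment is indented with the code (rule 6); and memory-operand commas gain a space (rule 9).

	# Before (hypothetical fragment):
	# /* mask off exponent */
	# vpand _Mask(%rip), %ymm0, %ymm1
	# vinsertf128	$1, (%rax,%r10), %ymm1, %ymm2

	# After, applying rules 2, 3, 6, and 9:
	/* mask off exponent */
	vpand	_Mask(%rip), %ymm0, %ymm1
	vinsertf128 $1, (%rax, %r10), %ymm1, %ymm2
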
@@ -28,7 +28,7 @@
  * and to approximate tanh(.) with a polynomial on each of them.
  *
  * IEEE SPECIAL CONDITIONS:
- * x = [+,-]0, r = [+,-]0
+ * x = [+, -]0, r = [+, -]0
  * x = +Inf, r = +1
  * x = -Inf, r = -1
  * x = QNaN, r = QNaN
@@ -82,8 +82,7 @@
 #include <sysdep.h>
 
-	.text
-	.section .text.avx2,"ax",@progbits
+	.section .text.avx2, "ax", @progbits
 ENTRY(_ZGVdN8v_tanhf_avx2)
 	pushq	%rbp
 	cfi_def_cfa_offset(16)
 
@@ -96,17 +95,17 @@ ENTRY(_ZGVdN8v_tanhf_avx2)
 	lea	_dbP+16+__svml_stanh_data_internal(%rip), %r10
 	vmovaps	%ymm0, %ymm12
 
-/* Here huge arguments, INF and NaNs are filtered out to callout. */
+	/* Here huge arguments, INF and NaNs are filtered out to callout. */
 	vpand	_iExpMantMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm14
 
-/*
- * small table specific variables *
- * Constant loading
- */
+	/*
+	 * small table specific variables *
+	 * Constant loading
+	 */
 	vmovups	_iMaxIdxMask+__svml_stanh_data_internal(%rip), %ymm8
 	vpsubd	_iMinIdxOfsMask+__svml_stanh_data_internal(%rip), %ymm14, %ymm9
 
-/* if VMIN, VMAX is defined for I type */
+	/* if VMIN, VMAX is defined for I type */
 	vxorps	%ymm15, %ymm15, %ymm15
 	vpcmpgtd %ymm15, %ymm9, %ymm0
 	vpand	%ymm0, %ymm9, %ymm7
@@ -136,34 +135,34 @@ ENTRY(_ZGVdN8v_tanhf_avx2)
 	movslq	%r12d, %r12
 	movslq	%esi, %rsi
 	movslq	%eax, %rax
-	vmovupd	-16(%r9,%r10), %xmm5
-	vmovupd	-16(%rdx,%r10), %xmm14
-	vmovupd	-16(%rcx,%r10), %xmm13
-	vmovupd	(%r9,%r10), %xmm1
-	vmovupd	(%r8,%r10), %xmm2
-	vmovupd	-16(%r8,%r10), %xmm4
-	vinsertf128 $1, -16(%rdi,%r10), %ymm5, %ymm15
-	vinsertf128 $1, -16(%r12,%r10), %ymm14, %ymm3
-	vinsertf128 $1, -16(%rax,%r10), %ymm13, %ymm6
-	vinsertf128 $1, (%rdi,%r10), %ymm1, %ymm5
-	vinsertf128 $1, (%rsi,%r10), %ymm2, %ymm14
+	vmovupd	-16(%r9, %r10), %xmm5
+	vmovupd	-16(%rdx, %r10), %xmm14
+	vmovupd	-16(%rcx, %r10), %xmm13
+	vmovupd	(%r9, %r10), %xmm1
+	vmovupd	(%r8, %r10), %xmm2
+	vmovupd	-16(%r8, %r10), %xmm4
+	vinsertf128 $1, -16(%rdi, %r10), %ymm5, %ymm15
+	vinsertf128 $1, -16(%r12, %r10), %ymm14, %ymm3
+	vinsertf128 $1, -16(%rax, %r10), %ymm13, %ymm6
+	vinsertf128 $1, (%rdi, %r10), %ymm1, %ymm5
+	vinsertf128 $1, (%rsi, %r10), %ymm2, %ymm14
 	vunpcklpd %ymm3, %ymm6, %ymm8
 	vunpckhpd %ymm3, %ymm6, %ymm6
 	vunpcklpd %ymm14, %ymm5, %ymm3
 	vunpckhpd %ymm14, %ymm5, %ymm2
-	vmovupd	(%rcx,%r10), %xmm13
+	vmovupd	(%rcx, %r10), %xmm13
 	vcvtps2pd %xmm10, %ymm5
 	vextractf128 $1, %ymm10, %xmm10
 	vfmadd213pd %ymm3, %ymm5, %ymm2
-	vinsertf128 $1, -16(%rsi,%r10), %ymm4, %ymm0
-	vmovupd	(%rdx,%r10), %xmm4
+	vinsertf128 $1, -16(%rsi, %r10), %ymm4, %ymm0
+	vmovupd	(%rdx, %r10), %xmm4
 	vunpcklpd %ymm0, %ymm15, %ymm9
 	vunpckhpd %ymm0, %ymm15, %ymm7
 	vfmadd213pd %ymm7, %ymm5, %ymm2
 	vfmadd213pd %ymm9, %ymm5, %ymm2
-	vinsertf128 $1, (%r12,%r10), %ymm4, %ymm0
+	vinsertf128 $1, (%r12, %r10), %ymm4, %ymm0
 	vcvtps2pd %xmm10, %ymm4
-	vinsertf128 $1, (%rax,%r10), %ymm13, %ymm15
+	vinsertf128 $1, (%rax, %r10), %ymm13, %ymm15
 	vunpcklpd %ymm0, %ymm15, %ymm1
 	vunpckhpd %ymm0, %ymm15, %ymm0
 	vfmadd213pd %ymm1, %ymm4, %ymm0
@@ -175,11 +174,11 @@ ENTRY(_ZGVdN8v_tanhf_avx2)
 	vorps	%ymm11, %ymm2, %ymm0
 	testl	%r11d, %r11d
 
-/* Go to special inputs processing branch */
+	/* Go to special inputs processing branch */
 	jne	L(SPECIAL_VALUES_BRANCH)
 					# LOE rbx r13 r14 r15 r11d ymm0 ymm12
 
-/* Restore registers
- * and exit the function
- */
+	/* Restore registers
+	 * and exit the function
+	 */
 
@@ -197,7 +196,7 @@ L(EXIT):
 	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -8; DW_OP_plus) */
 	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22
 
-/* Branch to process
- * special inputs
- */
+	/* Branch to process
+	 * special inputs
+	 */
 
@@ -219,18 +218,18 @@ L(SPECIAL_VALUES_BRANCH):
 	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
 					# LOE rbx r15 r12d r13d
 
-/* Range mask
- * bits check
- */
+	/* Range mask
+	 * bits check
+	 */
 
 L(RANGEMASK_CHECK):
 	btl	%r12d, %r13d
 
-/* Call scalar math function */
+	/* Call scalar math function */
 	jc	L(SCALAR_MATH_CALL)
 					# LOE rbx r15 r12d r13d
 
-/* Special inputs
- * processing loop
- */
+	/* Special inputs
+	 * processing loop
+	 */
 
@@ -238,7 +237,7 @@ L(SPECIAL_VALUES_LOOP):
 	incl	%r12d
 	cmpl	$8, %r12d
 
-/* Check bits in range mask */
+	/* Check bits in range mask */
 	jl	L(RANGEMASK_CHECK)
 					# LOE rbx r15 r12d r13d
 
@@ -248,7 +247,7 @@ L(SPECIAL_VALUES_LOOP):
 	cfi_restore(14)
 	vmovups	64(%rsp), %ymm0
 
-/* Go to exit */
+	/* Go to exit */
 	jmp	L(EXIT)
 	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus) */
 	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22
@@ -256,19 +255,19 @@ L(SPECIAL_VALUES_LOOP):
 	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
 					# LOE rbx r13 r14 r15 ymm0
 
-/* Scalar math fucntion call
- * to process special input
- */
+	/* Scalar math fucntion call
+	 * to process special input
+	 */
 
 L(SCALAR_MATH_CALL):
 	movl	%r12d, %r14d
-	movss	32(%rsp,%r14,4), %xmm0
+	movss	32(%rsp, %r14, 4), %xmm0
 	call	tanhf@PLT
 					# LOE rbx r14 r15 r12d r13d xmm0
 
-	movss	%xmm0, 64(%rsp,%r14,4)
+	movss	%xmm0, 64(%rsp, %r14, 4)
 
-/* Process special inputs in loop */
+	/* Process special inputs in loop */
 	jmp	L(SPECIAL_VALUES_LOOP)
 					# LOE rbx r15 r12d r13d
 END(_ZGVdN8v_tanhf_avx2)
@@ -278,8 +277,7 @@ END(_ZGVdN8v_tanhf_avx2)
 
 #ifdef __svml_stanh_data_internal_typedef
 typedef unsigned int VUINT32;
-typedef struct
-{
+typedef struct {
 	__declspec(align(32)) VUINT32 _dbP[(134*4)][2];
 	__declspec(align(32)) VUINT32 _sSignMask[8][1];
 	__declspec(align(32)) VUINT32 _sAbsMask[8][1];
@@ -840,5 +838,5 @@ __svml_stanh_data_internal:
 	.align	32
 	.long	0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */
 	.align	32
-	.type	__svml_stanh_data_internal,@object
-	.size	__svml_stanh_data_internal,.-__svml_stanh_data_internal
+	.type	__svml_stanh_data_internal, @object
+	.size	__svml_stanh_data_internal, .-__svml_stanh_data_internal