From efa0569d2bfdbb7367fce42b1c99821b85d2d3ba Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 8 Aug 2009 17:48:09 -0700 Subject: [PATCH] Optimize x86-64 version of sem_timedwait. --- nptl/ChangeLog | 3 + .../unix/sysv/linux/x86_64/sem_timedwait.S | 284 +++++++++++------- 2 files changed, 185 insertions(+), 102 deletions(-) diff --git a/nptl/ChangeLog b/nptl/ChangeLog index 48fcc0fe09..5be464e2f2 100644 --- a/nptl/ChangeLog +++ b/nptl/ChangeLog @@ -1,5 +1,8 @@ 2009-08-08 Ulrich Drepper + * sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait): + Optimize code path used when FUTEX_CLOCK_REALTIME is supported. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S (__pthread_cond_wait): Optimize by avoiding use of callee-safe register. diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S index 95762834d3..0291beb169 100644 --- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S @@ -65,7 +65,113 @@ sem_timedwait: retq /* Check whether the timeout value is valid. */ -1: pushq %r12 +1: cmpq $1000000000, 8(%rsi) + jae 6f + +#ifndef __ASSUME_FUTEX_CLOCK_REALTIME +# ifdef PIC + cmpl $0, __have_futex_clock_realtime(%rip) +# else + cmpl $0, __have_futex_clock_realtime +# endif + je .Lreltmo +#endif + + /* This push is only needed to store the sem_t pointer for the + exception handler. */ + pushq %rdi + cfi_adjust_cfa_offset(8) + + movq %rsi, %r10 + + LOCK + addq $1, NWAITERS(%rdi) + +.LcleanupSTART: +13: call __pthread_enable_asynccancel + movl %eax, %r8d + +#if VALUE != 0 + leaq VALUE(%rdi), %rdi +#endif + movl $0xffffffff, %r9d + movl $FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi + orl PRIVATE(%rdi), %esi + movl $SYS_futex, %eax + xorl %edx, %edx + syscall + movq %rax, %r9 +#if VALUE != 0 + leaq -VALUE(%rdi), %rdi +#endif + + xchgq %r8, %rdi + call __pthread_disable_asynccancel +.LcleanupEND: + movq %r8, %rdi + + testq %r9, %r9 + je 11f + cmpq $-EWOULDBLOCK, %r9 + jne 3f + +11: +#if VALUE == 0 + movl (%rdi), %eax +#else + movl VALUE(%rdi), %eax +#endif +14: testl %eax, %eax + je 13b + + leaq -1(%rax), %rcx + LOCK +#if VALUE == 0 + cmpxchgl %ecx, (%rdi) +#else + cmpxchgl %ecx, VALUE(%rdi) +#endif + jne 14b + + xorl %eax, %eax + +15: LOCK + subq $1, NWAITERS(%rdi) + + leaq 8(%rsp), %rsp + cfi_adjust_cfa_offset(-8) + retq + + cfi_adjust_cfa_offset(8) +3: negq %r9 +#if USE___THREAD + movq errno@gottpoff(%rip), %rdx + movl %r9d, %fs:(%rdx) +#else + callq __errno_location@plt + movl %r9d, (%rax) +#endif + + orl $-1, %eax + jmp 15b + + cfi_adjust_cfa_offset(-8) +6: +#if USE___THREAD + movq errno@gottpoff(%rip), %rdx + movl $EINVAL, %fs:(%rdx) +#else + callq __errno_location@plt + movl $EINVAL, (%rax) +#endif + + orl $-1, %eax + + retq + +#ifndef __ASSUME_FUTEX_CLOCK_REALTIME +.Lreltmo: + pushq %r12 cfi_adjust_cfa_offset(8) cfi_rel_offset(%r12, 0) pushq %r13 @@ -74,6 +180,7 @@ sem_timedwait: pushq %r14 cfi_adjust_cfa_offset(8) cfi_rel_offset(%r14, 0) + #ifdef __ASSUME_FUTEX_CLOCK_REALTIME # define STACKFRAME 8 #else @@ -85,105 +192,9 @@ sem_timedwait: movq %rdi, %r12 movq %rsi, %r13 - /* Check for invalid nanosecond field. */ - cmpq $1000000000, 8(%r13) - movl $EINVAL, %r14d - jae 6f - LOCK addq $1, NWAITERS(%r12) -#ifndef __ASSUME_FUTEX_CLOCK_REALTIME -# ifdef PIC - cmpl $0, __have_futex_clock_realtime(%rip) -# else - cmpl $0, __have_futex_clock_realtime -# endif - je .Lreltmo -#endif - -.LcleanupSTART: -13: call __pthread_enable_asynccancel - movl %eax, (%rsp) - - movq %r13, %r10 -#if VALUE == 0 - movq %r12, %rdi -#else - leaq VALUE(%r12), %rdi -#endif - movl $0xffffffff, %r9d - movl $FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi - orl PRIVATE(%rdi), %esi - movl $SYS_futex, %eax - xorl %edx, %edx - syscall - movq %rax, %r14 - - movl (%rsp), %edi - call __pthread_disable_asynccancel -.LcleanupEND: - - testq %r14, %r14 - je 11f - cmpq $-EWOULDBLOCK, %r14 - jne 3f - -11: -#if VALUE == 0 - movl (%r12), %eax -#else - movl VALUE(%r12), %eax -#endif -14: testl %eax, %eax - je 13b - - leaq -1(%rax), %rcx - LOCK -#if VALUE == 0 - cmpxchgl %ecx, (%r12) -#else - cmpxchgl %ecx, VALUE(%r12) -#endif - jne 14b - -10: xorl %eax, %eax - -15: LOCK - subq $1, NWAITERS(%r12) - - addq $STACKFRAME, %rsp - cfi_adjust_cfa_offset(-STACKFRAME) - popq %r14 - cfi_adjust_cfa_offset(-8) - cfi_restore(%r14) - popq %r13 - cfi_adjust_cfa_offset(-8) - cfi_restore(%r13) - popq %r12 - cfi_adjust_cfa_offset(-8) - cfi_restore(%r12) - retq - - cfi_adjust_cfa_offset(STACKFRAME + 3 * 8) - cfi_rel_offset(%r12, STACKFRAME + 2 * 8) - cfi_rel_offset(%r13, STACKFRAME + 1 * 8) - cfi_rel_offset(%r14, STACKFRAME) -3: negq %r14 -6: -#if USE___THREAD - movq errno@gottpoff(%rip), %rdx - movl %r14d, %fs:(%rdx) -#else - callq __errno_location@plt - movl %r14d, (%rax) -#endif - - orl $-1, %eax - jmp 15b - -#ifndef __ASSUME_FUTEX_CLOCK_REALTIME -.Lreltmo: 7: xorl %esi, %esi movq %rsp, %rdi movq $VSYSCALL_ADDR_vgettimeofday, %rax @@ -202,7 +213,7 @@ sem_timedwait: decq %rdi 5: testq %rdi, %rdi movl $ETIMEDOUT, %r14d - js 6b /* Time is already up. */ + js 36f /* Time is already up. */ movq %rdi, (%rsp) /* Store relative timeout. */ movq %rsi, 8(%rsp) @@ -235,7 +246,7 @@ sem_timedwait: testq %r14, %r14 je 9f cmpq $-EWOULDBLOCK, %r14 - jne 3b + jne 33f 9: # if VALUE == 0 @@ -254,15 +265,54 @@ sem_timedwait: cmpxchgl %ecx, VALUE(%r12) # endif jne 8b - jmp 10b + + xorl %eax, %eax + +45: LOCK + subq $1, NWAITERS(%r12) + + addq $STACKFRAME, %rsp + cfi_adjust_cfa_offset(-STACKFRAME) + popq %r14 + cfi_adjust_cfa_offset(-8) + cfi_restore(%r14) + popq %r13 + cfi_adjust_cfa_offset(-8) + cfi_restore(%r13) + popq %r12 + cfi_adjust_cfa_offset(-8) + cfi_restore(%r12) + retq + + cfi_adjust_cfa_offset(STACKFRAME + 3 * 8) + cfi_rel_offset(%r12, STACKFRAME + 2 * 8) + cfi_rel_offset(%r13, STACKFRAME + 1 * 8) + cfi_rel_offset(%r14, STACKFRAME) +33: negq %r14 +36: +#if USE___THREAD + movq errno@gottpoff(%rip), %rdx + movl %r14d, %fs:(%rdx) +#else + callq __errno_location@plt + movl %r14d, (%rax) #endif + + orl $-1, %eax + jmp 45b +#endif + cfi_endproc .size sem_timedwait,.-sem_timedwait .type sem_timedwait_cleanup,@function sem_timedwait_cleanup: + cfi_startproc + cfi_adjust_cfa_offset(8) + + movq (%rsp), %rdi LOCK - subq $1, NWAITERS(%r12) + subq $1, NWAITERS(%rdi) movq %rax, %rdi .LcallUR: call _Unwind_Resume@PLT @@ -272,6 +322,30 @@ sem_timedwait_cleanup: .size sem_timedwait_cleanup,.-sem_timedwait_cleanup +#ifndef __ASSUME_FUTEX_CLOCK_REALTIME + .type sem_timedwait_cleanup2,@function +sem_timedwait_cleanup2: + cfi_startproc + cfi_adjust_cfa_offset(STACKFRAME + 3 * 8) + cfi_rel_offset(%r12, STACKFRAME + 2 * 8) + cfi_rel_offset(%r13, STACKFRAME + 1 * 8) + cfi_rel_offset(%r14, STACKFRAME) + + LOCK + subq $1, NWAITERS(%r12) + movq %rax, %rdi + movq STACKFRAME(%rsp), %r14 + movq STACKFRAME+8(%rsp), %r13 + movq STACKFRAME+16(%rsp), %r12 +.LcallUR2: + call _Unwind_Resume@PLT + hlt +.LENDCODE2: + cfi_endproc + .size sem_timedwait_cleanup2,.-sem_timedwait_cleanup2 +#endif + + .section .gcc_except_table,"a",@progbits .LexceptSTART: .byte DW_EH_PE_omit # @LPStart format @@ -286,13 +360,19 @@ sem_timedwait_cleanup: #ifndef __ASSUME_FUTEX_CLOCK_REALTIME .uleb128 .LcleanupSTART2-.LSTARTCODE .uleb128 .LcleanupEND2-.LcleanupSTART2 - .uleb128 sem_timedwait_cleanup-.LSTARTCODE + .uleb128 sem_timedwait_cleanup2-.LSTARTCODE .uleb128 0 #endif .uleb128 .LcallUR-.LSTARTCODE .uleb128 .LENDCODE-.LcallUR .uleb128 0 .uleb128 0 +#ifndef __ASSUME_FUTEX_CLOCK_REALTIME + .uleb128 .LcallUR2-.LSTARTCODE + .uleb128 .LENDCODE2-.LcallUR2 + .uleb128 0 + .uleb128 0 +#endif .Lcstend: