From f252ffd50c97dae87b45f1dbad24f71358ccfbd6 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 8 Jan 2010 17:17:43 -0800 Subject: MIPS: New macro smp_mb__before_llsc. Replace some instances of smp_llsc_mb() with a new macro smp_mb__before_llsc(). It is used before ll/sc sequences that are documented as needing write barrier semantics. The default implementation of smp_mb__before_llsc() is just smp_llsc_mb(), so there are no changes in semantics. Also simplify definition of smp_mb(), smp_rmb(), and smp_wmb() to be just barrier() in the non-SMP case. Signed-off-by: David Daney To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/851/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/mips/include/asm/spinlock.h') diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 21ef9efbde43..5f16696eaa00 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -138,7 +138,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { int tmp; - smp_llsc_mb(); + smp_mb__before_llsc(); if (R10000_LLSC_WAR) { __asm__ __volatile__ ( @@ -305,7 +305,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned int tmp; - smp_llsc_mb(); + smp_mb__before_llsc(); if (R10000_LLSC_WAR) { __asm__ __volatile__( -- cgit v1.2.3 From 500c2e1fdbcc2b273bd4c695a9b8ac8196f61614 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 4 Feb 2010 11:31:49 -0800 Subject: MIPS: Optimize spinlocks. The current locking mechanism uses a ll/sc sequence to release a spinlock. This is slower than a wmb() followed by a store to unlock. The branching forward to .subsection 2 on sc failure slows down the contended case. So we get rid of that part too. Since we are now working on naturally aligned u16 values, we can get rid of a masking operation as the LHU already does the right thing. The ANDI are reversed for better scheduling on multi-issue CPUs On a 12 CPU 750MHz Octeon cn5750 this patch improves ipv4 UDP packet forwarding rates from 3.58*10^6 PPS to 3.99*10^6 PPS, or about 11%. Signed-off-by: David Daney To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/937/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/spinlock.h | 118 ++++++++++++++------------------------- 1 file changed, 43 insertions(+), 75 deletions(-) (limited to 'arch/mips/include/asm/spinlock.h') diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 5f16696eaa00..396e402fbe2c 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -36,9 +36,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - unsigned int counters = ACCESS_ONCE(lock->lock); + u32 counters = ACCESS_ONCE(lock->lock); - return ((counters >> 14) ^ counters) & 0x1fff; + return ((counters >> 16) ^ counters) & 0xffff; } #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) @@ -47,9 +47,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock) static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - unsigned int counters = ACCESS_ONCE(lock->lock); + u32 counters = ACCESS_ONCE(lock->lock); - return (((counters >> 14) - counters) & 0x1fff) > 1; + return (((counters >> 16) - counters) & 0xffff) > 1; } #define arch_spin_is_contended arch_spin_is_contended @@ -57,6 +57,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { int my_ticket; int tmp; + int inc = 0x10000; if (R10000_LLSC_WAR) { __asm__ __volatile__ ( @@ -64,25 +65,24 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) " .set noreorder \n" " \n" "1: ll %[ticket], %[ticket_ptr] \n" - " addiu %[my_ticket], %[ticket], 0x4000 \n" + " addu %[my_ticket], %[ticket], %[inc] \n" " sc %[my_ticket], %[ticket_ptr] \n" " beqzl %[my_ticket], 1b \n" " nop \n" - " srl %[my_ticket], %[ticket], 14 \n" - " andi %[my_ticket], %[my_ticket], 0x1fff \n" - " andi %[ticket], %[ticket], 0x1fff \n" + " srl %[my_ticket], %[ticket], 16 \n" + " andi %[ticket], %[ticket], 0xffff \n" + " andi %[my_ticket], %[my_ticket], 0xffff \n" " bne %[ticket], %[my_ticket], 4f \n" " subu %[ticket], %[my_ticket], %[ticket] \n" "2: \n" " .subsection 2 \n" - "4: andi %[ticket], %[ticket], 0x1fff \n" + "4: andi %[ticket], %[ticket], 0xffff \n" " sll %[ticket], 5 \n" " \n" "6: bnez %[ticket], 6b \n" " subu %[ticket], 1 \n" " \n" - " lw %[ticket], %[ticket_ptr] \n" - " andi %[ticket], %[ticket], 0x1fff \n" + " lhu %[ticket], %[serving_now_ptr] \n" " beq %[ticket], %[my_ticket], 2b \n" " subu %[ticket], %[my_ticket], %[ticket] \n" " b 4b \n" @@ -90,36 +90,33 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) " .previous \n" " .set pop \n" : [ticket_ptr] "+m" (lock->lock), + [serving_now_ptr] "+m" (lock->h.serving_now), [ticket] "=&r" (tmp), - [my_ticket] "=&r" (my_ticket)); + [my_ticket] "=&r" (my_ticket) + : [inc] "r" (inc)); } else { __asm__ __volatile__ ( " .set push # arch_spin_lock \n" " .set noreorder \n" " \n" - " ll %[ticket], %[ticket_ptr] \n" - "1: addiu %[my_ticket], %[ticket], 0x4000 \n" + "1: ll %[ticket], %[ticket_ptr] \n" + " addu %[my_ticket], %[ticket], %[inc] \n" " sc %[my_ticket], %[ticket_ptr] \n" - " beqz %[my_ticket], 3f \n" - " nop \n" - " srl %[my_ticket], %[ticket], 14 \n" - " andi %[my_ticket], %[my_ticket], 0x1fff \n" - " andi %[ticket], %[ticket], 0x1fff \n" + " beqz %[my_ticket], 1b \n" + " srl %[my_ticket], %[ticket], 16 \n" + " andi %[ticket], %[ticket], 0xffff \n" + " andi %[my_ticket], %[my_ticket], 0xffff \n" " bne %[ticket], %[my_ticket], 4f \n" " subu %[ticket], %[my_ticket], %[ticket] \n" "2: \n" " .subsection 2 \n" - "3: b 1b \n" - " ll %[ticket], %[ticket_ptr] \n" - " \n" "4: andi %[ticket], %[ticket], 0x1fff \n" " sll %[ticket], 5 \n" " \n" "6: bnez %[ticket], 6b \n" " subu %[ticket], 1 \n" " \n" - " lw %[ticket], %[ticket_ptr] \n" - " andi %[ticket], %[ticket], 0x1fff \n" + " lhu %[ticket], %[serving_now_ptr] \n" " beq %[ticket], %[my_ticket], 2b \n" " subu %[ticket], %[my_ticket], %[ticket] \n" " b 4b \n" @@ -127,8 +124,10 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) " .previous \n" " .set pop \n" : [ticket_ptr] "+m" (lock->lock), + [serving_now_ptr] "+m" (lock->h.serving_now), [ticket] "=&r" (tmp), - [my_ticket] "=&r" (my_ticket)); + [my_ticket] "=&r" (my_ticket) + : [inc] "r" (inc)); } smp_llsc_mb(); @@ -136,47 +135,16 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { - int tmp; - - smp_mb__before_llsc(); - - if (R10000_LLSC_WAR) { - __asm__ __volatile__ ( - " # arch_spin_unlock \n" - "1: ll %[ticket], %[ticket_ptr] \n" - " addiu %[ticket], %[ticket], 1 \n" - " ori %[ticket], %[ticket], 0x2000 \n" - " xori %[ticket], %[ticket], 0x2000 \n" - " sc %[ticket], %[ticket_ptr] \n" - " beqzl %[ticket], 1b \n" - : [ticket_ptr] "+m" (lock->lock), - [ticket] "=&r" (tmp)); - } else { - __asm__ __volatile__ ( - " .set push # arch_spin_unlock \n" - " .set noreorder \n" - " \n" - " ll %[ticket], %[ticket_ptr] \n" - "1: addiu %[ticket], %[ticket], 1 \n" - " ori %[ticket], %[ticket], 0x2000 \n" - " xori %[ticket], %[ticket], 0x2000 \n" - " sc %[ticket], %[ticket_ptr] \n" - " beqz %[ticket], 2f \n" - " nop \n" - " \n" - " .subsection 2 \n" - "2: b 1b \n" - " ll %[ticket], %[ticket_ptr] \n" - " .previous \n" - " .set pop \n" - : [ticket_ptr] "+m" (lock->lock), - [ticket] "=&r" (tmp)); - } + unsigned int serving_now = lock->h.serving_now + 1; + wmb(); + lock->h.serving_now = (u16)serving_now; + nudge_writes(); } static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) { int tmp, tmp2, tmp3; + int inc = 0x10000; if (R10000_LLSC_WAR) { __asm__ __volatile__ ( @@ -184,11 +152,11 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) " .set noreorder \n" " \n" "1: ll %[ticket], %[ticket_ptr] \n" - " srl %[my_ticket], %[ticket], 14 \n" - " andi %[my_ticket], %[my_ticket], 0x1fff \n" - " andi %[now_serving], %[ticket], 0x1fff \n" + " srl %[my_ticket], %[ticket], 16 \n" + " andi %[my_ticket], %[my_ticket], 0xffff \n" + " andi %[now_serving], %[ticket], 0xffff \n" " bne %[my_ticket], %[now_serving], 3f \n" - " addiu %[ticket], %[ticket], 0x4000 \n" + " addu %[ticket], %[ticket], %[inc] \n" " sc %[ticket], %[ticket_ptr] \n" " beqzl %[ticket], 1b \n" " li %[ticket], 1 \n" @@ -201,33 +169,33 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) : [ticket_ptr] "+m" (lock->lock), [ticket] "=&r" (tmp), [my_ticket] "=&r" (tmp2), - [now_serving] "=&r" (tmp3)); + [now_serving] "=&r" (tmp3) + : [inc] "r" (inc)); } else { __asm__ __volatile__ ( " .set push # arch_spin_trylock \n" " .set noreorder \n" " \n" - " ll %[ticket], %[ticket_ptr] \n" - "1: srl %[my_ticket], %[ticket], 14 \n" - " andi %[my_ticket], %[my_ticket], 0x1fff \n" - " andi %[now_serving], %[ticket], 0x1fff \n" + "1: ll %[ticket], %[ticket_ptr] \n" + " srl %[my_ticket], %[ticket], 16 \n" + " andi %[my_ticket], %[my_ticket], 0xffff \n" + " andi %[now_serving], %[ticket], 0xffff \n" " bne %[my_ticket], %[now_serving], 3f \n" - " addiu %[ticket], %[ticket], 0x4000 \n" + " addu %[ticket], %[ticket], %[inc] \n" " sc %[ticket], %[ticket_ptr] \n" - " beqz %[ticket], 4f \n" + " beqz %[ticket], 1b \n" " li %[ticket], 1 \n" "2: \n" " .subsection 2 \n" "3: b 2b \n" " li %[ticket], 0 \n" - "4: b 1b \n" - " ll %[ticket], %[ticket_ptr] \n" " .previous \n" " .set pop \n" : [ticket_ptr] "+m" (lock->lock), [ticket] "=&r" (tmp), [my_ticket] "=&r" (tmp2), - [now_serving] "=&r" (tmp3)); + [now_serving] "=&r" (tmp3) + : [inc] "r" (inc)); } smp_llsc_mb(); -- cgit v1.2.3