From ec5380c768864c7afd92aa886dd4bb6d38497a01 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 7 Jan 2010 14:33:30 -0800 Subject: MIPS: Remove unused macros from barrier.h The smp_llsc_rmb() and smp_llsc_wmb() macros are not used in the tree, remove them. Signed-off-by: David Daney To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/848/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/barrier.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/mips/include/asm/barrier.h') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 8e9ac313ca3b..91785dc8e94e 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -149,7 +149,5 @@ do { var = value; smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") -#define smp_llsc_rmb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") -#define smp_llsc_wmb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") #endif /* __ASM_BARRIER_H */ -- cgit v1.2.3 From f252ffd50c97dae87b45f1dbad24f71358ccfbd6 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 8 Jan 2010 17:17:43 -0800 Subject: MIPS: New macro smp_mb__before_llsc. Replace some instances of smp_llsc_mb() with a new macro smp_mb__before_llsc(). It is used before ll/sc sequences that are documented as needing write barrier semantics. The default implementation of smp_mb__before_llsc() is just smp_llsc_mb(), so there are no changes in semantics. Also simplify definition of smp_mb(), smp_rmb(), and smp_wmb() to be just barrier() in the non-SMP case. Signed-off-by: David Daney To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/851/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/barrier.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/mips/include/asm/barrier.h') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 91785dc8e94e..1a5a51c3e96f 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -131,23 +131,26 @@ #endif /* !CONFIG_CPU_HAS_WB */ #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) -#define __WEAK_ORDERING_MB " sync \n" +#define smp_mb() __asm__ __volatile__("sync" : : :"memory") +#define smp_rmb() __asm__ __volatile__("sync" : : :"memory") +#define smp_wmb() __asm__ __volatile__("sync" : : :"memory") #else -#define __WEAK_ORDERING_MB " \n" +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() #endif + #if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP) #define __WEAK_LLSC_MB " sync \n" #else #define __WEAK_LLSC_MB " \n" #endif -#define smp_mb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") -#define smp_rmb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") -#define smp_wmb() __asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory") - #define set_mb(var, value) \ do { var = value; smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") +#define smp_mb__before_llsc() smp_llsc_mb() + #endif /* __ASM_BARRIER_H */ -- cgit v1.2.3 From 6b07d38aaa520cee922fadfeaf90c97faf217045 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 8 Jan 2010 17:17:44 -0800 Subject: MIPS: Octeon: Use optimized memory barrier primitives. In order to achieve correct synchronization semantics, the Octeon port had defined CONFIG_WEAK_REORDERING_BEYOND_LLSC. This resulted in code that looks like: sync ll ... . . . sc ... . . sync The second SYNC was redundant, but harmless. Octeon has a SYNCW instruction that acts as a write-memory-barrier (due to an erratum in some parts two SYNCW are used). It is much faster than SYNC because it imposes ordering on the writes, but doesn't otherwise stall the execution pipeline. On Octeon, SYNC stalls execution until all preceeding writes are committed to the coherent memory system. Using: syncw;syncw ll . . . sc . . Has identical semantics to the first sequence, but is much faster. The SYNCW orders the writes, and the SC will not complete successfully until the write is committed to the coherent memory system. So at the end all preceeding writes have been committed. Since Octeon does not do speculative reads, this functions as a full barrier. The patch removes CONFIG_WEAK_REORDERING_BEYOND_LLSC, and substitutes SYNCW for SYNC in write-memory-barriers. Signed-off-by: David Daney To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/850/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/barrier.h | 43 +++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'arch/mips/include/asm/barrier.h') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 1a5a51c3e96f..a2670a239e0c 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -88,12 +88,20 @@ : /* no output */ \ : "m" (*(int *)CKSEG1) \ : "memory") - -#define fast_wmb() __sync() -#define fast_rmb() __sync() -#define fast_mb() __sync() -#ifdef CONFIG_SGI_IP28 -#define fast_iob() \ +#ifdef CONFIG_CPU_CAVIUM_OCTEON +# define OCTEON_SYNCW_STR ".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n" +# define __syncw() __asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory") + +# define fast_wmb() __syncw() +# define fast_rmb() barrier() +# define fast_mb() __sync() +# define fast_iob() do { } while (0) +#else /* ! CONFIG_CPU_CAVIUM_OCTEON */ +# define fast_wmb() __sync() +# define fast_rmb() __sync() +# define fast_mb() __sync() +# ifdef CONFIG_SGI_IP28 +# define fast_iob() \ __asm__ __volatile__( \ ".set push\n\t" \ ".set noreorder\n\t" \ @@ -104,13 +112,14 @@ : /* no output */ \ : "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \ : "memory") -#else -#define fast_iob() \ +# else +# define fast_iob() \ do { \ __sync(); \ __fast_iob(); \ } while (0) -#endif +# endif +#endif /* CONFIG_CPU_CAVIUM_OCTEON */ #ifdef CONFIG_CPU_HAS_WB @@ -131,9 +140,15 @@ #endif /* !CONFIG_CPU_HAS_WB */ #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP) -#define smp_mb() __asm__ __volatile__("sync" : : :"memory") -#define smp_rmb() __asm__ __volatile__("sync" : : :"memory") -#define smp_wmb() __asm__ __volatile__("sync" : : :"memory") +# ifdef CONFIG_CPU_CAVIUM_OCTEON +# define smp_mb() __sync() +# define smp_rmb() barrier() +# define smp_wmb() __syncw() +# else +# define smp_mb() __asm__ __volatile__("sync" : : :"memory") +# define smp_rmb() __asm__ __volatile__("sync" : : :"memory") +# define smp_wmb() __asm__ __volatile__("sync" : : :"memory") +# endif #else #define smp_mb() barrier() #define smp_rmb() barrier() @@ -151,6 +166,10 @@ #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") +#ifdef CONFIG_CPU_CAVIUM_OCTEON +#define smp_mb__before_llsc() smp_wmb() +#else #define smp_mb__before_llsc() smp_llsc_mb() +#endif #endif /* __ASM_BARRIER_H */ -- cgit v1.2.3 From 500c2e1fdbcc2b273bd4c695a9b8ac8196f61614 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 4 Feb 2010 11:31:49 -0800 Subject: MIPS: Optimize spinlocks. The current locking mechanism uses a ll/sc sequence to release a spinlock. This is slower than a wmb() followed by a store to unlock. The branching forward to .subsection 2 on sc failure slows down the contended case. So we get rid of that part too. Since we are now working on naturally aligned u16 values, we can get rid of a masking operation as the LHU already does the right thing. The ANDI are reversed for better scheduling on multi-issue CPUs On a 12 CPU 750MHz Octeon cn5750 this patch improves ipv4 UDP packet forwarding rates from 3.58*10^6 PPS to 3.99*10^6 PPS, or about 11%. Signed-off-by: David Daney To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/937/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/barrier.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/mips/include/asm/barrier.h') diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index a2670a239e0c..c0884f02d3a6 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -168,8 +168,14 @@ #ifdef CONFIG_CPU_CAVIUM_OCTEON #define smp_mb__before_llsc() smp_wmb() +/* Cause previous writes to become visible on all CPUs as soon as possible */ +#define nudge_writes() __asm__ __volatile__(".set push\n\t" \ + ".set arch=octeon\n\t" \ + "syncw\n\t" \ + ".set pop" : : : "memory") #else #define smp_mb__before_llsc() smp_llsc_mb() +#define nudge_writes() mb() #endif #endif /* __ASM_BARRIER_H */ -- cgit v1.2.3