Browse Source

sparc64: Stop using memory barriers for atomics and locks.

The kernel always executes in the TSO memory model now,
so none of this stuff is necessary any more.

With helpful feedback from Nick Piggin.

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 16 năm trước cách đây
mục cha
commit
293666b7a1

+ 0 - 7
arch/sparc/include/asm/atomic_64.h

@@ -112,17 +112,10 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
 /* Atomic operations are already serializing */
-#ifdef CONFIG_SMP
-#define smp_mb__before_atomic_dec()	membar_storeload_loadload();
-#define smp_mb__after_atomic_dec()	membar_storeload_storestore();
-#define smp_mb__before_atomic_inc()	membar_storeload_loadload();
-#define smp_mb__after_atomic_inc()	membar_storeload_storestore();
-#else
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
-#endif
 
 #include <asm-generic/atomic.h>
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */

+ 0 - 5
arch/sparc/include/asm/bitops_64.h

@@ -23,13 +23,8 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);
 
 #include <asm-generic/bitops/non-atomic.h>
 
-#ifdef CONFIG_SMP
-#define smp_mb__before_clear_bit()	membar_storeload_loadload()
-#define smp_mb__after_clear_bit()	membar_storeload_storestore()
-#else
 #define smp_mb__before_clear_bit()	barrier()
 #define smp_mb__after_clear_bit()	barrier()
-#endif
 
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/__ffs.h>

+ 0 - 14
arch/sparc/include/asm/spinlock_64.h

@@ -33,12 +33,10 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 
 	__asm__ __volatile__(
 "1:	ldstub		[%1], %0\n"
-"	membar		#StoreLoad | #StoreStore\n"
 "	brnz,pn		%0, 2f\n"
 "	 nop\n"
 "	.subsection	2\n"
 "2:	ldub		[%1], %0\n"
-"	membar		#LoadLoad\n"
 "	brnz,pt		%0, 2b\n"
 "	 nop\n"
 "	ba,a,pt		%%xcc, 1b\n"
@@ -54,7 +52,6 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 	__asm__ __volatile__(
 "	ldstub		[%1], %0\n"
-"	membar		#StoreLoad | #StoreStore"
 	: "=r" (result)
 	: "r" (lock)
 	: "memory");
@@ -65,7 +62,6 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
 	__asm__ __volatile__(
-"	membar		#StoreStore | #LoadStore\n"
 "	stb		%%g0, [%0]"
 	: /* No outputs */
 	: "r" (lock)
@@ -78,14 +74,12 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
 
 	__asm__ __volatile__(
 "1:	ldstub		[%2], %0\n"
-"	membar		#StoreLoad | #StoreStore\n"
 "	brnz,pn		%0, 2f\n"
 "	 nop\n"
 "	.subsection	2\n"
 "2:	rdpr		%%pil, %1\n"
 "	wrpr		%3, %%pil\n"
 "3:	ldub		[%2], %0\n"
-"	membar		#LoadLoad\n"
 "	brnz,pt		%0, 3b\n"
 "	 nop\n"
 "	ba,pt		%%xcc, 1b\n"
@@ -108,12 +102,10 @@ static void inline __read_lock(raw_rwlock_t *lock)
 "4:	 add		%0, 1, %1\n"
 "	cas		[%2], %0, %1\n"
 "	cmp		%0, %1\n"
-"	membar		#StoreLoad | #StoreStore\n"
 "	bne,pn		%%icc, 1b\n"
 "	 nop\n"
 "	.subsection	2\n"
 "2:	ldsw		[%2], %0\n"
-"	membar		#LoadLoad\n"
 "	brlz,pt		%0, 2b\n"
 "	 nop\n"
 "	ba,a,pt		%%xcc, 4b\n"
@@ -134,7 +126,6 @@ static int inline __read_trylock(raw_rwlock_t *lock)
 "	add		%0, 1, %1\n"
 "	cas		[%2], %0, %1\n"
 "	cmp		%0, %1\n"
-"	membar		#StoreLoad | #StoreStore\n"
 "	bne,pn		%%icc, 1b\n"
 "	 mov		1, %0\n"
 "2:"
@@ -150,7 +141,6 @@ static void inline __read_unlock(raw_rwlock_t *lock)
 	unsigned long tmp1, tmp2;
 
 	__asm__ __volatile__(
-"	membar	#StoreLoad | #LoadLoad\n"
 "1:	lduw	[%2], %0\n"
 "	sub	%0, 1, %1\n"
 "	cas	[%2], %0, %1\n"
@@ -174,12 +164,10 @@ static void inline __write_lock(raw_rwlock_t *lock)
 "4:	 or		%0, %3, %1\n"
 "	cas		[%2], %0, %1\n"
 "	cmp		%0, %1\n"
-"	membar		#StoreLoad | #StoreStore\n"
 "	bne,pn		%%icc, 1b\n"
 "	 nop\n"
 "	.subsection	2\n"
 "2:	lduw		[%2], %0\n"
-"	membar		#LoadLoad\n"
 "	brnz,pt		%0, 2b\n"
 "	 nop\n"
 "	ba,a,pt		%%xcc, 4b\n"
@@ -192,7 +180,6 @@ static void inline __write_lock(raw_rwlock_t *lock)
 static void inline __write_unlock(raw_rwlock_t *lock)
 {
 	__asm__ __volatile__(
-"	membar		#LoadStore | #StoreStore\n"
 "	stw		%%g0, [%0]"
 	: /* no outputs */
 	: "r" (lock)
@@ -212,7 +199,6 @@ static int inline __write_trylock(raw_rwlock_t *lock)
 "	 or		%0, %4, %1\n"
 "	cas		[%3], %0, %1\n"
 "	cmp		%0, %1\n"
-"	membar		#StoreLoad | #StoreStore\n"
 "	bne,pn		%%icc, 1b\n"
 "	 nop\n"
 "	mov		1, %2\n"

+ 8 - 27
arch/sparc/include/asm/system_64.h

@@ -59,20 +59,9 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 			     : : : "memory"); \
 } while (0)
 
-#define mb()	\
-	membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
-#define rmb()	\
-	membar_safe("#LoadLoad")
-#define wmb()	\
-	membar_safe("#StoreStore")
-#define membar_storeload() \
-	membar_safe("#StoreLoad")
-#define membar_storeload_storestore() \
-	membar_safe("#StoreLoad | #StoreStore")
-#define membar_storeload_loadload() \
-	membar_safe("#StoreLoad | #LoadLoad")
-#define membar_storestore_loadstore() \
-	membar_safe("#StoreStore | #LoadStore")
+#define mb()	membar_safe("#StoreLoad")
+#define rmb()	__asm__ __volatile__("":::"memory")
+#define wmb()	__asm__ __volatile__("":::"memory")
 
 #endif
 
@@ -80,20 +69,20 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 
 #define read_barrier_depends()		do { } while(0)
 #define set_mb(__var, __value) \
-	do { __var = __value; membar_storeload_storestore(); } while(0)
+	do { __var = __value; membar_safe("#StoreLoad"); } while(0)
 
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
 #define smp_rmb()	rmb()
 #define smp_wmb()	wmb()
-#define smp_read_barrier_depends()	read_barrier_depends()
 #else
 #define smp_mb()	__asm__ __volatile__("":::"memory")
 #define smp_rmb()	__asm__ __volatile__("":::"memory")
 #define smp_wmb()	__asm__ __volatile__("":::"memory")
-#define smp_read_barrier_depends()	do { } while(0)
 #endif
 
+#define smp_read_barrier_depends()	do { } while(0)
+
 #define flushi(addr)	__asm__ __volatile__ ("flush %0" : : "r" (addr) : "memory")
 
 #define flushw_all()	__asm__ __volatile__("flushw")
@@ -209,14 +198,12 @@ static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int va
 	unsigned long tmp1, tmp2;
 
 	__asm__ __volatile__(
-"	membar		#StoreLoad | #LoadLoad\n"
 "	mov		%0, %1\n"
 "1:	lduw		[%4], %2\n"
 "	cas		[%4], %2, %0\n"
 "	cmp		%2, %0\n"
 "	bne,a,pn	%%icc, 1b\n"
 "	 mov		%1, %0\n"
-"	membar		#StoreLoad | #StoreStore\n"
 	: "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
 	: "0" (val), "r" (m)
 	: "cc", "memory");
@@ -228,14 +215,12 @@ static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long
 	unsigned long tmp1, tmp2;
 
 	__asm__ __volatile__(
-"	membar		#StoreLoad | #LoadLoad\n"
 "	mov		%0, %1\n"
 "1:	ldx		[%4], %2\n"
 "	casx		[%4], %2, %0\n"
 "	cmp		%2, %0\n"
 "	bne,a,pn	%%xcc, 1b\n"
 "	 mov		%1, %0\n"
-"	membar		#StoreLoad | #StoreStore\n"
 	: "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
 	: "0" (val), "r" (m)
 	: "cc", "memory");
@@ -272,9 +257,7 @@ extern void die_if_kernel(char *str, struct pt_regs *regs) __attribute__ ((noret
 static inline unsigned long
 __cmpxchg_u32(volatile int *m, int old, int new)
 {
-	__asm__ __volatile__("membar #StoreLoad | #LoadLoad\n"
-			     "cas [%2], %3, %0\n\t"
-			     "membar #StoreLoad | #StoreStore"
+	__asm__ __volatile__("cas [%2], %3, %0"
 			     : "=&r" (new)
 			     : "0" (new), "r" (m), "r" (old)
 			     : "memory");
@@ -285,9 +268,7 @@ __cmpxchg_u32(volatile int *m, int old, int new)
 static inline unsigned long
 __cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new)
 {
-	__asm__ __volatile__("membar #StoreLoad | #LoadLoad\n"
-			     "casx [%2], %3, %0\n\t"
-			     "membar #StoreLoad | #StoreStore"
+	__asm__ __volatile__("casx [%2], %3, %0"
 			     : "=&r" (new)
 			     : "0" (new), "r" (m), "r" (old)
 			     : "memory");

+ 0 - 6
arch/sparc/include/asm/tsb.h

@@ -50,8 +50,6 @@
 #define TSB_TAG_INVALID_BIT	46
 #define TSB_TAG_INVALID_HIGH	(1 << (TSB_TAG_INVALID_BIT - 32))
 
-#define TSB_MEMBAR	membar	#StoreStore
-
 /* Some cpus support physical address quad loads.  We want to use
  * those if possible so we don't need to hard-lock the TSB mapping
  * into the TLB.  We encode some instruction patching in order to
@@ -128,13 +126,11 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	cmp	REG1, REG2;		\
 	bne,pn	%icc, 99b;		\
 	 nop;				\
-	TSB_MEMBAR
 
 #define TSB_WRITE(TSB, TTE, TAG) \
 	add	TSB, 0x8, TSB;   \
 	TSB_STORE(TSB, TTE);     \
 	sub	TSB, 0x8, TSB;   \
-	TSB_MEMBAR;              \
 	TSB_STORE(TSB, TAG);
 
 #define KTSB_LOAD_QUAD(TSB, REG) \
@@ -153,13 +149,11 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	cmp	REG1, REG2;		\
 	bne,pn	%icc, 99b;		\
 	 nop;				\
-	TSB_MEMBAR
 
 #define KTSB_WRITE(TSB, TTE, TAG) \
 	add	TSB, 0x8, TSB;   \
 	stxa	TTE, [TSB] ASI_N;     \
 	sub	TSB, 0x8, TSB;   \
-	TSB_MEMBAR;              \
 	stxa	TAG, [TSB] ASI_N;
 
 	/* Do a kernel page table walk.  Leaves physical PTE pointer in

+ 5 - 6
arch/sparc64/kernel/smp.c

@@ -163,7 +163,7 @@ static inline long get_delta (long *rt, long *master)
 	for (i = 0; i < NUM_ITERS; i++) {
 		t0 = tick_ops->get_tick();
 		go[MASTER] = 1;
-		membar_storeload();
+		membar_safe("#StoreLoad");
 		while (!(tm = go[SLAVE]))
 			rmb();
 		go[SLAVE] = 0;
@@ -257,7 +257,7 @@ static void smp_synchronize_one_tick(int cpu)
 
 	/* now let the client proceed into his loop */
 	go[MASTER] = 0;
-	membar_storeload();
+	membar_safe("#StoreLoad");
 
 	spin_lock_irqsave(&itc_sync_lock, flags);
 	{
@@ -267,7 +267,7 @@ static void smp_synchronize_one_tick(int cpu)
 			go[MASTER] = 0;
 			wmb();
 			go[SLAVE] = tick_ops->get_tick();
-			membar_storeload();
+			membar_safe("#StoreLoad");
 		}
 	}
 	spin_unlock_irqrestore(&itc_sync_lock, flags);
@@ -1122,7 +1122,6 @@ void smp_capture(void)
 		       smp_processor_id());
 #endif
 		penguins_are_doing_time = 1;
-		membar_storestore_loadstore();
 		atomic_inc(&smp_capture_registry);
 		smp_cross_call(&xcall_capture, 0, 0, 0);
 		while (atomic_read(&smp_capture_registry) != ncpus)
@@ -1142,7 +1141,7 @@ void smp_release(void)
 		       smp_processor_id());
 #endif
 		penguins_are_doing_time = 0;
-		membar_storeload_storestore();
+		membar_safe("#StoreLoad");
 		atomic_dec(&smp_capture_registry);
 	}
 }
@@ -1161,7 +1160,7 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
 	__asm__ __volatile__("flushw");
 	prom_world(1);
 	atomic_inc(&smp_capture_registry);
-	membar_storeload_storestore();
+	membar_safe("#StoreLoad");
 	while (penguins_are_doing_time)
 		rmb();
 	atomic_dec(&smp_capture_registry);

+ 0 - 4
arch/sparc64/kernel/trampoline.S

@@ -109,7 +109,6 @@ startup_continue:
 	 */
 	sethi		%hi(prom_entry_lock), %g2
 1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
-	membar		#StoreLoad | #StoreStore
 	brnz,pn		%g1, 1b
 	 nop
 
@@ -214,7 +213,6 @@ startup_continue:
 
 	sethi		%hi(prom_entry_lock), %g2
 	stb		%g0, [%g2 + %lo(prom_entry_lock)]
-	membar		#StoreStore | #StoreLoad
 
 	ba,pt		%xcc, after_lock_tlb
 	 nop
@@ -330,7 +328,6 @@ after_lock_tlb:
 
 	sethi		%hi(prom_entry_lock), %g2
 1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
-	membar		#StoreLoad | #StoreStore
 	brnz,pn		%g1, 1b
 	 nop
 
@@ -394,7 +391,6 @@ after_lock_tlb:
 
 3:	sethi		%hi(prom_entry_lock), %g2
 	stb		%g0, [%g2 + %lo(prom_entry_lock)]
-	membar		#StoreStore | #StoreLoad
 
 	ldx		[%l0], %g6
 	ldx		[%g6 + TI_TASK], %g4

+ 0 - 1
arch/sparc64/kernel/traps.c

@@ -1371,7 +1371,6 @@ static int cheetah_fix_ce(unsigned long physaddr)
 	__asm__ __volatile__("ldxa	[%0] %3, %%g0\n\t"
 			     "ldxa	[%1] %3, %%g0\n\t"
 			     "casxa	[%2] %3, %%g0, %%g0\n\t"
-			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "ldxa	[%0] %3, %%g0\n\t"
 			     "ldxa	[%1] %3, %%g0\n\t"
 			     "membar	#Sync"

+ 3 - 3
arch/sparc64/kernel/tsb.S

@@ -317,7 +317,7 @@ tsb_flush:
 	srlx	%g1, 32, %o3
 	andcc	%o3, %g2, %g0
 	bne,pn	%icc, 1b
-	 membar	#LoadLoad
+	 nop
 	cmp	%g1, %o1
 	mov	1, %o3
 	bne,pt	%xcc, 2f
@@ -327,7 +327,7 @@ tsb_flush:
 	bne,pn	%xcc, 1b
 	 nop
 2:	retl
-	 TSB_MEMBAR
+	 nop
 	.size	tsb_flush, .-tsb_flush
 
 	/* Reload MMU related context switch state at
@@ -478,7 +478,7 @@ copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
 	 nop
 
 	retl
-	 TSB_MEMBAR
+	 nop
 	.size		copy_tsb, .-copy_tsb
 
 	/* Set the invalid bit in all TSB entries.  */

+ 0 - 26
arch/sparc64/lib/atomic.S

@@ -43,29 +43,10 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
 	.size	atomic_sub, .-atomic_sub
 
-	/* On SMP we need to use memory barriers to ensure
-	 * correct memory operation ordering, nop these out
-	 * for uniprocessor.
-	 */
-#ifdef CONFIG_SMP
-
-#define ATOMIC_PRE_BARRIER	membar #StoreLoad | #LoadLoad;
-#define ATOMIC_POST_BARRIER	\
-	ba,pt %xcc, 80b;	\
-	membar #StoreLoad | #StoreStore
-
-80:	retl
-	 nop
-#else
-#define ATOMIC_PRE_BARRIER
-#define ATOMIC_POST_BARRIER
-#endif
-
 	.globl	atomic_add_ret
 	.type	atomic_add_ret,#function
 atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
-	ATOMIC_PRE_BARRIER
 1:	lduw	[%o1], %g1
 	add	%g1, %o0, %g7
 	cas	[%o1], %g1, %g7
@@ -73,7 +54,6 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	bne,pn	%icc, 2f
 	 add	%g7, %o0, %g7
 	sra	%g7, 0, %o0
-	ATOMIC_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
@@ -83,7 +63,6 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	.type	atomic_sub_ret,#function
 atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
-	ATOMIC_PRE_BARRIER
 1:	lduw	[%o1], %g1
 	sub	%g1, %o0, %g7
 	cas	[%o1], %g1, %g7
@@ -91,7 +70,6 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	bne,pn	%icc, 2f
 	 sub	%g7, %o0, %g7
 	sra	%g7, 0, %o0
-	ATOMIC_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
@@ -131,7 +109,6 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */
 	.type	atomic64_add_ret,#function
 atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
-	ATOMIC_PRE_BARRIER
 1:	ldx	[%o1], %g1
 	add	%g1, %o0, %g7
 	casx	[%o1], %g1, %g7
@@ -139,7 +116,6 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	bne,pn	%xcc, 2f
 	 add	%g7, %o0, %g7
 	mov	%g7, %o0
-	ATOMIC_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
@@ -149,7 +125,6 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	.type	atomic64_sub_ret,#function
 atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
-	ATOMIC_PRE_BARRIER
 1:	ldx	[%o1], %g1
 	sub	%g1, %o0, %g7
 	casx	[%o1], %g1, %g7
@@ -157,7 +132,6 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	bne,pn	%xcc, 2f
 	 sub	%g7, %o0, %g7
 	mov	%g7, %o0
-	ATOMIC_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)

+ 0 - 24
arch/sparc64/lib/bitops.S

@@ -8,29 +8,10 @@
 
 	.text
 
-	/* On SMP we need to use memory barriers to ensure
-	 * correct memory operation ordering, nop these out
-	 * for uniprocessor.
-	 */
-
-#ifdef CONFIG_SMP
-#define BITOP_PRE_BARRIER	membar #StoreLoad | #LoadLoad
-#define BITOP_POST_BARRIER	\
-	ba,pt	%xcc, 80b;	\
-	membar #StoreLoad | #StoreStore
-
-80:	retl
-	 nop
-#else
-#define BITOP_PRE_BARRIER
-#define BITOP_POST_BARRIER
-#endif
-
 	.globl	test_and_set_bit
 	.type	test_and_set_bit,#function
 test_and_set_bit:	/* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
-	BITOP_PRE_BARRIER
 	srlx	%o0, 6, %g1
 	mov	1, %o2
 	sllx	%g1, 3, %g3
@@ -45,7 +26,6 @@ test_and_set_bit:	/* %o0=nr, %o1=addr */
 	 and	%g7, %o2, %g2
 	clr	%o0
 	movrne	%g2, 1, %o0
-	BITOP_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
@@ -55,7 +35,6 @@ test_and_set_bit:	/* %o0=nr, %o1=addr */
 	.type	test_and_clear_bit,#function
 test_and_clear_bit:	/* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
-	BITOP_PRE_BARRIER
 	srlx	%o0, 6, %g1
 	mov	1, %o2
 	sllx	%g1, 3, %g3
@@ -70,7 +49,6 @@ test_and_clear_bit:	/* %o0=nr, %o1=addr */
 	 and	%g7, %o2, %g2
 	clr	%o0
 	movrne	%g2, 1, %o0
-	BITOP_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
@@ -80,7 +58,6 @@ test_and_clear_bit:	/* %o0=nr, %o1=addr */
 	.type	test_and_change_bit,#function
 test_and_change_bit:	/* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
-	BITOP_PRE_BARRIER
 	srlx	%o0, 6, %g1
 	mov	1, %o2
 	sllx	%g1, 3, %g3
@@ -95,7 +72,6 @@ test_and_change_bit:	/* %o0=nr, %o1=addr */
 	 and	%g7, %o2, %g2
 	clr	%o0
 	movrne	%g2, 1, %o0
-	BITOP_POST_BARRIER
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)

+ 0 - 7
arch/sparc64/lib/rwsem.S

@@ -17,7 +17,6 @@ __down_read:
 	bne,pn		%icc, 1b
 	 add		%g7, 1, %g7
 	cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
 	 nop
 2:
@@ -42,7 +41,6 @@ __down_read_trylock:
 	cmp		%g1, %g7
 	bne,pn		%icc, 1b
 	 mov		1, %o1
-	membar		#StoreLoad | #StoreStore
 2:	retl
 	 mov		%o1, %o0
 	.size		__down_read_trylock, .-__down_read_trylock
@@ -58,7 +56,6 @@ __down_write:
 	cmp		%g3, %g7
 	bne,pn		%icc, 1b
 	 cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bne,pn		%icc, 3f
 	 nop
 2:	retl
@@ -85,7 +82,6 @@ __down_write_trylock:
 	cmp		%g3, %g7
 	bne,pn		%icc, 1b
 	 mov		1, %o1
-	membar		#StoreLoad | #StoreStore
 2:	retl
 	 mov		%o1, %o0
 	.size		__down_write_trylock, .-__down_write_trylock
@@ -99,7 +95,6 @@ __up_read:
 	cmp		%g1, %g7
 	bne,pn		%icc, 1b
 	 cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
 	 nop
 2:	retl
@@ -129,7 +124,6 @@ __up_write:
 	bne,pn		%icc, 1b
 	 sub		%g7, %g1, %g7
 	cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
 	 nop
 2:
@@ -155,7 +149,6 @@ __downgrade_write:
 	bne,pn		%icc, 1b
 	 sub		%g7, %g1, %g7
 	cmp		%g7, 0
-	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
 	 nop
 2:

+ 0 - 2
arch/sparc64/mm/init.c

@@ -214,7 +214,6 @@ static inline void set_dcache_dirty(struct page *page, int this_cpu)
 			     "or	%%g1, %0, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
-			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
 			     " nop"
 			     : /* no outputs */
@@ -236,7 +235,6 @@ static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu)
 			     " andn	%%g7, %1, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
-			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
 			     " nop\n"
 			     "2:"

+ 1 - 3
arch/sparc64/mm/tsb.c

@@ -41,10 +41,8 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
 					      KERNEL_TSB_NENTRIES);
 		struct tsb *ent = &swapper_tsb[hash];
 
-		if (tag_compare(ent->tag, v)) {
+		if (tag_compare(ent->tag, v))
 			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
-			membar_storeload_storestore();
-		}
 	}
 }
 

+ 0 - 2
arch/sparc64/mm/ultra.S

@@ -125,7 +125,6 @@ __spitfire_flush_tlb_mm_slow:
 	.align		32
 	.globl		__flush_icache_page
 __flush_icache_page:	/* %o0 = phys_page */
-	membar		#StoreStore
 	srlx		%o0, PAGE_SHIFT, %o0
 	sethi		%uhi(PAGE_OFFSET), %g1
 	sllx		%o0, PAGE_SHIFT, %o0
@@ -507,7 +506,6 @@ xcall_fetch_glob_regs:
 	sllx		%g2, TRAP_BLOCK_SZ_SHIFT, %g2
 	add		%g7, %g2, %g7
 	ldx		[%g7 + TRAP_PER_CPU_THREAD], %g3
-	membar		#StoreStore
 	stx		%g3, [%g1 + GR_SNAP_THREAD]
 	retry