12 years ago · bdbf0a4cf2
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -12,6 +12,7 @@
 
				 #define __ASM_ARM_ATOMIC_H
			
 
				 
			
 
				 #include <linux/compiler.h>
			
 
				+#include <linux/prefetch.h>
			
 
				 #include <linux/types.h>
			
 
				 #include <linux/irqflags.h>
			
 
				 #include <asm/barrier.h>
			
@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v)
 
				 	unsigned long tmp;
			
 
				 	int result;
			
 
				 
			
 
				+	prefetchw(&v->counter);
			
 
				 	__asm__ __volatile__("@ atomic_add\n"
			
 
				 "1:	ldrex	%0, [%3]\n"
			
 
				 "	add	%0, %0, %4\n"
			
@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 
				 	unsigned long tmp;
			
 
				 	int result;
			
 
				 
			
 
				+	prefetchw(&v->counter);
			
 
				 	__asm__ __volatile__("@ atomic_sub\n"
			
 
				 "1:	ldrex	%0, [%3]\n"
			
 
				 "	sub	%0, %0, %4\n"
			
@@ -138,6 +141,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 
				 {
			
 
				 	unsigned long tmp, tmp2;
			
 
				 
			
 
				+	prefetchw(addr);
			
 
				 	__asm__ __volatile__("@ atomic_clear_mask\n"
			
 
				 "1:	ldrex	%0, [%3]\n"
			
 
				 "	bic	%0, %0, %4\n"
			
@@ -283,6 +287,7 @@ static inline void atomic64_set(atomic64_t *v, u64 i)
 
				 {
			
 
				 	u64 tmp;
			
 
				 
			
 
				+	prefetchw(&v->counter);
			
 
				 	__asm__ __volatile__("@ atomic64_set\n"
			
 
				 "1:	ldrexd	%0, %H0, [%2]\n"
			
 
				 "	strexd	%0, %3, %H3, [%2]\n"
			
@@ -299,6 +304,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 
				 	u64 result;
			
 
				 	unsigned long tmp;
			
 
				 
			
 
				+	prefetchw(&v->counter);
			
 
				 	__asm__ __volatile__("@ atomic64_add\n"
			
 
				 "1:	ldrexd	%0, %H0, [%3]\n"
			
 
				 "	adds	%0, %0, %4\n"
			
@@ -339,6 +345,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 
				 	u64 result;
			
 
				 	unsigned long tmp;
			
 
				 
			
 
				+	prefetchw(&v->counter);
			
 
				 	__asm__ __volatile__("@ atomic64_sub\n"
			
 
				 "1:	ldrexd	%0, %H0, [%3]\n"
			
 
				 "	subs	%0, %0, %4\n"
			
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -22,6 +22,7 @@
 
				 #include <asm/hw_breakpoint.h>
			
 
				 #include <asm/ptrace.h>
			
 
				 #include <asm/types.h>
			
 
				+#include <asm/unified.h>
			
 
				 
			
 
				 #ifdef __KERNEL__
			
 
				 #define STACK_TOP	((current->personality & ADDR_LIMIT_32BIT) ? \
			
@@ -87,6 +88,17 @@ unsigned long get_wchan(struct task_struct *p);
 
				 #define KSTK_EIP(tsk)	task_pt_regs(tsk)->ARM_pc
			
 
				 #define KSTK_ESP(tsk)	task_pt_regs(tsk)->ARM_sp
			
 
				 
			
 
				+#ifdef CONFIG_SMP
			
 
				+#define __ALT_SMP_ASM(smp, up)						\
			
 
				+	"9998:	" smp "\n"						\
			
 
				+	"	.pushsection \".alt.smp.init\", \"a\"\n"		\
			
 
				+	"	.long	9998b\n"					\
			
 
				+	"	" up "\n"						\
			
 
				+	"	.popsection\n"
			
 
				+#else
			
 
				+#define __ALT_SMP_ASM(smp, up)	up
			
 
				+#endif
			
 
				+
			
 
				 /*
			
 
				  * Prefetching support - only ARMv5.
			
 
				  */
			
@@ -97,17 +109,22 @@ static inline void prefetch(const void *ptr)
 
				 {
			
 
				 	__asm__ __volatile__(
			
 
				 		"pld\t%a0"
			
 
				-		:
			
 
				-		: "p" (ptr)
			
 
				-		: "cc");
			
 
				+		:: "p" (ptr));
			
 
				 }
			
 
				 
			
 
				+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
			
 
				 #define ARCH_HAS_PREFETCHW
			
 
				-#define prefetchw(ptr)	prefetch(ptr)
			
 
				-
			
 
				-#define ARCH_HAS_SPINLOCK_PREFETCH
			
 
				-#define spin_lock_prefetch(x) do { } while (0)
			
 
				-
			
 
				+static inline void prefetchw(const void *ptr)
			
 
				+{
			
 
				+	__asm__ __volatile__(
			
 
				+		".arch_extension	mp\n"
			
 
				+		__ALT_SMP_ASM(
			
 
				+			WASM(pldw)		"\t%a0",
			
 
				+			WASM(pld)		"\t%a0"
			
 
				+		)
			
 
				+		:: "p" (ptr));
			
 
				+}
			
 
				+#endif
			
 
				 #endif
			
 
				 
			
 
				 #define HAVE_ARCH_PICK_MMAP_LAYOUT
			
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -5,21 +5,13 @@
 
				 #error SMP not supported on pre-ARMv6 CPUs
			
 
				 #endif
			
 
				 
			
 
				-#include <asm/processor.h>
			
 
				+#include <linux/prefetch.h>
			
 
				 
			
 
				 /*
			
 
				  * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
			
 
				  * extensions, so when running on UP, we have to patch these instructions away.
			
 
				  */
			
 
				-#define ALT_SMP(smp, up)					\
			
 
				-	"9998:	" smp "\n"					\
			
 
				-	"	.pushsection \".alt.smp.init\", \"a\"\n"	\
			
 
				-	"	.long	9998b\n"				\
			
 
				-	"	" up "\n"					\
			
 
				-	"	.popsection\n"
			
 
				-
			
 
				 #ifdef CONFIG_THUMB2_KERNEL
			
 
				-#define SEV		ALT_SMP("sev.w", "nop.w")
			
 
				 /*
			
 
				  * For Thumb-2, special care is needed to ensure that the conditional WFE
			
 
				  * instruction really does assemble to exactly 4 bytes (as required by
			
@@ -31,17 +23,18 @@
 
				  * the assembler won't change IT instructions which are explicitly present
			
 
				  * in the input.
			
 
				  */
			
 
				-#define WFE(cond)	ALT_SMP(		\
			
 
				+#define WFE(cond)	__ALT_SMP_ASM(		\
			
 
				 	"it " cond "\n\t"			\
			
 
				 	"wfe" cond ".n",			\
			
 
				 						\
			
 
				 	"nop.w"					\
			
 
				 )
			
 
				 #else
			
 
				-#define SEV		ALT_SMP("sev", "nop")
			
 
				-#define WFE(cond)	ALT_SMP("wfe" cond, "nop")
			
 
				+#define WFE(cond)	__ALT_SMP_ASM("wfe" cond, "nop")
			
 
				 #endif
			
 
				 
			
 
				+#define SEV		__ALT_SMP_ASM(WASM(sev), WASM(nop))
			
 
				+
			
 
				 static inline void dsb_sev(void)
			
 
				 {
			
 
				 #if __LINUX_ARM_ARCH__ >= 7
			
@@ -77,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 
				 	u32 newval;
			
 
				 	arch_spinlock_t lockval;
			
 
				 
			
 
				+	prefetchw(&lock->slock);
			
 
				 	__asm__ __volatile__(
			
 
				 "1:	ldrex	%0, [%3]\n"
			
 
				 "	add	%1, %0, %4\n"
			
@@ -100,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
				 	unsigned long contended, res;
			
 
				 	u32 slock;
			
 
				 
			
 
				+	prefetchw(&lock->slock);
			
 
				 	do {
			
 
				 		__asm__ __volatile__(
			
 
				 		"	ldrex	%0, [%3]\n"
			
@@ -152,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 
				 {
			
 
				 	unsigned long tmp;
			
 
				 
			
 
				+	prefetchw(&rw->lock);
			
 
				 	__asm__ __volatile__(
			
 
				 "1:	ldrex	%0, [%1]\n"
			
 
				 "	teq	%0, #0\n"
			
@@ -170,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 
				 {
			
 
				 	unsigned long contended, res;
			
 
				 
			
 
				+	prefetchw(&rw->lock);
			
 
				 	do {
			
 
				 		__asm__ __volatile__(
			
 
				 		"	ldrex	%0, [%2]\n"
			
@@ -203,7 +200,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 
				 }
			
 
				 
			
 
				 /* write_can_lock - would write_trylock() succeed? */
			
 
				-#define arch_write_can_lock(x)		((x)->lock == 0)
			
 
				+#define arch_write_can_lock(x)		(ACCESS_ONCE((x)->lock) == 0)
			
 
				 
			
 
				 /*
			
 
				  * Read locks are a bit more hairy:
			
@@ -221,6 +218,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 
				 {
			
 
				 	unsigned long tmp, tmp2;
			
 
				 
			
 
				+	prefetchw(&rw->lock);
			
 
				 	__asm__ __volatile__(
			
 
				 "1:	ldrex	%0, [%2]\n"
			
 
				 "	adds	%0, %0, #1\n"
			
@@ -241,6 +239,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
				 
			
 
				 	smp_mb();
			
 
				 
			
 
				+	prefetchw(&rw->lock);
			
 
				 	__asm__ __volatile__(
			
 
				 "1:	ldrex	%0, [%2]\n"
			
 
				 "	sub	%0, %0, #1\n"
			
@@ -259,6 +258,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 
				 {
			
 
				 	unsigned long contended, res;
			
 
				 
			
 
				+	prefetchw(&rw->lock);
			
 
				 	do {
			
 
				 		__asm__ __volatile__(
			
 
				 		"	ldrex	%0, [%2]\n"
			
@@ -280,7 +280,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 
				 }
			
 
				 
			
 
				 /* read_can_lock - would read_trylock() succeed? */
			
 
				-#define arch_read_can_lock(x)		((x)->lock < 0x80000000)
			
 
				+#define arch_read_can_lock(x)		(ACCESS_ONCE((x)->lock) < 0x80000000)
			
 
				 
			
 
				 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
			
 
				 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
			
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -25,7 +25,7 @@ typedef struct {
 
				 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
			
 
				 
			
 
				 typedef struct {
			
 
				-	volatile unsigned int lock;
			
 
				+	u32 lock;
			
 
				 } arch_rwlock_t;
			
 
				 
			
 
				 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
			
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -38,6 +38,8 @@
 
				 #ifdef __ASSEMBLY__
			
 
				 #define W(instr)	instr.w
			
 
				 #define BSYM(sym)	sym + 1
			
 
				+#else
			
 
				+#define WASM(instr)	#instr ".w"
			
 
				 #endif
			
 
				 
			
 
				 #else	/* !CONFIG_THUMB2_KERNEL */
			
@@ -50,6 +52,8 @@
 
				 #ifdef __ASSEMBLY__
			
 
				 #define W(instr)	instr
			
 
				 #define BSYM(sym)	sym
			
 
				+#else
			
 
				+#define WASM(instr)	#instr
			
 
				 #endif
			
 
				 
			
 
				 #endif	/* CONFIG_THUMB2_KERNEL */
			
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -10,6 +10,11 @@ UNWIND(	.fnstart	)
 
				 	and	r3, r0, #31		@ Get bit offset
			
 
				 	mov	r0, r0, lsr #5
			
 
				 	add	r1, r1, r0, lsl #2	@ Get word offset
			
 
				+#if __LINUX_ARM_ARCH__ >= 7
			
 
				+	.arch_extension	mp
			
 
				+	ALT_SMP(W(pldw)	[r1])
			
 
				+	ALT_UP(W(nop))
			
 
				+#endif
			
 
				 	mov	r3, r2, lsl r3
			
 
				 1:	ldrex	r2, [r1]
			
 
				 	\instr	r2, r2, r3