|
@@ -451,6 +451,26 @@ do { \
|
|
|
#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
#endif /* !CONFIG_M386 */
|
|
|
|
|
|
+#ifdef CONFIG_X86_CMPXCHG64
|
|
|
+#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \
|
|
|
+({ \
|
|
|
+ char __ret; \
|
|
|
+ typeof(o1) __o1 = o1; \
|
|
|
+ typeof(o1) __n1 = n1; \
|
|
|
+ typeof(o2) __o2 = o2; \
|
|
|
+ typeof(o2) __n2 = n2; \
|
|
|
+ typeof(o2) __dummy = n2; \
|
|
|
+ asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
|
|
|
+ : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \
|
|
|
+ : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \
|
|
|
+ __ret; \
|
|
|
+})
|
|
|
+
|
|
|
+#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
|
|
|
+#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
|
|
|
+#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
|
|
|
+#endif /* CONFIG_X86_CMPXCHG64 */
|
|
|
+
|
|
|
/*
|
|
|
* Per cpu atomic 64 bit operations are only available under 64 bit.
|
|
|
* 32 bit must fall back to generic operations.
|
|
@@ -480,6 +500,34 @@ do { \
|
|
|
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+
|
|
|
+/*
|
|
|
+ * Pretty complex macro to generate the cmpxchg16b instruction. The instruction
|
|
|
+ * is not supported on early AMD64 processors so we must be able to emulate
|
|
|
+ * it in software. The address used in the cmpxchg16b instruction must be
|
|
|
+ * aligned to a 16 byte boundary.
|
|
|
+ */
|
|
|
+#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
|
|
|
+({ \
|
|
|
+ char __ret; \
|
|
|
+ typeof(o1) __o1 = o1; \
|
|
|
+ typeof(o1) __n1 = n1; \
|
|
|
+ typeof(o2) __o2 = o2; \
|
|
|
+ typeof(o2) __n2 = n2; \
|
|
|
+ typeof(o2) __dummy; \
|
|
|
+ alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \
|
|
|
+ "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \
|
|
|
+ X86_FEATURE_CX16, \
|
|
|
+ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \
|
|
|
+ "S" (&pcp1), "b"(__n1), "c"(__n2), \
|
|
|
+ "a"(__o1), "d"(__o2)); \
|
|
|
+ __ret; \
|
|
|
+})
|
|
|
+
|
|
|
+#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
|
|
|
+#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
|
|
|
+#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
|
|
|
+
|
|
|
#endif
|
|
|
|
|
|
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
|