|
@@ -177,39 +177,6 @@ do { \
|
|
|
} \
|
|
|
} while (0)
|
|
|
|
|
|
-/*
|
|
|
- * Add return operation
|
|
|
- */
|
|
|
-#define percpu_add_return_op(var, val) \
|
|
|
-({ \
|
|
|
- typeof(var) paro_ret__ = val; \
|
|
|
- switch (sizeof(var)) { \
|
|
|
- case 1: \
|
|
|
- asm("xaddb %0, "__percpu_arg(1) \
|
|
|
- : "+q" (paro_ret__), "+m" (var) \
|
|
|
- : : "memory"); \
|
|
|
- break; \
|
|
|
- case 2: \
|
|
|
- asm("xaddw %0, "__percpu_arg(1) \
|
|
|
- : "+r" (paro_ret__), "+m" (var) \
|
|
|
- : : "memory"); \
|
|
|
- break; \
|
|
|
- case 4: \
|
|
|
- asm("xaddl %0, "__percpu_arg(1) \
|
|
|
- : "+r" (paro_ret__), "+m" (var) \
|
|
|
- : : "memory"); \
|
|
|
- break; \
|
|
|
- case 8: \
|
|
|
- asm("xaddq %0, "__percpu_arg(1) \
|
|
|
- : "+re" (paro_ret__), "+m" (var) \
|
|
|
- : : "memory"); \
|
|
|
- break; \
|
|
|
- default: __bad_percpu_size(); \
|
|
|
- } \
|
|
|
- paro_ret__ += val; \
|
|
|
- paro_ret__; \
|
|
|
-})
|
|
|
-
|
|
|
#define percpu_from_op(op, var, constraint) \
|
|
|
({ \
|
|
|
typeof(var) pfo_ret__; \
|
|
@@ -262,6 +229,125 @@ do { \
|
|
|
} \
|
|
|
})
|
|
|
|
|
|
+/*
|
|
|
+ * Add return operation
|
|
|
+ */
|
|
|
+#define percpu_add_return_op(var, val) \
|
|
|
+({ \
|
|
|
+ typeof(var) paro_ret__ = val; \
|
|
|
+ switch (sizeof(var)) { \
|
|
|
+ case 1: \
|
|
|
+ asm("xaddb %0, "__percpu_arg(1) \
|
|
|
+ : "+q" (paro_ret__), "+m" (var) \
|
|
|
+ : : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 2: \
|
|
|
+ asm("xaddw %0, "__percpu_arg(1) \
|
|
|
+ : "+r" (paro_ret__), "+m" (var) \
|
|
|
+ : : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 4: \
|
|
|
+ asm("xaddl %0, "__percpu_arg(1) \
|
|
|
+ : "+r" (paro_ret__), "+m" (var) \
|
|
|
+ : : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 8: \
|
|
|
+ asm("xaddq %0, "__percpu_arg(1) \
|
|
|
+ : "+re" (paro_ret__), "+m" (var) \
|
|
|
+ : : "memory"); \
|
|
|
+ break; \
|
|
|
+ default: __bad_percpu_size(); \
|
|
|
+ } \
|
|
|
+ paro_ret__ += val; \
|
|
|
+ paro_ret__; \
|
|
|
+})
|
|
|
+
|
|
|
+/*
|
|
|
+ * xchg is implemented as an unlocked cmpxchg retry loop, because xchg has an
|
|
|
+ * implied lock prefix, which is expensive and stops the processor prefetching
|
|
|
+ * cachelines. The result is earlyclobber ("=&a") so nval never shares %rax.
|
|
|
+ */
|
|
|
+#define percpu_xchg_op(var, nval) \
|
|
|
+({ \
|
|
|
+ typeof(var) pxo_ret__; \
|
|
|
+ typeof(var) pxo_new__ = (nval); \
|
|
|
+ switch (sizeof(var)) { \
|
|
|
+ case 1: \
|
|
|
+ asm("\n\tmov "__percpu_arg(1)",%%al" \
|
|
|
+ "\n1:\tcmpxchgb %2, "__percpu_arg(1) \
|
|
|
+ "\n\tjnz 1b" \
|
|
|
+ : "=&a" (pxo_ret__), "+m" (var) \
|
|
|
+ : "q" (pxo_new__) \
|
|
|
+ : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 2: \
|
|
|
+ asm("\n\tmov "__percpu_arg(1)",%%ax" \
|
|
|
+ "\n1:\tcmpxchgw %2, "__percpu_arg(1) \
|
|
|
+ "\n\tjnz 1b" \
|
|
|
+ : "=&a" (pxo_ret__), "+m" (var) \
|
|
|
+ : "r" (pxo_new__) \
|
|
|
+ : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 4: \
|
|
|
+ asm("\n\tmov "__percpu_arg(1)",%%eax" \
|
|
|
+ "\n1:\tcmpxchgl %2, "__percpu_arg(1) \
|
|
|
+ "\n\tjnz 1b" \
|
|
|
+ : "=&a" (pxo_ret__), "+m" (var) \
|
|
|
+ : "r" (pxo_new__) \
|
|
|
+ : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 8: \
|
|
|
+ asm("\n\tmov "__percpu_arg(1)",%%rax" \
|
|
|
+ "\n1:\tcmpxchgq %2, "__percpu_arg(1) \
|
|
|
+ "\n\tjnz 1b" \
|
|
|
+ : "=&a" (pxo_ret__), "+m" (var) \
|
|
|
+ : "r" (pxo_new__) \
|
|
|
+ : "memory"); \
|
|
|
+ break; \
|
|
|
+ default: __bad_percpu_size(); \
|
|
|
+ } \
|
|
|
+ pxo_ret__; \
|
|
|
+})
|
|
|
+
|
|
|
+/*
|
|
|
+ * cmpxchg has no implied lock semantics (unlike xchg), so it is much more
|
|
|
+ * efficient for cpu local operations. Evaluates to the accumulator after cmpxchg.
|
|
|
+ */
|
|
|
+#define percpu_cmpxchg_op(var, oval, nval) \
|
|
|
+({ \
|
|
|
+ typeof(var) pco_ret__; \
|
|
|
+ typeof(var) pco_old__ = (oval); \
|
|
|
+ typeof(var) pco_new__ = (nval); \
|
|
|
+ switch (sizeof(var)) { \
|
|
|
+ case 1: \
|
|
|
+ asm("cmpxchgb %2, "__percpu_arg(1) \
|
|
|
+ : "=a" (pco_ret__), "+m" (var) \
|
|
|
+ : "q" (pco_new__), "0" (pco_old__) \
|
|
|
+ : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 2: \
|
|
|
+ asm("cmpxchgw %2, "__percpu_arg(1) \
|
|
|
+ : "=a" (pco_ret__), "+m" (var) \
|
|
|
+ : "r" (pco_new__), "0" (pco_old__) \
|
|
|
+ : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 4: \
|
|
|
+ asm("cmpxchgl %2, "__percpu_arg(1) \
|
|
|
+ : "=a" (pco_ret__), "+m" (var) \
|
|
|
+ : "r" (pco_new__), "0" (pco_old__) \
|
|
|
+ : "memory"); \
|
|
|
+ break; \
|
|
|
+ case 8: \
|
|
|
+ asm("cmpxchgq %2, "__percpu_arg(1) \
|
|
|
+ : "=a" (pco_ret__), "+m" (var) \
|
|
|
+ : "r" (pco_new__), "0" (pco_old__) \
|
|
|
+ : "memory"); \
|
|
|
+ break; \
|
|
|
+ default: __bad_percpu_size(); \
|
|
|
+ } \
|
|
|
+ pco_ret__; \
|
|
|
+})
|
|
|
+
|
|
|
/*
|
|
|
* percpu_read() makes gcc load the percpu variable every time it is
|
|
|
* accessed while percpu_read_stable() allows the value to be cached.
|
|
@@ -300,6 +386,12 @@ do { \
|
|
|
#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
+/*
|
|
|
+ * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
|
|
|
+ * faster than an xchg with forced lock semantics.
|
|
|
+ */
|
|
|
+#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
|
|
|
#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
|
|
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
|
@@ -319,6 +411,11 @@ do { \
|
|
|
#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
|
|
|
#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
|
|
|
#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
|
|
@@ -332,15 +429,32 @@ do { \
|
|
|
#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
+#define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
|
|
|
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
|
|
|
#ifndef CONFIG_M386
|
|
|
#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
+#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+
|
|
|
#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
-#endif
|
|
|
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+
|
|
|
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
|
|
+#endif /* !CONFIG_M386 */
|
|
|
+
|
|
|
/*
|
|
|
* Per cpu atomic 64 bit operations are only available under 64 bit.
|
|
|
* 32 bit must fall back to generic operations.
|
|
@@ -352,6 +466,7 @@ do { \
|
|
|
#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
|
|
|
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
|
|
|
#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
+#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
|
|
|
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
|
|
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
|
|
@@ -359,14 +474,12 @@ do { \
|
|
|
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
|
|
|
#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
|
|
|
#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
|
|
|
#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
|
|
|
#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
|
|
|
#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
|
|
|
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
|
|
|
-
|
|
|
-#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
|
|
|
#endif
|
|
|
|
|
|
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
|