|
@@ -53,60 +53,33 @@ struct __xchg_dummy {
|
|
__xchg((v), (ptr), sizeof(*ptr))
|
|
__xchg((v), (ptr), sizeof(*ptr))
|
|
|
|
|
|
/*
|
|
/*
|
|
- * The semantics of XCHGCMP8B are a bit strange, this is why
|
|
|
|
- * there is a loop and the loading of %%eax and %%edx has to
|
|
|
|
- * be inside. This inlines well in most cases, the cached
|
|
|
|
- * cost is around ~38 cycles. (in the future we might want
|
|
|
|
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
|
|
|
|
- * might have an implicit FPU-save as a cost, so it's not
|
|
|
|
- * clear which path to go.)
|
|
|
|
|
|
+ * CMPXCHG8B only writes to the target if we had the previous
|
|
|
|
+ * value in registers, otherwise it acts as a read and gives us the
|
|
|
|
+ * "new previous" value. That is why there is a loop. Preloading
|
|
|
|
+ * EDX:EAX is a performance optimization: in the common case it means
|
|
|
|
+ * we need only one locked operation.
|
|
*
|
|
*
|
|
- * cmpxchg8b must be used with the lock prefix here to allow
|
|
|
|
- * the instruction to be executed atomically, see page 3-102
|
|
|
|
- * of the instruction set reference 24319102.pdf. We need
|
|
|
|
- * the reader side to see the coherent 64bit value.
|
|
|
|
|
|
+ * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
|
|
|
|
+ * least an FPU save and/or %cr0.ts manipulation.
|
|
|
|
+ *
|
|
|
|
+ * cmpxchg8b must be used with the lock prefix here to allow the
|
|
|
|
+ * instruction to be executed atomically. We need to have the reader
|
|
|
|
+ * side to see the coherent 64bit value.
|
|
*/
|
|
*/
|
|
-static inline void __set_64bit(unsigned long long *ptr,
|
|
|
|
- unsigned int low, unsigned int high)
|
|
|
|
|
|
+static inline void set_64bit(volatile u64 *ptr, u64 value)
|
|
{
|
|
{
|
|
|
|
+ u32 low = value;
|
|
|
|
+ u32 high = value >> 32;
|
|
|
|
+ u64 prev = *ptr;
|
|
|
|
+
|
|
asm volatile("\n1:\t"
|
|
asm volatile("\n1:\t"
|
|
- "movl (%1), %%eax\n\t"
|
|
|
|
- "movl 4(%1), %%edx\n\t"
|
|
|
|
- LOCK_PREFIX "cmpxchg8b (%1)\n\t"
|
|
|
|
|
|
+ LOCK_PREFIX "cmpxchg8b %0\n\t"
|
|
"jnz 1b"
|
|
"jnz 1b"
|
|
- : "=m" (*ptr)
|
|
|
|
- : "D" (ptr),
|
|
|
|
- "b" (low),
|
|
|
|
- "c" (high)
|
|
|
|
- : "ax", "dx", "memory");
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-static inline void __set_64bit_constant(unsigned long long *ptr,
|
|
|
|
- unsigned long long value)
|
|
|
|
-{
|
|
|
|
- __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32));
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-#define ll_low(x) *(((unsigned int *)&(x)) + 0)
|
|
|
|
-#define ll_high(x) *(((unsigned int *)&(x)) + 1)
|
|
|
|
-
|
|
|
|
-static inline void __set_64bit_var(unsigned long long *ptr,
|
|
|
|
- unsigned long long value)
|
|
|
|
-{
|
|
|
|
- __set_64bit(ptr, ll_low(value), ll_high(value));
|
|
|
|
|
|
+ : "=m" (*ptr), "+A" (prev)
|
|
|
|
+ : "b" (low), "c" (high)
|
|
|
|
+ : "memory");
|
|
}
|
|
}
|
|
|
|
|
|
-#define set_64bit(ptr, value) \
|
|
|
|
- (__builtin_constant_p((value)) \
|
|
|
|
- ? __set_64bit_constant((ptr), (value)) \
|
|
|
|
- : __set_64bit_var((ptr), (value)))
|
|
|
|
-
|
|
|
|
-#define _set_64bit(ptr, value) \
|
|
|
|
- (__builtin_constant_p(value) \
|
|
|
|
- ? __set_64bit(ptr, (unsigned int)(value), \
|
|
|
|
- (unsigned int)((value) >> 32)) \
|
|
|
|
- : __set_64bit(ptr, ll_low((value)), ll_high((value))))
|
|
|
|
-
|
|
|
|
extern void __cmpxchg_wrong_size(void);
|
|
extern void __cmpxchg_wrong_size(void);
|
|
|
|
|
|
/*
|
|
/*
|