bitops.h

#ifndef _ASM_X86_BITOPS_H
#define _ASM_X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/alternative.h>
#include <asm/rmwcc.h>

#if BITS_PER_LONG == 32
# define _BITOPS_LONG_SHIFT 5
#elif BITS_PER_LONG == 64
# define _BITOPS_LONG_SHIFT 6
#else
# error "Unexpected BITS_PER_LONG"
#endif

#define BIT_64(n)			(U64_C(1) << (n))

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
 */

#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
/* Technically wrong, but this avoids compilation errors on some gcc
   versions. */
#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
#endif

#define ADDR				BITOP_ADDR(addr)

/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte.
 */
#define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)			(1 << ((nr) & 7))
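/*
 * Worked example (illustrative values): for a compile-time-constant nr such
 * as 11, CONST_MASK_ADDR(11, addr) names the byte at offset 11 >> 3 = 1 from
 * addr, and CONST_MASK(11) is 1 << (11 & 7) = 0x08, so the immediate paths
 * in set_bit()/clear_bit()/change_bit() below reduce to a single locked
 * byte-wide "orb"/"andb"/"xorb" on that byte.
 */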
/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered. See __set_bit()
 * if you do not require the atomic guarantees.
 *
 * Note: there are no guarantees that this function will not be reordered
 * on non x86 architectures, so if you are writing portable code,
 * make sure not to rely on its reordering guarantees.
 *
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __always_inline void
set_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "orb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr))
			: "memory");
	} else {
		asm volatile(LOCK_PREFIX "bts %1,%0"
			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
	}
}
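/*
 * Usage sketch (illustrative only; the flag word and bit number are
 * hypothetical, not part of this header):
 *
 *	static unsigned long dev_flags;
 *
 *	set_bit(0, &dev_flags);		atomic, safe against other CPUs
 *	__set_bit(0, &dev_flags);	non-atomic, caller provides exclusion
 */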
/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}

/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered. However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
 * in order to ensure changes are visible on other processors.
 */
static __always_inline void
clear_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "andb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)~CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btr %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/*
 * clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and implies release semantics before the memory
 * operation. It can be used for an unlock.
 */
static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	barrier();
	clear_bit(nr, addr);
}

static inline void __clear_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}

/*
 * __clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * __clear_bit() is non-atomic and implies release semantics before the memory
 * operation. It can be used for an unlock if no other CPUs can concurrently
 * modify other bits in the word.
 *
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	barrier();
	__clear_bit(nr, addr);
}

#define smp_mb__before_clear_bit()	barrier()
#define smp_mb__after_clear_bit()	barrier()
/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to change
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __change_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}

/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to change
 * @addr: Address to start counting from
 *
 * change_bit() is atomic and may not be reordered.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "xorb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btc %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c");
}

/**
 * test_and_set_bit_lock - Set a bit and return its old value for lock
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This is the same as test_and_set_bit on x86.
 */
static __always_inline int
test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
	return test_and_set_bit(nr, addr);
}
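/*
 * Lock-style usage sketch (illustrative only; the lock word and bit number
 * are hypothetical, not part of this header):
 *
 *	while (test_and_set_bit_lock(0, &lock_word))
 *		cpu_relax();
 *	... critical section ...
 *	clear_bit_unlock(0, &lock_word);
 */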
/**
 * __test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail. You must protect multiple accesses with a lock.
 */
static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm("bts %2,%1\n\t"
	    "sbb %0,%0"
	    : "=r" (oldbit), ADDR
	    : "Ir" (nr));
	return oldbit;
}
/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c");
}
/**
 * __test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail. You must protect multiple accesses with a lock.
 *
 * Note: the operation is performed atomically with respect to
 * the local CPU, but not other CPUs. Portable code should not
 * rely on this behaviour.
 * KVM relies on this behaviour on x86 for modifying memory that is also
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr));
	return oldbit;
}
/* WARNING: non atomic and it can be reordered! */
static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btc %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr) : "memory");
	return oldbit;
}

/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c");
}

static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
{
	return ((1UL << (nr & (BITS_PER_LONG-1))) &
		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}

static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
	int oldbit;

	asm volatile("bt %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit)
		     : "m" (*(unsigned long *)addr), "Ir" (nr));
	return oldbit;
}

#if 0 /* Fool kernel-doc since it doesn't do macros yet */
/**
 * test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 */
static int test_bit(int nr, const volatile unsigned long *addr);
#endif

#define test_bit(nr, addr)			\
	(__builtin_constant_p((nr))		\
	 ? constant_test_bit((nr), (addr))	\
	 : variable_test_bit((nr), (addr)))
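/*
 * Illustrative note: with a compile-time-constant bit number test_bit()
 * resolves to constant_test_bit(), a plain C load and mask; with a run-time
 * bit number it resolves to variable_test_bit(), which uses BT. For example
 * (hypothetical flag word, not part of this header):
 *
 *	if (test_bit(3, &flags))	constant nr -> constant_test_bit()
 *	if (test_bit(nr, &flags))	variable nr -> variable_test_bit()
 */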
/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
	asm("rep; bsf %1,%0"
	    : "=r" (word)
	    : "rm" (word));
	return word;
}
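/*
 * Example: __ffs(0x18) is 3, since bit 3 is the lowest set bit of 0b11000;
 * __ffs(0) is undefined, as noted above.
 */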
/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static inline unsigned long ffz(unsigned long word)
{
	asm("rep; bsf %1,%0"
	    : "=r" (word)
	    : "r" (~word));
	return word;
}
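/*
 * Example: ffz(0x0f) is 4, the index of the lowest clear bit of 0b1111;
 * ffz(~0UL) is undefined, as noted above.
 */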
/*
 * __fls: find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
{
	asm("bsr %1,%0"
	    : "=r" (word)
	    : "rm" (word));
	return word;
}
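/*
 * Example: __fls(0x18) is 4, since bit 4 is the highest set bit of 0b11000;
 * __fls(0) is undefined, as noted above.
 */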
#undef ADDR

#ifdef __KERNEL__
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static inline int ffs(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says its
	 * value is written to set it to the same as before, except that the
	 * top 32 bits will be cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	asm("bsfl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
	asm("bsfl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "r" (-1));
#else
	asm("bsfl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}
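/*
 * Examples: ffs(0) is 0; ffs(0x18) is 4 (lowest set bit index 3, plus one
 * for the 1-based convention shared with the libc ffs()).
 */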
/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static inline int fls(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says its
	 * value is written to set it to the same as before, except that the
	 * top 32 bits will be cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	asm("bsrl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
	asm("bsrl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "rm" (-1));
#else
	asm("bsrl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}
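/*
 * Examples: fls(0) is 0; fls(1) is 1; fls(0x18) is 5 (highest set bit
 * index 4, plus one for the 1-based convention).
 */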
/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
#ifdef CONFIG_X86_64
static __always_inline int fls64(__u64 x)
{
	int bitpos = -1;
	/*
	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architect says its
	 * value is written to set it to the same as before.
	 */
	asm("bsrq %1,%q0"
	    : "+r" (bitpos)
	    : "rm" (x));
	return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif
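/*
 * Example: fls64(0) is 0; fls64(1ULL << 40) is 41.
 */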
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>

#define ARCH_HAS_FAST_MULTIPLIER 1

#include <asm/arch_hweight.h>
#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */