/*
   drbd_bitmap.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
#include <asm/kmap_types.h>
#include "drbd_int.h"

/* OPAQUE outside this file!
 * interface defined in drbd_int.h
 *
 * convention:
 * function name drbd_bm_... => used elsewhere, "public".
 * function name      bm_... => internal to implementation, "private".
 *
 * Note that since find_first_bit returns int, at the current granularity of
 * the bitmap (4KB per bit), this implementation "only" supports up to
 * 1<<(32+12) == 16 TB...
 */
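
/*
 * To make the limit above concrete (illustrative arithmetic, assuming
 * BM_BLOCK_SIZE == 4 KiB, i.e. one bitmap bit per 4 KiB of storage):
 *
 *   2^32 bits * 2^12 bytes/bit = 2^44 bytes = 16 TiB
 *
 * A larger device would need a wider bit index, or a coarser granularity
 * as the comment in struct drbd_bitmap below suggests.
 */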

/*
 * NOTE
 *  Access to the *bm_pages is protected by bm_lock.
 *  It is safe to read the other members within the lock.
 *
 *  drbd_bm_set_bits is called from bio_endio callbacks,
 *  so we may be called with irq already disabled;
 *  hence we need spin_lock_irqsave(),
 *  and we need the kmap_atomic.
 */
struct drbd_bitmap {
        struct page **bm_pages;
        spinlock_t bm_lock;
        /* WARNING unsigned long bm_*:
         * 32bit number of bit offset is just enough for 512 MB bitmap.
         * it will blow up if we make the bitmap bigger...
         * not that it makes much sense to have a bitmap that large,
         * rather change the granularity to 16k or 64k or something.
         * (that implies other problems, however...)
         */
        unsigned long bm_set;       /* nr of set bits; THINK maybe atomic_t? */
        unsigned long bm_bits;
        size_t   bm_words;
        size_t   bm_number_of_pages;
        sector_t bm_dev_capacity;
        struct semaphore bm_change; /* serializes resize operations */

        atomic_t bm_async_io;
        wait_queue_head_t bm_io_wait;

        unsigned long bm_flags;

        /* debugging aid, in case we are still racy somewhere */
        char *bm_why;
        struct task_struct *bm_task;
};

/* definition of bits in bm_flags */
#define BM_LOCKED       0
#define BM_MD_IO_ERROR  1
#define BM_P_VMALLOCED  2

static int bm_is_locked(struct drbd_bitmap *b)
{
        return test_bit(BM_LOCKED, &b->bm_flags);
}

#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
{
        struct drbd_bitmap *b = mdev->bitmap;
        if (!__ratelimit(&drbd_ratelimit_state))
                return;
        dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
            current == mdev->receiver.task ? "receiver" :
            current == mdev->asender.task  ? "asender"  :
            current == mdev->worker.task   ? "worker"   : current->comm,
            func, b->bm_why ?: "?",
            b->bm_task == mdev->receiver.task ? "receiver" :
            b->bm_task == mdev->asender.task  ? "asender"  :
            b->bm_task == mdev->worker.task   ? "worker"   : "?");
}

void drbd_bm_lock(struct drbd_conf *mdev, char *why)
{
        struct drbd_bitmap *b = mdev->bitmap;
        int trylock_failed;

        if (!b) {
                dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n");
                return;
        }

        trylock_failed = down_trylock(&b->bm_change);

        if (trylock_failed) {
                dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
                    current == mdev->receiver.task ? "receiver" :
                    current == mdev->asender.task  ? "asender"  :
                    current == mdev->worker.task   ? "worker"   : current->comm,
                    why, b->bm_why ?: "?",
                    b->bm_task == mdev->receiver.task ? "receiver" :
                    b->bm_task == mdev->asender.task  ? "asender"  :
                    b->bm_task == mdev->worker.task   ? "worker"   : "?");
                down(&b->bm_change);
        }
        if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
                dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");

        b->bm_why  = why;
        b->bm_task = current;
}

void drbd_bm_unlock(struct drbd_conf *mdev)
{
        struct drbd_bitmap *b = mdev->bitmap;
        if (!b) {
                dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n");
                return;
        }

        if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags))
                dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");

        b->bm_why  = NULL;
        b->bm_task = NULL;
        up(&b->bm_change);
}

/* word offset to long pointer */
static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
{
        struct page *page;
        unsigned long page_nr;

        /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
        page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
        BUG_ON(page_nr >= b->bm_number_of_pages);
        page = b->bm_pages[page_nr];

        return (unsigned long *) kmap_atomic(page, km);
}
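
/*
 * Why the shift above works (illustrative, assuming 4 KiB pages and
 * 64-bit longs, i.e. PAGE_SHIFT == 12 and LN2_BPL == 6):
 *   byte offset = word offset << (LN2_BPL - 3)   (8 bytes per long)
 *   page_nr     = byte offset >> PAGE_SHIFT
 *               = word offset >> (PAGE_SHIFT - LN2_BPL + 3)
 * i.e. a right shift by 9, since each page holds 512 longs.
 */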

static unsigned long *bm_map_paddr(struct drbd_bitmap *b, unsigned long offset)
{
        return __bm_map_paddr(b, offset, KM_IRQ1);
}

static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
{
        kunmap_atomic(p_addr, km);
}

static void bm_unmap(unsigned long *p_addr)
{
        __bm_unmap(p_addr, KM_IRQ1);
}

/* long word offset of _bitmap_ sector */
#define S2W(s)  ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))

/* word offset from start of bitmap to word number _in_page_,
 * modulo longs per page:
 *      #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)))
 * but Philipp thinks gcc might not optimize the % into & (... - 1),
 * so do it explicitly:
 */
#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))

/* Long words per page */
#define LWPP (PAGE_SIZE/sizeof(long))
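
/*
 * Worked example (illustrative, assuming 4 KiB pages and 64-bit longs):
 * LWPP == 4096/8 == 512, so MLPP(515) == 515 & 511 == 3, the same value
 * as 515 % 512 but guaranteed to compile to a single AND.
 * For S2W (assuming BM_EXT_SHIFT == 24, BM_BLOCK_SHIFT == 12): one
 * 512-byte bitmap sector holds 512*8 == 4096 bits == 64 longs, so
 * S2W(1) == 1 << (24-12-6) == 64.
 */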

/*
 * actually most functions herein should take a struct drbd_bitmap*, not a
 * struct drbd_conf*, but for the debug macros I like to have the mdev around
 * to be able to report device specific messages.
 */

static void bm_free_pages(struct page **pages, unsigned long number)
{
        unsigned long i;
        if (!pages)
                return;

        for (i = 0; i < number; i++) {
                if (!pages[i]) {
                        printk(KERN_ALERT "drbd: bm_free_pages tried to free "
                                          "a NULL pointer; i=%lu n=%lu\n",
                                          i, number);
                        continue;
                }
                __free_page(pages[i]);
                pages[i] = NULL;
        }
}

static void bm_vk_free(void *ptr, int v)
{
        if (v)
                vfree(ptr);
        else
                kfree(ptr);
}

/*
 * "have" and "want" are NUMBER OF PAGES.
 */
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
        struct page **old_pages = b->bm_pages;
        struct page **new_pages, *page;
        unsigned int i, bytes, vmalloced = 0;
        unsigned long have = b->bm_number_of_pages;

        BUG_ON(have == 0 && old_pages != NULL);
        BUG_ON(have != 0 && old_pages == NULL);

        if (have == want)
                return old_pages;

        /* Trying kmalloc first, falling back to vmalloc.
         * GFP_KERNEL is ok, as this is done when a lower level disk is
         * "attached" to the drbd.  Context is receiver thread or cqueue
         * thread.  As we have no disk yet, we are not in the IO path,
         * not even the IO path of the peer. */
        bytes = sizeof(struct page *)*want;
        new_pages = kmalloc(bytes, GFP_KERNEL);
        if (!new_pages) {
                new_pages = vmalloc(bytes);
                if (!new_pages)
                        return NULL;
                vmalloced = 1;
        }

        memset(new_pages, 0, bytes);
        if (want >= have) {
                for (i = 0; i < have; i++)
                        new_pages[i] = old_pages[i];
                for (; i < want; i++) {
                        page = alloc_page(GFP_HIGHUSER);
                        if (!page) {
                                bm_free_pages(new_pages + have, i - have);
                                bm_vk_free(new_pages, vmalloced);
                                return NULL;
                        }
                        new_pages[i] = page;
                }
        } else {
                for (i = 0; i < want; i++)
                        new_pages[i] = old_pages[i];
                /* NOT HERE, we are outside the spinlock!
                bm_free_pages(old_pages + want, have - want);
                */
        }

        if (vmalloced)
                set_bit(BM_P_VMALLOCED, &b->bm_flags);
        else
                clear_bit(BM_P_VMALLOCED, &b->bm_flags);

        return new_pages;
}
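
/*
 * Design note (illustrative): the page *array* must be kernel-addressable,
 * hence kmalloc/vfree above, while the bitmap pages themselves are
 * allocated GFP_HIGHUSER and may live in highmem; that is presumably why
 * every access throughout this file goes through kmap_atomic()/kunmap_atomic().
 */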

/*
 * called on driver init only. TODO call when a device is created.
 * allocates the drbd_bitmap, and stores it in mdev->bitmap.
 */
int drbd_bm_init(struct drbd_conf *mdev)
{
        struct drbd_bitmap *b = mdev->bitmap;
        WARN_ON(b != NULL);
        b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
        if (!b)
                return -ENOMEM;
        spin_lock_init(&b->bm_lock);
        init_MUTEX(&b->bm_change);
        init_waitqueue_head(&b->bm_io_wait);

        mdev->bitmap = b;

        return 0;
}

sector_t drbd_bm_capacity(struct drbd_conf *mdev)
{
        ERR_IF(!mdev->bitmap) return 0;
        return mdev->bitmap->bm_dev_capacity;
}

/* called on driver unload. TODO: call when a device is destroyed.
 */
void drbd_bm_cleanup(struct drbd_conf *mdev)
{
        ERR_IF (!mdev->bitmap) return;
        bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
        bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags));
        kfree(mdev->bitmap);
        mdev->bitmap = NULL;
}

/*
 * since (b->bm_bits % BITS_PER_LONG) != 0,
 * this masks out the remaining bits.
 * Returns the number of bits cleared.
 */
static int bm_clear_surplus(struct drbd_bitmap *b)
{
        const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
        size_t w = b->bm_bits >> LN2_BPL;
        int cleared = 0;
        unsigned long *p_addr, *bm;

        p_addr = bm_map_paddr(b, w);
        bm = p_addr + MLPP(w);
        if (w < b->bm_words) {
                cleared = hweight_long(*bm & ~mask);
                *bm &= mask;
                w++; bm++;
        }

        if (w < b->bm_words) {
                cleared += hweight_long(*bm);
                *bm = 0;
        }
        bm_unmap(p_addr);
        return cleared;
}

static void bm_set_surplus(struct drbd_bitmap *b)
{
        const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
        size_t w = b->bm_bits >> LN2_BPL;
        unsigned long *p_addr, *bm;

        p_addr = bm_map_paddr(b, w);
        bm = p_addr + MLPP(w);
        if (w < b->bm_words) {
                *bm |= ~mask;
                bm++; w++;
        }

        if (w < b->bm_words) {
                *bm = ~(0UL);
        }
        bm_unmap(p_addr);
}
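
/*
 * Surplus-mask example (illustrative, assuming BITS_PER_LONG == 64):
 * with bm_bits == 70, mask == (1UL << (70 & 63)) - 1 == (1UL << 6) - 1,
 * i.e. the low 6 bits.  bm_clear_surplus() keeps those 6 valid bits of
 * the last word and zeroes everything beyond them; on a 32-bit host the
 * second branch also clears the extra word that pads bm_words up to a
 * 64-bit boundary.  bm_set_surplus() is the mirror image, used around
 * a resize.
 */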

static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
{
        unsigned long *p_addr, *bm, offset = 0;
        unsigned long bits = 0;
        unsigned long i, do_now;

        while (offset < b->bm_words) {
                i = do_now = min_t(size_t, b->bm_words-offset, LWPP);
                p_addr = __bm_map_paddr(b, offset, KM_USER0);
                bm = p_addr + MLPP(offset);
                while (i--) {
#ifndef __LITTLE_ENDIAN
                        if (swap_endian)
                                *bm = lel_to_cpu(*bm);
#endif
                        bits += hweight_long(*bm++);
                }
                __bm_unmap(p_addr, KM_USER0);
                offset += do_now;
                cond_resched();
        }

        return bits;
}

static unsigned long bm_count_bits(struct drbd_bitmap *b)
{
        return __bm_count_bits(b, 0);
}

static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b)
{
        return __bm_count_bits(b, 1);
}

/* offset and len in long words.*/
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
        unsigned long *p_addr, *bm;
        size_t do_now, end;

#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)

        end = offset + len;
        if (end > b->bm_words) {
                printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
                return;
        }

        while (offset < end) {
                do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
                p_addr = bm_map_paddr(b, offset);
                bm = p_addr + MLPP(offset);
                if (bm+do_now > p_addr + LWPP) {
                        printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
                               p_addr, bm, (int)do_now);
                        /* defensive bail-out; must not write past this page.
                         * unmap before leaving the loop, or the mapping leaks. */
                        bm_unmap(p_addr);
                        break;
                }
                memset(bm, c, do_now * sizeof(long));
                bm_unmap(p_addr);
                offset += do_now;
        }
}
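
/*
 * Chunking example for bm_memset() (illustrative, LWPP == 512):
 * starting at word offset 515 with end == 1500, the first iteration
 * takes do_now = min(ALIGN(516, 512), 1500) - 515 = 1024 - 515 = 509
 * words, i.e. exactly up to the end of the current page; subsequent
 * iterations then proceed a full page (512 words) at a time until the
 * final partial chunk.
 */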

/*
 * make sure the bitmap has enough room for the attached storage,
 * if necessary, resize.
 * called whenever we may have changed the device size.
 * returns -ENOMEM if we could not allocate enough memory, 0 on success.
 * In case this is actually a resize, we copy the old bitmap into the new one.
 * Otherwise, the bitmap is initialized to all bits set.
 */
int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
{
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long bits, words, owords, obits, *p_addr, *bm;
        unsigned long want, have, onpages; /* number of pages */
        struct page **npages, **opages = NULL;
        int err = 0, growing;
        int opages_vmalloced;

        ERR_IF(!b) return -ENOMEM;

        drbd_bm_lock(mdev, "resize");

        dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
                        (unsigned long long)capacity);

        if (capacity == b->bm_dev_capacity)
                goto out;

        opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags);

        if (capacity == 0) {
                spin_lock_irq(&b->bm_lock);
                opages = b->bm_pages;
                onpages = b->bm_number_of_pages;
                owords = b->bm_words;
                b->bm_pages = NULL;
                b->bm_number_of_pages =
                b->bm_set =
                b->bm_bits =
                b->bm_words =
                b->bm_dev_capacity = 0;
                spin_unlock_irq(&b->bm_lock);
                bm_free_pages(opages, onpages);
                bm_vk_free(opages, opages_vmalloced);
                goto out;
        }

        bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));

        /* if we would use
           words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
           a 32bit host could present the wrong number of words
           to a 64bit host.
        */
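        /* Illustrative: for bits == 70, a 32-bit host would get
         * ALIGN(70, 32) >> 5 == 3 words, while a 64-bit host expects
         * ALIGN(70, 64) >> 6 == 2 (64-bit) words == 4 32-bit words;
         * aligning to 64 keeps both sides talking about the same
         * on-the-wire length.
         */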
        words = ALIGN(bits, 64) >> LN2_BPL;

        if (get_ldev(mdev)) {
                D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12));
                put_ldev(mdev);
        }

        /* one extra long to catch off by one errors */
        want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
        have = b->bm_number_of_pages;
        if (want == have) {
                D_ASSERT(b->bm_pages != NULL);
                npages = b->bm_pages;
        } else {
                if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC))
                        npages = NULL;
                else
                        npages = bm_realloc_pages(b, want);
        }

        if (!npages) {
                err = -ENOMEM;
                goto out;
        }

        spin_lock_irq(&b->bm_lock);
        opages = b->bm_pages;
        owords = b->bm_words;
        obits = b->bm_bits;

        growing = bits > obits;
        if (opages)
                bm_set_surplus(b);

        b->bm_pages = npages;
        b->bm_number_of_pages = want;
        b->bm_bits  = bits;
        b->bm_words = words;
        b->bm_dev_capacity = capacity;

        if (growing) {
                bm_memset(b, owords, 0xff, words-owords);
                b->bm_set += bits - obits;
        }

        if (want < have) {
                /* implicit: (opages != NULL) && (opages != npages) */
                bm_free_pages(opages + want, have - want);
        }

        p_addr = bm_map_paddr(b, words);
        bm = p_addr + MLPP(words);
        *bm = DRBD_MAGIC;
        bm_unmap(p_addr);

        (void)bm_clear_surplus(b);
        spin_unlock_irq(&b->bm_lock);
        if (opages != npages)
                bm_vk_free(opages, opages_vmalloced);
        if (!growing)
                b->bm_set = bm_count_bits(b);
        dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words);

 out:
        drbd_bm_unlock(mdev);
        return err;
}

/* inherently racy:
 * if not protected by other means, return value may be out of date when
 * leaving this function...
 * we still need to lock it, since it is important that this returns
 * bm_set == 0 precisely.
 *
 * maybe bm_set should be atomic_t ?
 */
static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
{
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long s;
        unsigned long flags;

        ERR_IF(!b) return 0;
        ERR_IF(!b->bm_pages) return 0;

        spin_lock_irqsave(&b->bm_lock, flags);
        s = b->bm_set;
        spin_unlock_irqrestore(&b->bm_lock, flags);

        return s;
}

unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
{
        unsigned long s;
        /* if I don't have a disk, I don't know about out-of-sync status */
        if (!get_ldev_if_state(mdev, D_NEGOTIATING))
                return 0;
        s = _drbd_bm_total_weight(mdev);
        put_ldev(mdev);
        return s;
}

size_t drbd_bm_words(struct drbd_conf *mdev)
{
        struct drbd_bitmap *b = mdev->bitmap;
        ERR_IF(!b) return 0;
        ERR_IF(!b->bm_pages) return 0;

        return b->bm_words;
}

unsigned long drbd_bm_bits(struct drbd_conf *mdev)
{
        struct drbd_bitmap *b = mdev->bitmap;
        ERR_IF(!b) return 0;

        return b->bm_bits;
}

/* merge number words from buffer into the bitmap starting at offset.
 * buffer[i] is expected to be little endian unsigned long.
 * bitmap must be locked by drbd_bm_lock.
 * currently only used from receive_bitmap.
 */
void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
                       unsigned long *buffer)
{
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long *p_addr, *bm;
        unsigned long word, bits;
        size_t end, do_now;

        end = offset + number;

        ERR_IF(!b) return;
        ERR_IF(!b->bm_pages) return;
        if (number == 0)
                return;
        WARN_ON(offset >= b->bm_words);
        WARN_ON(end    >  b->bm_words);

        spin_lock_irq(&b->bm_lock);
        while (offset < end) {
                do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
                p_addr = bm_map_paddr(b, offset);
                bm = p_addr + MLPP(offset);
                offset += do_now;
                while (do_now--) {
                        bits = hweight_long(*bm);
                        word = *bm | lel_to_cpu(*buffer++);
                        *bm++ = word;
                        b->bm_set += hweight_long(word) - bits;
                }
                bm_unmap(p_addr);
        }
        /* with 32bit <-> 64bit cross-platform connect
         * this is only correct for current usage,
         * where we _know_ that we are 64 bit aligned,
         * and know that this function is used in this way, too...
         */
        if (end == b->bm_words)
                b->bm_set -= bm_clear_surplus(b);
        spin_unlock_irq(&b->bm_lock);
}
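
/*
 * Accounting example for the merge loop above (illustrative): if a word
 * holds 0x05 (2 bits set) and the incoming buffer word is 0x0c, the
 * merged word is 0x0d (3 bits set), so bm_set is adjusted by
 * hweight_long(0x0d) - hweight_long(0x05) == 3 - 2 == 1; bits already
 * set on both sides are counted only once.
 */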

/* copy number words from the bitmap starting at offset into the buffer.
 * buffer[i] will be little endian unsigned long.
 */
void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
                     unsigned long *buffer)
{
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long *p_addr, *bm;
        size_t end, do_now;

        end = offset + number;

        ERR_IF(!b) return;
        ERR_IF(!b->bm_pages) return;

        spin_lock_irq(&b->bm_lock);
        if ((offset >= b->bm_words) ||
            (end    >  b->bm_words) ||
            (number <= 0))
                dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n",
                        (unsigned long) offset,
                        (unsigned long) number,
                        (unsigned long) b->bm_words);
        else {
                while (offset < end) {
                        do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
                        p_addr = bm_map_paddr(b, offset);
                        bm = p_addr + MLPP(offset);
                        offset += do_now;
                        while (do_now--)
                                *buffer++ = cpu_to_lel(*bm++);
                        bm_unmap(p_addr);
                }
        }
        spin_unlock_irq(&b->bm_lock);
}

/* set all bits in the bitmap */
void drbd_bm_set_all(struct drbd_conf *mdev)
{
        struct drbd_bitmap *b = mdev->bitmap;
        ERR_IF(!b) return;
        ERR_IF(!b->bm_pages) return;

        spin_lock_irq(&b->bm_lock);
        bm_memset(b, 0, 0xff, b->bm_words);
        (void)bm_clear_surplus(b);
        b->bm_set = b->bm_bits;
        spin_unlock_irq(&b->bm_lock);
}

/* clear all bits in the bitmap */
void drbd_bm_clear_all(struct drbd_conf *mdev)
{
        struct drbd_bitmap *b = mdev->bitmap;
        ERR_IF(!b) return;
        ERR_IF(!b->bm_pages) return;

        spin_lock_irq(&b->bm_lock);
        bm_memset(b, 0, 0, b->bm_words);
        b->bm_set = 0;
        spin_unlock_irq(&b->bm_lock);
}

static void bm_async_io_complete(struct bio *bio, int error)
{
        struct drbd_bitmap *b = bio->bi_private;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);

        /* strange behavior of some lower level drivers...
         * fail the request by clearing the uptodate flag,
         * but do not return any error?!
         * do we want to WARN() on this? */
        if (!error && !uptodate)
                error = -EIO;

        if (error) {
                /* doh. what now?
                 * for now, set all bits, and flag MD_IO_ERROR */
                __set_bit(BM_MD_IO_ERROR, &b->bm_flags);
        }
        if (atomic_dec_and_test(&b->bm_async_io))
                wake_up(&b->bm_io_wait);
        bio_put(bio);
}

static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
{
        /* we are process context. we always get a bio */
        struct bio *bio = bio_alloc(GFP_KERNEL, 1);
        unsigned int len;
        sector_t on_disk_sector =
                mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
        on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);

        /* this might happen with very small
         * flexible external meta data device */
        len = min_t(unsigned int, PAGE_SIZE,
                (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);

        bio->bi_bdev = mdev->ldev->md_bdev;
        bio->bi_sector = on_disk_sector;
        bio_add_page(bio, b->bm_pages[page_nr], len, 0);
        bio->bi_private = b;
        bio->bi_end_io = bm_async_io_complete;

        if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
                bio->bi_rw |= rw;
                bio_endio(bio, -EIO);
        } else {
                submit_bio(rw, bio);
        }
}
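
/*
 * Sector math for bm_page_io_async() (illustrative, 4 KiB pages): each
 * bitmap page spans PAGE_SIZE/512 == 8 sectors, so page_nr is shifted
 * left by PAGE_SHIFT-9 == 3 and added to the on-disk start of the
 * bitmap area (md_offset + bm_offset).  Page 5 therefore lands at
 * sector offset 40 within the meta data.
 */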

# if defined(__LITTLE_ENDIAN)
        /* nothing to do, on disk == in memory */
# define bm_cpu_to_lel(x) ((void)0)
# else
void bm_cpu_to_lel(struct drbd_bitmap *b)
{
        /* need to cpu_to_lel all the pages ...
         * this may be optimized by using
         * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
         * the following is still not optimal, but better than nothing */
        unsigned int i;
        unsigned long *p_addr, *bm;
        if (b->bm_set == 0) {
                /* no bit set: all pages are zero, and cpu_to_lel(0) == 0,
                 * so skip every page */
                i = b->bm_number_of_pages;
        } else if (b->bm_set == b->bm_bits) {
                /* all bits set: full words are ~0UL and cpu_to_lel(-1) == -1,
                 * so only the last page needs the swap */
                i = b->bm_number_of_pages - 1;
        } else {
                /* all pages */
                i = 0;
        }
        for (; i < b->bm_number_of_pages; i++) {
                p_addr = kmap_atomic(b->bm_pages[i], KM_USER0);
                for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
                        *bm = cpu_to_lel(*bm);
                kunmap_atomic(p_addr, KM_USER0);
        }
}
# endif
/* lel_to_cpu == cpu_to_lel */
# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
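
/*
 * Endianness round trip (illustrative): on a big-endian host,
 * cpu_to_lel(0x0102030405060708UL) stores as bytes 08 07 06 05 04 03 02 01
 * on disk; applying the same byte swap again restores the CPU value,
 * which is why bm_lel_to_cpu() can simply alias bm_cpu_to_lel().
 */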

/*
 * bm_rw: read/write the whole bitmap from/to its on disk location.
 */
static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
{
        struct drbd_bitmap *b = mdev->bitmap;
        /* sector_t sector; */
        int bm_words, num_pages, i;
        unsigned long now;
        char ppb[10];
        int err = 0;

        WARN_ON(!bm_is_locked(b));

        /* no spinlock here, the drbd_bm_lock should be enough! */

        bm_words  = drbd_bm_words(mdev);
        num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT;

        /* on disk bitmap is little endian */
        if (rw == WRITE)
                bm_cpu_to_lel(b);

        now = jiffies;
        atomic_set(&b->bm_async_io, num_pages);
        __clear_bit(BM_MD_IO_ERROR, &b->bm_flags);

        /* let the layers below us try to merge these bios... */
        for (i = 0; i < num_pages; i++)
                bm_page_io_async(mdev, b, i, rw);

        drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
        wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);

        if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
                dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
                drbd_chk_io_error(mdev, 1, TRUE);
                err = -EIO;
        }

        now = jiffies;
        if (rw == WRITE) {
                /* swap back endianness */
                bm_lel_to_cpu(b);
                /* flush bitmap to stable storage */
                drbd_md_flush(mdev);
        } else /* rw == READ */ {
                /* just read, if necessary adjust endianness */
                b->bm_set = bm_count_bits_swap_endian(b);
                dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
                         jiffies - now);
        }
        now = b->bm_set;

        dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
                 ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);

        return err;
}

/**
 * drbd_bm_read() - Read the whole bitmap from its on disk location.
 * @mdev:       DRBD device.
 */
int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
{
        return bm_rw(mdev, READ);
}

/**
 * drbd_bm_write() - Write the whole bitmap to its on disk location.
 * @mdev:       DRBD device.
 */
int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
{
        return bm_rw(mdev, WRITE);
}

/**
 * drbd_bm_write_sect() - Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap
 * @mdev:       DRBD device.
 * @enr:        Extent number in the resync lru (happens to be sector offset)
 *
 * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered
 * by a single sector write.  Therefore enr == sector offset from the
 * start of the bitmap.
 */
int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local)
{
        sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
                                      + mdev->ldev->md.bm_offset;
        int bm_words, num_words, offset;
        int err = 0;

        mutex_lock(&mdev->md_io_mutex);
        bm_words  = drbd_bm_words(mdev);
        offset    = S2W(enr);   /* word offset into bitmap */
        num_words = min(S2W(1), bm_words - offset);
        if (num_words < S2W(1))
                memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE);
        drbd_bm_get_lel(mdev, offset, num_words,
                        page_address(mdev->md_io_page));
        if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) {
                int i;
                err = -EIO;
                dev_err(DEV, "IO ERROR writing bitmap sector %lu "
                    "(meta-disk sector %llus)\n",
                    enr, (unsigned long long)on_disk_sector);
                drbd_chk_io_error(mdev, 1, TRUE);
                for (i = 0; i < AL_EXT_PER_BM_SECT; i++)
                        drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i);
        }
        mdev->bm_writ_cnt++;
        mutex_unlock(&mdev->md_io_mutex);
        return err;
}

/* NOTE
 * find_first_bit returns int, we return unsigned long.
 * should not make much difference anyways, but ...
 *
 * this returns a bit number, NOT a sector!
 */
#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1)
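
/*
 * BPP_MASK masks a bit number down to its offset within one page
 * (illustrative, assuming PAGE_SHIFT == 12): a page holds
 * PAGE_SIZE*8 == 2^15 bits, so BPP_MASK == 2^15 - 1 == 32767.
 * For bit 40000, 40000 & ~BPP_MASK == 32768 is the first bit of its
 * page, and 40000 & BPP_MASK == 7232 is the in-page offset passed to
 * the find_next_*bit() calls below.
 */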

static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
        const int find_zero_bit, const enum km_type km)
{
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long i = -1UL;
        unsigned long *p_addr;
        unsigned long bit_offset; /* bit offset of the mapped page. */

        if (bm_fo > b->bm_bits) {
                dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
        } else {
                while (bm_fo < b->bm_bits) {
                        unsigned long offset;
                        bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */
                        offset = bit_offset >> LN2_BPL; /* word offset of the page */
                        p_addr = __bm_map_paddr(b, offset, km);

                        if (find_zero_bit)
                                i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);
                        else
                                i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);

                        __bm_unmap(p_addr, km);

                        if (i < PAGE_SIZE*8) {
                                i = bit_offset + i;
                                if (i >= b->bm_bits)
                                        break;
                                goto found;
                        }
                        bm_fo = bit_offset + PAGE_SIZE*8;
                }
                i = -1UL;
        }
 found:
        return i;
}

static unsigned long bm_find_next(struct drbd_conf *mdev,
        unsigned long bm_fo, const int find_zero_bit)
{
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long i = -1UL;

        ERR_IF(!b) return i;
        ERR_IF(!b->bm_pages) return i;

        spin_lock_irq(&b->bm_lock);
        if (bm_is_locked(b))
                bm_print_lock_info(mdev);

        i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);

        spin_unlock_irq(&b->bm_lock);
        return i;
}

unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
{
        return bm_find_next(mdev, bm_fo, 0);
}

#if 0
/* not yet needed for anything. */
unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
{
        return bm_find_next(mdev, bm_fo, 1);
}
#endif

/* does not spin_lock_irqsave.
 * you must take drbd_bm_lock() first */
unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
{
        /* WARN_ON(!bm_is_locked(mdev)); */
        return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
}

unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
{
        /* WARN_ON(!bm_is_locked(mdev)); */
        return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
}

/* returns number of bits actually changed.
 * for val != 0, we change 0 -> 1, return code positive
 * for val == 0, we change 1 -> 0, return code negative
 * wants bitnr, not sector.
 * expected to be called for only a few bits (e - s about BITS_PER_LONG).
 * Must hold bitmap lock already. */
int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
        unsigned long e, int val, const enum km_type km)
{
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long *p_addr = NULL;
        unsigned long bitnr;
        unsigned long last_page_nr = -1UL;
        int c = 0;

        if (e >= b->bm_bits) {
                dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
                                s, e, b->bm_bits);
                e = b->bm_bits ? b->bm_bits - 1 : 0;
        }
        for (bitnr = s; bitnr <= e; bitnr++) {
                unsigned long offset = bitnr>>LN2_BPL;
                unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
                if (page_nr != last_page_nr) {
                        if (p_addr)
                                __bm_unmap(p_addr, km);
                        p_addr = __bm_map_paddr(b, offset, km);
                        last_page_nr = page_nr;
                }
                if (val)
                        c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr));
                else
                        c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr));
        }
        if (p_addr)
                __bm_unmap(p_addr, km);
        b->bm_set += c;
        return c;
}

/* returns number of bits actually changed.
 * for val != 0, we change 0 -> 1, return code positive
 * for val == 0, we change 1 -> 0, return code negative
 * wants bitnr, not sector */
int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
        const unsigned long e, int val)
{
        unsigned long flags;
        struct drbd_bitmap *b = mdev->bitmap;
        int c = 0;

        ERR_IF(!b) return 1;
        ERR_IF(!b->bm_pages) return 0;

        spin_lock_irqsave(&b->bm_lock, flags);
        if (bm_is_locked(b))
                bm_print_lock_info(mdev);

        c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);

        spin_unlock_irqrestore(&b->bm_lock, flags);
        return c;
}

/* returns number of bits changed 0 -> 1 */
int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
        return bm_change_bits_to(mdev, s, e, 1);
}

/* returns number of bits changed 1 -> 0 */
int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
        return -bm_change_bits_to(mdev, s, e, 0);
}

/* sets all bits in full words,
 * from first_word up to, but not including, last_word */
static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
                int page_nr, int first_word, int last_word)
{
        int i;
        int bits;
        unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0);
        for (i = first_word; i < last_word; i++) {
                bits = hweight_long(paddr[i]);
                paddr[i] = ~0UL;
                b->bm_set += BITS_PER_LONG - bits;
        }
        kunmap_atomic(paddr, KM_USER0);
}

/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave.
 * You must first drbd_bm_lock().
 * Can be called to set the whole bitmap in one go.
 * Sets bits from s to e _inclusive_. */
void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
        /* First set_bit from the first bit (s)
         * up to the next long boundary (sl),
         * then assign full words up to the last long boundary (el),
         * then set_bit up to and including the last bit (e).
         *
         * Do not use memset, because we must account for changes,
         * so we need to loop over the words with hweight() anyways.
         */
        unsigned long sl = ALIGN(s, BITS_PER_LONG);
        unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
        int first_page;
        int last_page;
        int page_nr;
        int first_word;
        int last_word;

        if (e - s <= 3*BITS_PER_LONG) {
                /* don't bother; el and sl may even be wrong. */
                __bm_change_bits_to(mdev, s, e, 1, KM_USER0);
                return;
        }

        /* difference is large enough that we can trust sl and el */

        /* bits filling the current long */
        if (sl)
                __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0);

        first_page = sl >> (3 + PAGE_SHIFT);
        last_page = el >> (3 + PAGE_SHIFT);

        /* MLPP: modulo longs per page */
        /* LWPP: long words per page */
        first_word = MLPP(sl >> LN2_BPL);
        last_word = LWPP;

        /* first and full pages, unless first page == last page */
        for (page_nr = first_page; page_nr < last_page; page_nr++) {
                bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
                cond_resched();
                first_word = 0;
        }

        /* last page (respectively only page, for first page == last page) */
        last_word = MLPP(el >> LN2_BPL);
        bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);

        /* possibly trailing bits.
         * example: (e & 63) == 63, el will be e+1.
         * if that even was the very last bit,
         * it would trigger an assert in __bm_change_bits_to()
         */
        if (el <= e)
                __bm_change_bits_to(mdev, el, e, 1, KM_USER0);
}
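
/*
 * Split example for _drbd_bm_set_bits() (illustrative, BITS_PER_LONG == 64):
 * for s == 100 and e == 1000, sl == ALIGN(100, 64) == 128 and
 * el == 1001 & ~63 == 960.  Bits 100..127 and 960..1000 are set one by
 * one via __bm_change_bits_to(), while bits 128..959 (13 full words)
 * are assigned as whole longs, with bm_set updated per word.
 */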

/* returns bit state
 * wants bitnr, NOT sector.
 * inherently racy... area needs to be locked by means of {al,rs}_lru
 *  1 ... bit set
 *  0 ... bit not set
 * -1 ... first out of bounds access, stop testing for bits!
 */
int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
{
        unsigned long flags;
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long *p_addr;
        int i;

        ERR_IF(!b) return 0;
        ERR_IF(!b->bm_pages) return 0;

        spin_lock_irqsave(&b->bm_lock, flags);
        if (bm_is_locked(b))
                bm_print_lock_info(mdev);
        if (bitnr < b->bm_bits) {
                unsigned long offset = bitnr>>LN2_BPL;
                p_addr = bm_map_paddr(b, offset);
                i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0;
                bm_unmap(p_addr);
        } else if (bitnr == b->bm_bits) {
                i = -1;
        } else { /* (bitnr > b->bm_bits) */
                dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
                i = 0;
        }

        spin_unlock_irqrestore(&b->bm_lock, flags);
        return i;
}

/* returns number of bits set in the range [s, e] */
int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
        unsigned long flags;
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long *p_addr = NULL, page_nr = -1;
        unsigned long bitnr;
        int c = 0;
        size_t w;

        /* If this is called without a bitmap, that is a bug.  But just to be
         * robust in case we screwed up elsewhere, in that case pretend there
         * was one dirty bit in the requested area, so we won't try to do a
         * local read there (no bitmap probably implies no disk) */
        ERR_IF(!b) return 1;
        ERR_IF(!b->bm_pages) return 1;

        spin_lock_irqsave(&b->bm_lock, flags);
        if (bm_is_locked(b))
                bm_print_lock_info(mdev);
        for (bitnr = s; bitnr <= e; bitnr++) {
                w = bitnr >> LN2_BPL;
                if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) {
                        page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3);
                        if (p_addr)
                                bm_unmap(p_addr);
                        p_addr = bm_map_paddr(b, w);
                }
                ERR_IF (bitnr >= b->bm_bits) {
                        dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
                } else {
                        c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
                }
        }
        if (p_addr)
                bm_unmap(p_addr);
        spin_unlock_irqrestore(&b->bm_lock, flags);
        return c;
}

/* inherently racy...
 * return value may be already out-of-date when this function returns.
 * but the general usage is that this is only used during a cstate when bits
 * are only cleared, not set, and typically we only care for the case when
 * the return value is zero, or we already "locked" this "bitmap extent" by
 * other means.
 *
 * enr is bm-extent number, since we chose to name one sector (512 bytes)
 * worth of the bitmap a "bitmap extent".
 *
 * TODO
 * I think since we use it like a reference count, we should use the real
 * reference count of some bitmap extent element from some lru instead...
 */
int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
{
        struct drbd_bitmap *b = mdev->bitmap;
        int count, s, e;
        unsigned long flags;
        unsigned long *p_addr, *bm;

        ERR_IF(!b) return 0;
        ERR_IF(!b->bm_pages) return 0;

        spin_lock_irqsave(&b->bm_lock, flags);
        if (bm_is_locked(b))
                bm_print_lock_info(mdev);

        s = S2W(enr);
        e = min((size_t)S2W(enr+1), b->bm_words);
        count = 0;
        if (s < b->bm_words) {
                int n = e-s;
                p_addr = bm_map_paddr(b, s);
                bm = p_addr + MLPP(s);
                while (n--)
                        count += hweight_long(*bm++);
                bm_unmap(p_addr);
        } else {
                dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s);
        }
        spin_unlock_irqrestore(&b->bm_lock, flags);
        return count;
}

/* set all bits covered by the AL-extent al_enr */
unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
{
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long *p_addr, *bm;
        unsigned long weight;
        int count, s, e, i, do_now;
        ERR_IF(!b) return 0;
        ERR_IF(!b->bm_pages) return 0;

        spin_lock_irq(&b->bm_lock);
        if (bm_is_locked(b))
                bm_print_lock_info(mdev);
        weight = b->bm_set;

        s = al_enr * BM_WORDS_PER_AL_EXT;
        e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
        /* assert that s and e are on the same page */
        D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
              ==  s    >> (PAGE_SHIFT - LN2_BPL + 3));
        count = 0;
        if (s < b->bm_words) {
                i = do_now = e-s;
                p_addr = bm_map_paddr(b, s);
                bm = p_addr + MLPP(s);
                while (i--) {
                        count += hweight_long(*bm);
                        *bm = -1UL;
                        bm++;
                }
                bm_unmap(p_addr);
                b->bm_set += do_now*BITS_PER_LONG - count;
                if (e == b->bm_words)
                        b->bm_set -= bm_clear_surplus(b);
        } else {
                dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s);
        }
        weight = b->bm_set - weight;
        spin_unlock_irq(&b->bm_lock);
        return weight;
}