drbd_bitmap.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328
  1. /*
  2. drbd_bitmap.c
  3. This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
  4. Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
  5. Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>.
  6. Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
  7. drbd is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 2, or (at your option)
  10. any later version.
  11. drbd is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with drbd; see the file COPYING. If not, write to
  17. the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  18. */
  19. #include <linux/bitops.h>
  20. #include <linux/vmalloc.h>
  21. #include <linux/string.h>
  22. #include <linux/drbd.h>
  23. #include <linux/slab.h>
  24. #include <asm/kmap_types.h>
  25. #include "drbd_int.h"
  26. /* OPAQUE outside this file!
  27. * interface defined in drbd_int.h
  28. * convention:
  29. * function name drbd_bm_... => used elsewhere, "public".
  30. * function name bm_... => internal to implementation, "private".
  31. * Note that since find_first_bit returns int, at the current granularity of
  32. * the bitmap (4KB per byte), this implementation "only" supports up to
  33. * 1<<(32+12) == 16 TB...
  34. */
  35. /*
  36. * NOTE
  37. * Access to the *bm_pages is protected by bm_lock.
  38. * It is safe to read the other members within the lock.
  39. *
  40. * drbd_bm_set_bits is called from bio_endio callbacks,
  41. * We may be called with irq already disabled,
  42. * so we need spin_lock_irqsave().
  43. * And we need the kmap_atomic.
  44. */
struct drbd_bitmap {
	struct page **bm_pages;	/* the bitmap itself, as an array of pages;
				 * access protected by bm_lock (see NOTE above) */
	spinlock_t bm_lock;
	/* WARNING unsigned long bm_*:
	 * 32bit number of bit offset is just enough for 512 MB bitmap.
	 * it will blow up if we make the bitmap bigger...
	 * not that it makes much sense to have a bitmap that large,
	 * rather change the granularity to 16k or 64k or something.
	 * (that implies other problems, however...)
	 */
	unsigned long bm_set;	/* nr of set bits; THINK maybe atomic_t? */
	unsigned long bm_bits;	/* total number of valid bits */
	size_t bm_words;	/* bm_bits as 64bit-aligned long-word count */
	size_t bm_number_of_pages;	/* entries in bm_pages[] */
	sector_t bm_dev_capacity;	/* capacity this bitmap currently covers */
	struct mutex bm_change; /* serializes resize operations */
	atomic_t bm_async_io;	/* outstanding page bios in bm_rw() */
	wait_queue_head_t bm_io_wait;	/* woken when bm_async_io reaches 0 */
	unsigned long bm_flags;	/* BM_LOCKED / BM_MD_IO_ERROR / BM_P_VMALLOCED */
	/* debugging aid, in case we are still racy somewhere */
	char *bm_why;		/* reason string given to drbd_bm_lock() */
	struct task_struct *bm_task;	/* task currently holding the bitmap lock */
};
/* definition of bits in bm_flags */
#define BM_LOCKED       0	/* bitmap locked via drbd_bm_lock() */
#define BM_MD_IO_ERROR  1	/* at least one bitmap page IO failed */
#define BM_P_VMALLOCED  2	/* bm_pages array came from vmalloc, not kmalloc */
  72. static int bm_is_locked(struct drbd_bitmap *b)
  73. {
  74. return test_bit(BM_LOCKED, &b->bm_flags);
  75. }
#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
/* Rate-limited complaint used when the bitmap is touched while locked:
 * names the calling thread, the function it happened in, and who locked
 * the bitmap for which reason. */
static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
{
	struct drbd_bitmap *b = mdev->bitmap;
	if (!__ratelimit(&drbd_ratelimit_state))
		return;
	dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
	    current == mdev->receiver.task ? "receiver" :
	    current == mdev->asender.task  ? "asender"  :
	    current == mdev->worker.task   ? "worker"   : current->comm,
	    func, b->bm_why ?: "?",
	    b->bm_task == mdev->receiver.task ? "receiver" :
	    b->bm_task == mdev->asender.task  ? "asender"  :
	    b->bm_task == mdev->worker.task   ? "worker"   : "?");
}
/* Take the bitmap lock (bm_change mutex) and mark the bitmap BM_LOCKED.
 * 'why' and the current task are remembered for debug output, should
 * someone else contend for the lock or touch the bitmap while locked.
 * Warns on contention, but still blocks until the lock is acquired. */
void drbd_bm_lock(struct drbd_conf *mdev, char *why)
{
	struct drbd_bitmap *b = mdev->bitmap;
	int trylock_failed;

	if (!b) {
		dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n");
		return;
	}

	trylock_failed = !mutex_trylock(&b->bm_change);

	if (trylock_failed) {
		/* already locked by someone else: complain, then wait */
		dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
		    current == mdev->receiver.task ? "receiver" :
		    current == mdev->asender.task  ? "asender"  :
		    current == mdev->worker.task   ? "worker"   : current->comm,
		    why, b->bm_why ?: "?",
		    b->bm_task == mdev->receiver.task ? "receiver" :
		    b->bm_task == mdev->asender.task  ? "asender"  :
		    b->bm_task == mdev->worker.task   ? "worker"   : "?");
		mutex_lock(&b->bm_change);
	}
	if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
		dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
	b->bm_why  = why;
	b->bm_task = current;
}
  116. void drbd_bm_unlock(struct drbd_conf *mdev)
  117. {
  118. struct drbd_bitmap *b = mdev->bitmap;
  119. if (!b) {
  120. dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n");
  121. return;
  122. }
  123. if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags))
  124. dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
  125. b->bm_why = NULL;
  126. b->bm_task = NULL;
  127. mutex_unlock(&b->bm_change);
  128. }
/* word offset to long pointer:
 * atomically kmap (slot 'km') the bitmap page that contains long-word
 * 'offset' and return the kernel virtual address of the page start.
 * Caller must __bm_unmap() with the same km slot. */
static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
{
	struct page *page;
	unsigned long page_nr;
	/* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
	page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	page = b->bm_pages[page_nr];
	return (unsigned long *) kmap_atomic(page, km);
}
  140. static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset)
  141. {
  142. return __bm_map_paddr(b, offset, KM_IRQ1);
  143. }
  144. static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
  145. {
  146. kunmap_atomic(p_addr, km);
  147. };
  148. static void bm_unmap(unsigned long *p_addr)
  149. {
  150. return __bm_unmap(p_addr, KM_IRQ1);
  151. }
/* long word offset of _bitmap_ sector */
#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* word offset from start of bitmap to word number _in_page_
 * modulo longs per page
#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
so do it explicitly:
 */
#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
/* Long words per page */
#define LWPP (PAGE_SIZE/sizeof(long))
  163. /*
  164. * actually most functions herein should take a struct drbd_bitmap*, not a
  165. * struct drbd_conf*, but for the debug macros I like to have the mdev around
  166. * to be able to report device specific.
  167. */
  168. static void bm_free_pages(struct page **pages, unsigned long number)
  169. {
  170. unsigned long i;
  171. if (!pages)
  172. return;
  173. for (i = 0; i < number; i++) {
  174. if (!pages[i]) {
  175. printk(KERN_ALERT "drbd: bm_free_pages tried to free "
  176. "a NULL pointer; i=%lu n=%lu\n",
  177. i, number);
  178. continue;
  179. }
  180. __free_page(pages[i]);
  181. pages[i] = NULL;
  182. }
  183. }
  184. static void bm_vk_free(void *ptr, int v)
  185. {
  186. if (v)
  187. vfree(ptr);
  188. else
  189. kfree(ptr);
  190. }
/*
 * "have" and "want" are NUMBER OF PAGES.
 * Returns the (possibly new) page pointer array, or NULL on allocation
 * failure; the old array and pages stay intact on failure.
 */
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
	struct page **old_pages = b->bm_pages;
	struct page **new_pages, *page;
	unsigned int i, bytes, vmalloced = 0;
	unsigned long have = b->bm_number_of_pages;

	/* array pointer and page count must agree: both unset, or both set */
	BUG_ON(have == 0 && old_pages != NULL);
	BUG_ON(have != 0 && old_pages == NULL);

	if (have == want)
		return old_pages;

	/* Trying kmalloc first, falling back to vmalloc.
	 * GFP_KERNEL is ok, as this is done when a lower level disk is
	 * "attached" to the drbd. Context is receiver thread or cqueue
	 * thread. As we have no disk yet, we are not in the IO path,
	 * not even the IO path of the peer. */
	bytes = sizeof(struct page *)*want;
	new_pages = kmalloc(bytes, GFP_KERNEL);
	if (!new_pages) {
		new_pages = vmalloc(bytes);
		if (!new_pages)
			return NULL;
		vmalloced = 1;
	}

	memset(new_pages, 0, bytes);
	if (want >= have) {
		/* growing: keep the old pages, allocate the additional ones */
		for (i = 0; i < have; i++)
			new_pages[i] = old_pages[i];
		for (; i < want; i++) {
			page = alloc_page(GFP_HIGHUSER);
			if (!page) {
				/* roll back: free only the pages WE allocated */
				bm_free_pages(new_pages + have, i - have);
				bm_vk_free(new_pages, vmalloced);
				return NULL;
			}
			new_pages[i] = page;
		}
	} else {
		/* shrinking: keep the first 'want' pages; the caller frees
		 * the surplus old pages later, outside the spinlock */
		for (i = 0; i < want; i++)
			new_pages[i] = old_pages[i];
		/* NOT HERE, we are outside the spinlock!
		bm_free_pages(old_pages + want, have - want);
		*/
	}

	/* remember how the pointer array itself must be freed */
	if (vmalloced)
		set_bit(BM_P_VMALLOCED, &b->bm_flags);
	else
		clear_bit(BM_P_VMALLOCED, &b->bm_flags);

	return new_pages;
}
  243. /*
  244. * called on driver init only. TODO call when a device is created.
  245. * allocates the drbd_bitmap, and stores it in mdev->bitmap.
  246. */
  247. int drbd_bm_init(struct drbd_conf *mdev)
  248. {
  249. struct drbd_bitmap *b = mdev->bitmap;
  250. WARN_ON(b != NULL);
  251. b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
  252. if (!b)
  253. return -ENOMEM;
  254. spin_lock_init(&b->bm_lock);
  255. mutex_init(&b->bm_change);
  256. init_waitqueue_head(&b->bm_io_wait);
  257. mdev->bitmap = b;
  258. return 0;
  259. }
/* Device capacity (sector_t) this bitmap currently covers; 0 without a bitmap. */
sector_t drbd_bm_capacity(struct drbd_conf *mdev)
{
	ERR_IF(!mdev->bitmap) return 0;
	return mdev->bitmap->bm_dev_capacity;
}
  265. /* called on driver unload. TODO: call when a device is destroyed.
  266. */
  267. void drbd_bm_cleanup(struct drbd_conf *mdev)
  268. {
  269. ERR_IF (!mdev->bitmap) return;
  270. bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
  271. bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags));
  272. kfree(mdev->bitmap);
  273. mdev->bitmap = NULL;
  274. }
  275. /*
  276. * since (b->bm_bits % BITS_PER_LONG) != 0,
  277. * this masks out the remaining bits.
  278. * Returns the number of bits cleared.
  279. */
static int bm_clear_surplus(struct drbd_bitmap *b)
{
	/* mask of the valid (in-range) bits in the last used long word */
	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
	size_t w = b->bm_bits >> LN2_BPL;	/* word containing bit bm_bits */
	int cleared = 0;
	unsigned long *p_addr, *bm;

	p_addr = bm_map_paddr(b, w);
	bm = p_addr + MLPP(w);
	if (w < b->bm_words) {
		/* partially used last word: clear the pad bits above bm_bits */
		cleared = hweight_long(*bm & ~mask);
		*bm &= mask;
		w++; bm++;
	}
	if (w < b->bm_words) {
		/* possible extra word from 64bit alignment of bm_words:
		 * clear it entirely */
		cleared += hweight_long(*bm);
		*bm = 0;
	}
	bm_unmap(p_addr);
	return cleared;
}
/* Counterpart of bm_clear_surplus(): set all pad bits beyond bm_bits.
 * Used in drbd_bm_resize() before growing, so a grown area does not
 * inherit stale zero pad bits. */
static void bm_set_surplus(struct drbd_bitmap *b)
{
	/* mask of the valid (in-range) bits in the last used long word */
	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
	size_t w = b->bm_bits >> LN2_BPL;
	unsigned long *p_addr, *bm;

	p_addr = bm_map_paddr(b, w);
	bm = p_addr + MLPP(w);
	if (w < b->bm_words) {
		/* partially used last word: set the pad bits */
		*bm |= ~mask;
		bm++; w++;
	}
	if (w < b->bm_words) {
		/* possible extra 64bit-alignment word: set completely */
		*bm = ~(0UL);
	}
	bm_unmap(p_addr);
}
/* Count the set bits of the whole bitmap, one page worth of words at a
 * time.  If swap_endian is set (and we are big endian), each word is
 * additionally converted in place from little endian to cpu order --
 * that is how bm_rw(READ) fixes up the on-disk byte order. */
static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
{
	unsigned long *p_addr, *bm, offset = 0;
	unsigned long bits = 0;
	unsigned long i, do_now;

	while (offset < b->bm_words) {
		/* words remaining in this page (or until bm_words) */
		i = do_now = min_t(size_t, b->bm_words-offset, LWPP);
		p_addr = __bm_map_paddr(b, offset, KM_USER0);
		bm = p_addr + MLPP(offset);
		while (i--) {
#ifndef __LITTLE_ENDIAN
			if (swap_endian)
				*bm = lel_to_cpu(*bm);
#endif
			bits += hweight_long(*bm++);
		}
		__bm_unmap(p_addr, KM_USER0);
		offset += do_now;
		cond_resched();	/* bitmaps may be large; be nice */
	}

	return bits;
}
  338. static unsigned long bm_count_bits(struct drbd_bitmap *b)
  339. {
  340. return __bm_count_bits(b, 0);
  341. }
  342. static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b)
  343. {
  344. return __bm_count_bits(b, 1);
  345. }
/* offset and len in long words.
 * memset() the given long-word range to byte value 'c', mapping one
 * bitmap page at a time. */
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
	unsigned long *p_addr, *bm;
	size_t do_now, end;

#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)

	end = offset + len;

	if (end > b->bm_words) {
		printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
		return;
	}

	while (offset < end) {
		/* words up to the next page boundary, or up to 'end' */
		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
		p_addr = bm_map_paddr(b, offset);
		bm = p_addr + MLPP(offset);
		if (bm+do_now > p_addr + LWPP) {
			/* paranoia: would overrun the mapped page */
			printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
			       p_addr, bm, (int)do_now);
			break; /* breaks to after catch_oob_access_end() only! */
		}
		memset(bm, c, do_now * sizeof(long));
		bm_unmap(p_addr);
		offset += do_now;
	}
}
  371. /*
  372. * make sure the bitmap has enough room for the attached storage,
  373. * if necessary, resize.
  374. * called whenever we may have changed the device size.
  375. * returns -ENOMEM if we could not allocate enough memory, 0 on success.
  376. * In case this is actually a resize, we copy the old bitmap into the new one.
  377. * Otherwise, the bitmap is initialized to all bits set.
  378. */
int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long bits, words, owords, obits, *p_addr, *bm;
	unsigned long want, have, onpages; /* number of pages */
	struct page **npages, **opages = NULL;
	int err = 0, growing;
	int opages_vmalloced;

	ERR_IF(!b) return -ENOMEM;

	drbd_bm_lock(mdev, "resize");

	dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
			(unsigned long long)capacity);

	if (capacity == b->bm_dev_capacity)
		goto out;

	opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags);

	if (capacity == 0) {
		/* shrink to nothing: detach everything under the spinlock,
		 * free outside of it */
		spin_lock_irq(&b->bm_lock);
		opages = b->bm_pages;
		onpages = b->bm_number_of_pages;
		owords = b->bm_words;
		b->bm_pages = NULL;
		b->bm_number_of_pages =
		b->bm_set =
		b->bm_bits =
		b->bm_words =
		b->bm_dev_capacity = 0;
		spin_unlock_irq(&b->bm_lock);
		bm_free_pages(opages, onpages);
		bm_vk_free(opages, opages_vmalloced);
		goto out;
	}
	bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));

	/* if we would use
	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
	   a 32bit host could present the wrong number of words
	   to a 64bit host.
	*/
	words = ALIGN(bits, 64) >> LN2_BPL;

	if (get_ldev(mdev)) {
		/* the on-disk md area must be able to hold this bitmap */
		D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12));
		put_ldev(mdev);
	}

	/* one extra long to catch off by one errors */
	want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
	have = b->bm_number_of_pages;
	if (want == have) {
		D_ASSERT(b->bm_pages != NULL);
		npages = b->bm_pages;
	} else {
		if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC))
			npages = NULL;	/* fault injection: simulate alloc failure */
		else
			npages = bm_realloc_pages(b, want);
	}

	if (!npages) {
		err = -ENOMEM;
		goto out;
	}

	spin_lock_irq(&b->bm_lock);
	opages = b->bm_pages;
	owords = b->bm_words;
	obits = b->bm_bits;

	growing = bits > obits;
	if (opages)
		/* set the old pad bits, so a grown area starts all-set
		 * ("out of sync") together with the memset below */
		bm_set_surplus(b);

	b->bm_pages = npages;
	b->bm_number_of_pages = want;
	b->bm_bits = bits;
	b->bm_words = words;
	b->bm_dev_capacity = capacity;

	if (growing) {
		/* newly covered words start all-set */
		bm_memset(b, owords, 0xff, words-owords);
		b->bm_set += bits - obits;
	}

	if (want < have) {
		/* implicit: (opages != NULL) && (opages != npages) */
		bm_free_pages(opages + want, have - want);
	}

	/* write magic marker into the "one extra long", debugging aid */
	p_addr = bm_map_paddr(b, words);
	bm = p_addr + MLPP(words);
	*bm = DRBD_MAGIC;
	bm_unmap(p_addr);

	(void)bm_clear_surplus(b);

	spin_unlock_irq(&b->bm_lock);
	if (opages != npages)
		bm_vk_free(opages, opages_vmalloced);
	if (!growing)
		/* shrunk: recount, cheaper than tracking during the copy */
		b->bm_set = bm_count_bits(b);
	dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words);

 out:
	drbd_bm_unlock(mdev);
	return err;
}
  472. /* inherently racy:
  473. * if not protected by other means, return value may be out of date when
  474. * leaving this function...
  475. * we still need to lock it, since it is important that this returns
  476. * bm_set == 0 precisely.
  477. *
  478. * maybe bm_set should be atomic_t ?
  479. */
  480. static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
  481. {
  482. struct drbd_bitmap *b = mdev->bitmap;
  483. unsigned long s;
  484. unsigned long flags;
  485. ERR_IF(!b) return 0;
  486. ERR_IF(!b->bm_pages) return 0;
  487. spin_lock_irqsave(&b->bm_lock, flags);
  488. s = b->bm_set;
  489. spin_unlock_irqrestore(&b->bm_lock, flags);
  490. return s;
  491. }
  492. unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
  493. {
  494. unsigned long s;
  495. /* if I don't have a disk, I don't know about out-of-sync status */
  496. if (!get_ldev_if_state(mdev, D_NEGOTIATING))
  497. return 0;
  498. s = _drbd_bm_total_weight(mdev);
  499. put_ldev(mdev);
  500. return s;
  501. }
/* number of long words in the bitmap (bm_words, 64bit aligned) */
size_t drbd_bm_words(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;
	return b->bm_words;
}
/* total number of valid bits in the bitmap */
unsigned long drbd_bm_bits(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return 0;
	return b->bm_bits;
}
  515. /* merge number words from buffer into the bitmap starting at offset.
  516. * buffer[i] is expected to be little endian unsigned long.
  517. * bitmap must be locked by drbd_bm_lock.
  518. * currently only used from receive_bitmap.
  519. */
void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
			unsigned long *buffer)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr, *bm;
	unsigned long word, bits;
	size_t end, do_now;

	end = offset + number;

	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;
	if (number == 0)
		return;
	WARN_ON(offset >= b->bm_words);
	WARN_ON(end > b->bm_words);

	spin_lock_irq(&b->bm_lock);
	while (offset < end) {
		/* words up to the next page boundary, or up to 'end' */
		do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
		p_addr = bm_map_paddr(b, offset);
		bm = p_addr + MLPP(offset);
		offset += do_now;
		while (do_now--) {
			bits = hweight_long(*bm);
			/* OR-merge: never clears bits already set locally */
			word = *bm | lel_to_cpu(*buffer++);
			*bm++ = word;
			/* account only the newly set bits */
			b->bm_set += hweight_long(word) - bits;
		}
		bm_unmap(p_addr);
	}
	/* with 32bit <-> 64bit cross-platform connect
	 * this is only correct for current usage,
	 * where we _know_ that we are 64 bit aligned,
	 * and know that this function is used in this way, too...
	 */
	if (end == b->bm_words)
		b->bm_set -= bm_clear_surplus(b);
	spin_unlock_irq(&b->bm_lock);
}
  557. /* copy number words from the bitmap starting at offset into the buffer.
  558. * buffer[i] will be little endian unsigned long.
  559. */
void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
		     unsigned long *buffer)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr, *bm;
	size_t end, do_now;

	end = offset + number;

	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;

	spin_lock_irq(&b->bm_lock);
	if ((offset >= b->bm_words) ||
	    (end    >  b->bm_words) ||
	    (number <= 0))
		/* out-of-range request: complain, copy nothing */
		dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n",
			(unsigned long)	offset,
			(unsigned long)	number,
			(unsigned long) b->bm_words);
	else {
		while (offset < end) {
			/* words up to the next page boundary, or 'end' */
			do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
			p_addr = bm_map_paddr(b, offset);
			bm = p_addr + MLPP(offset);
			offset += do_now;
			while (do_now--)
				/* on-wire/on-disk format is little endian */
				*buffer++ = cpu_to_lel(*bm++);
			bm_unmap(p_addr);
		}
	}
	spin_unlock_irq(&b->bm_lock);
}
  590. /* set all bits in the bitmap */
  591. void drbd_bm_set_all(struct drbd_conf *mdev)
  592. {
  593. struct drbd_bitmap *b = mdev->bitmap;
  594. ERR_IF(!b) return;
  595. ERR_IF(!b->bm_pages) return;
  596. spin_lock_irq(&b->bm_lock);
  597. bm_memset(b, 0, 0xff, b->bm_words);
  598. (void)bm_clear_surplus(b);
  599. b->bm_set = b->bm_bits;
  600. spin_unlock_irq(&b->bm_lock);
  601. }
  602. /* clear all bits in the bitmap */
  603. void drbd_bm_clear_all(struct drbd_conf *mdev)
  604. {
  605. struct drbd_bitmap *b = mdev->bitmap;
  606. ERR_IF(!b) return;
  607. ERR_IF(!b->bm_pages) return;
  608. spin_lock_irq(&b->bm_lock);
  609. bm_memset(b, 0, 0, b->bm_words);
  610. b->bm_set = 0;
  611. spin_unlock_irq(&b->bm_lock);
  612. }
/* bio completion callback for the per-page bitmap IO submitted by
 * bm_page_io_async(); last completion wakes the waiter in bm_rw(). */
static void bm_async_io_complete(struct bio *bio, int error)
{
	struct drbd_bitmap *b = bio->bi_private;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	/* strange behavior of some lower level drivers...
	 * fail the request by clearing the uptodate flag,
	 * but do not return any error?!
	 * do we want to WARN() on this? */
	if (!error && !uptodate)
		error = -EIO;

	if (error) {
		/* doh. what now?
		 * for now, set all bits, and flag MD_IO_ERROR */
		__set_bit(BM_MD_IO_ERROR, &b->bm_flags);
	}
	/* count down outstanding bios; wake bm_rw() on the last one */
	if (atomic_dec_and_test(&b->bm_async_io))
		wake_up(&b->bm_io_wait);

	bio_put(bio);
}
/* Submit asynchronous read/write of one bitmap page to its location in
 * the on-disk meta data area; under fault injection the bio is failed
 * immediately with -EIO instead of being submitted. */
static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
{
	/* we are process context. we always get a bio */
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
	unsigned int len;
	sector_t on_disk_sector =
		mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);

	/* this might happen with very small
	 * flexible external meta data device */
	len = min_t(unsigned int, PAGE_SIZE,
		(drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);

	bio->bi_bdev = mdev->ldev->md_bdev;
	bio->bi_sector = on_disk_sector;
	bio_add_page(bio, b->bm_pages[page_nr], len, 0);
	bio->bi_private = b;
	bio->bi_end_io = bm_async_io_complete;

	if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
		/* fault injection: complete with -EIO, never touch the disk */
		bio->bi_rw |= rw;
		bio_endio(bio, -EIO);
	} else {
		submit_bio(rw, bio);
	}
}
# if defined(__LITTLE_ENDIAN)
/* nothing to do, on disk == in memory */
# define bm_cpu_to_lel(x) ((void)0)
# else
/* Big endian only: convert the whole in-memory bitmap to little endian,
 * in place, skipping pages known to contain only 0 resp. only 1 bits. */
void bm_cpu_to_lel(struct drbd_bitmap *b)
{
	/* need to cpu_to_lel all the pages ...
	 * this may be optimized by using
	 * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
	 * the following is still not optimal, but better than nothing */
	unsigned int i;
	unsigned long *p_addr, *bm;
	if (b->bm_set == 0) {
		/* no page at all; avoid swap if all is 0 */
		i = b->bm_number_of_pages;
	} else if (b->bm_set == b->bm_bits) {
		/* only the last page */
		i = b->bm_number_of_pages - 1;
	} else {
		/* all pages */
		i = 0;
	}
	for (; i < b->bm_number_of_pages; i++) {
		p_addr = kmap_atomic(b->bm_pages[i], KM_USER0);
		for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
			*bm = cpu_to_lel(*bm);
		kunmap_atomic(p_addr, KM_USER0);
	}
}
# endif
/* lel_to_cpu == cpu_to_lel */
# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
  688. /*
  689. * bm_rw: read/write the whole bitmap from/to its on disk location.
  690. */
/* Read or write the whole bitmap, page-wise and asynchronously, from/to
 * its on-disk (little endian) location.  Requires drbd_bm_lock() held.
 * Returns 0 or -EIO if any page IO failed. */
static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
{
	struct drbd_bitmap *b = mdev->bitmap;
	/* sector_t sector; */
	int bm_words, num_pages, i;
	unsigned long now;
	char ppb[10];
	int err = 0;

	WARN_ON(!bm_is_locked(b));

	/* no spinlock here, the drbd_bm_lock should be enough! */

	bm_words  = drbd_bm_words(mdev);
	num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT;

	/* on disk bitmap is little endian */
	if (rw == WRITE)
		bm_cpu_to_lel(b);

	now = jiffies;
	atomic_set(&b->bm_async_io, num_pages);
	__clear_bit(BM_MD_IO_ERROR, &b->bm_flags);

	/* let the layers below us try to merge these bios... */
	for (i = 0; i < num_pages; i++)
		bm_page_io_async(mdev, b, i, rw);

	drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
	/* wait for all page bios to complete (see bm_async_io_complete) */
	wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);

	if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
		drbd_chk_io_error(mdev, 1, TRUE);
		err = -EIO;
	}

	now = jiffies;
	if (rw == WRITE) {
		/* swap back endianness */
		bm_lel_to_cpu(b);
		/* flush bitmap to stable storage */
		drbd_md_flush(mdev);
	} else /* rw == READ */ {
		/* just read, if necessary adjust endianness */
		b->bm_set = bm_count_bits_swap_endian(b);
		dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
		     jiffies - now);
	}
	now = b->bm_set;

	dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
	     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);

	return err;
}
  736. /**
  737. * drbd_bm_read() - Read the whole bitmap from its on disk location.
  738. * @mdev: DRBD device.
  739. */
int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
{
	/* thin wrapper: bm_rw() does the paged, asynchronous IO */
	return bm_rw(mdev, READ);
}
  744. /**
  745. * drbd_bm_write() - Write the whole bitmap to its on disk location.
  746. * @mdev: DRBD device.
  747. */
int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
{
	/* thin wrapper: bm_rw() does the paged, asynchronous IO */
	return bm_rw(mdev, WRITE);
}
  752. /**
  753. * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap
  754. * @mdev: DRBD device.
  755. * @enr: Extent number in the resync lru (happens to be sector offset)
  756. *
  757. * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered
  758. * by a single sector write. Therefore enr == sector offset from the
  759. * start of the bitmap.
  760. */
int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local)
{
	sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
				      + mdev->ldev->md.bm_offset;
	int bm_words, num_words, offset;
	int err = 0;

	/* md_io_page is shared; serialize against other meta data IO */
	mutex_lock(&mdev->md_io_mutex);
	bm_words  = drbd_bm_words(mdev);
	offset    = S2W(enr);	/* word offset into bitmap */
	num_words = min(S2W(1), bm_words - offset);
	if (num_words < S2W(1))
		/* last, partial sector: zero-pad the buffer first */
		memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE);
	drbd_bm_get_lel(mdev, offset, num_words,
			page_address(mdev->md_io_page));
	if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) {
		int i;
		err = -EIO;
		dev_err(DEV, "IO ERROR writing bitmap sector %lu "
		    "(meta-disk sector %llus)\n",
		    enr, (unsigned long long)on_disk_sector);
		drbd_chk_io_error(mdev, 1, TRUE);
		/* this sector could not be written: make sure the
		 * covered activity log extents get resynced */
		for (i = 0; i < AL_EXT_PER_BM_SECT; i++)
			drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i);
	}
	mdev->bm_writ_cnt++;
	mutex_unlock(&mdev->md_io_mutex);
	return err;
}
/* NOTE
 * find_first_bit returns int, we return unsigned long.
 * should not make much difference anyways, but ...
 *
 * this returns a bit number, NOT a sector!
 */
/* mask of the bit index within one page (PAGE_SHIFT+3 == log2 of bits per page) */
#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1)
/* Scan the bitmap page by page for the next set (or, with find_zero_bit,
 * the next clear) bit at or after bit number bm_fo.
 * @km selects the kmap_atomic slot used for the temporary page mappings.
 * Returns the bit number, or -1UL if no such bit exists before bm_bits.
 * Caller must hold b->bm_lock or have the bitmap otherwise locked. */
static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
	const int find_zero_bit, const enum km_type km)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long i = -1UL;
	unsigned long *p_addr;
	unsigned long bit_offset; /* bit offset of the mapped page. */

	if (bm_fo > b->bm_bits) {
		dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
	} else {
		while (bm_fo < b->bm_bits) {
			unsigned long offset;
			bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */
			offset = bit_offset >> LN2_BPL; /* word offset of the page */
			p_addr = __bm_map_paddr(b, offset, km);

			if (find_zero_bit)
				i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);
			else
				i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);

			__bm_unmap(p_addr, km);
			if (i < PAGE_SIZE*8) {
				/* hit within this page; translate to a global bit number */
				i = bit_offset + i;
				/* a hit in the padding bits past bm_bits does not
				 * count; break so i gets reset to "not found" */
				if (i >= b->bm_bits)
					break;
				goto found;
			}
			/* nothing in this page; continue with the next one */
			bm_fo = bit_offset + PAGE_SIZE*8;
		}
		i = -1UL; /* not found (also reached via break above) */
	}
 found:
	return i;
}
  829. static unsigned long bm_find_next(struct drbd_conf *mdev,
  830. unsigned long bm_fo, const int find_zero_bit)
  831. {
  832. struct drbd_bitmap *b = mdev->bitmap;
  833. unsigned long i = -1UL;
  834. ERR_IF(!b) return i;
  835. ERR_IF(!b->bm_pages) return i;
  836. spin_lock_irq(&b->bm_lock);
  837. if (bm_is_locked(b))
  838. bm_print_lock_info(mdev);
  839. i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
  840. spin_unlock_irq(&b->bm_lock);
  841. return i;
  842. }
  843. unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
  844. {
  845. return bm_find_next(mdev, bm_fo, 0);
  846. }
/* Compiled out: kept as documentation of the intended zero-bit search API. */
#if 0
/* not yet needed for anything. */
unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
{
	return bm_find_next(mdev, bm_fo, 1);
}
#endif
  854. /* does not spin_lock_irqsave.
  855. * you must take drbd_bm_lock() first */
  856. unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
  857. {
  858. /* WARN_ON(!bm_is_locked(mdev)); */
  859. return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
  860. }
  861. unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
  862. {
  863. /* WARN_ON(!bm_is_locked(mdev)); */
  864. return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
  865. }
/* returns number of bits actually changed.
 * for val != 0, we change 0 -> 1, return code positive
 * for val == 0, we change 1 -> 0, return code negative
 * wants bitnr, not sector.
 * expected to be called for only a few bits (e - s about BITS_PER_LONG).
 * Must hold bitmap lock already. */
int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
	unsigned long e, int val, const enum km_type km)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr = NULL;
	unsigned long bitnr;
	/* page currently mapped via p_addr; -1UL means "none yet" */
	unsigned long last_page_nr = -1UL;
	int c = 0;

	/* clamp an out-of-range end bit to the last valid bit (or 0 for an
	 * empty bitmap) and complain loudly: callers should never do this */
	if (e >= b->bm_bits) {
		dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
				s, e, b->bm_bits);
		e = b->bm_bits ? b->bm_bits -1 : 0;
	}
	for (bitnr = s; bitnr <= e; bitnr++) {
		unsigned long offset = bitnr>>LN2_BPL; /* word index of this bit */
		unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
		/* remap only when we cross into a different bitmap page */
		if (page_nr != last_page_nr) {
			if (p_addr)
				__bm_unmap(p_addr, km);
			p_addr = __bm_map_paddr(b, offset, km);
			last_page_nr = page_nr;
		}
		/* non-atomic __test_and_* is fine: caller holds the bitmap lock.
		 * only bits that actually flip are counted. */
		if (val)
			c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr));
		else
			c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr));
	}
	if (p_addr)
		__bm_unmap(p_addr, km);
	/* keep the cached count of set bits consistent with the change */
	b->bm_set += c;
	return c;
}
  904. /* returns number of bits actually changed.
  905. * for val != 0, we change 0 -> 1, return code positive
  906. * for val == 0, we change 1 -> 0, return code negative
  907. * wants bitnr, not sector */
  908. int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
  909. const unsigned long e, int val)
  910. {
  911. unsigned long flags;
  912. struct drbd_bitmap *b = mdev->bitmap;
  913. int c = 0;
  914. ERR_IF(!b) return 1;
  915. ERR_IF(!b->bm_pages) return 0;
  916. spin_lock_irqsave(&b->bm_lock, flags);
  917. if (bm_is_locked(b))
  918. bm_print_lock_info(mdev);
  919. c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);
  920. spin_unlock_irqrestore(&b->bm_lock, flags);
  921. return c;
  922. }
  923. /* returns number of bits changed 0 -> 1 */
  924. int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
  925. {
  926. return bm_change_bits_to(mdev, s, e, 1);
  927. }
  928. /* returns number of bits changed 1 -> 0 */
  929. int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
  930. {
  931. return -bm_change_bits_to(mdev, s, e, 0);
  932. }
  933. /* sets all bits in full words,
  934. * from first_word up to, but not including, last_word */
  935. static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
  936. int page_nr, int first_word, int last_word)
  937. {
  938. int i;
  939. int bits;
  940. unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0);
  941. for (i = first_word; i < last_word; i++) {
  942. bits = hweight_long(paddr[i]);
  943. paddr[i] = ~0UL;
  944. b->bm_set += BITS_PER_LONG - bits;
  945. }
  946. kunmap_atomic(paddr, KM_USER0);
  947. }
/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave.
 * You must first drbd_bm_lock().
 * Can be called to set the whole bitmap in one go.
 * Sets bits from s to e _inclusive_. */
void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
	/* First set_bit from the first bit (s)
	 * up to the next long boundary (sl),
	 * then assign full words up to the last long boundary (el),
	 * then set_bit up to and including the last bit (e).
	 *
	 * Do not use memset, because we must account for changes,
	 * so we need to loop over the words with hweight() anyways.
	 */
	unsigned long sl = ALIGN(s,BITS_PER_LONG);	/* s rounded up to a long boundary */
	unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); /* e+1 rounded down */
	int first_page;
	int last_page;
	int page_nr;
	int first_word;
	int last_word;

	/* short range: just set bit by bit; sl/el may not even describe
	 * a non-empty full-word middle section in that case */
	if (e - s <= 3*BITS_PER_LONG) {
		/* don't bother; el and sl may even be wrong. */
		__bm_change_bits_to(mdev, s, e, 1, KM_USER0);
		return;
	}

	/* difference is large enough that we can trust sl and el */

	/* bits filling the current long */
	if (sl)
		__bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0);

	first_page = sl >> (3 + PAGE_SHIFT);	/* page holding the first full word */
	last_page = el >> (3 + PAGE_SHIFT);	/* page holding the word after the last full one */

	/* MLPP: modulo longs per page */
	/* LWPP: long words per page */
	first_word = MLPP(sl >> LN2_BPL);
	last_word = LWPP;

	/* first and full pages, unless first page == last page */
	for (page_nr = first_page; page_nr < last_page; page_nr++) {
		bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
		/* setting a huge bitmap may take a while; be preemption friendly */
		cond_resched();
		first_word = 0;
	}

	/* last page (respectively only page, for first page == last page) */
	last_word = MLPP(el >> LN2_BPL);
	bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);

	/* possibly trailing bits.
	 * example: (e & 63) == 63, el will be e+1.
	 * if that even was the very last bit,
	 * it would trigger an assert in __bm_change_bits_to()
	 */
	if (el <= e)
		__bm_change_bits_to(mdev, el, e, 1, KM_USER0);
}
  1001. /* returns bit state
  1002. * wants bitnr, NOT sector.
  1003. * inherently racy... area needs to be locked by means of {al,rs}_lru
  1004. * 1 ... bit set
  1005. * 0 ... bit not set
  1006. * -1 ... first out of bounds access, stop testing for bits!
  1007. */
  1008. int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
  1009. {
  1010. unsigned long flags;
  1011. struct drbd_bitmap *b = mdev->bitmap;
  1012. unsigned long *p_addr;
  1013. int i;
  1014. ERR_IF(!b) return 0;
  1015. ERR_IF(!b->bm_pages) return 0;
  1016. spin_lock_irqsave(&b->bm_lock, flags);
  1017. if (bm_is_locked(b))
  1018. bm_print_lock_info(mdev);
  1019. if (bitnr < b->bm_bits) {
  1020. unsigned long offset = bitnr>>LN2_BPL;
  1021. p_addr = bm_map_paddr(b, offset);
  1022. i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0;
  1023. bm_unmap(p_addr);
  1024. } else if (bitnr == b->bm_bits) {
  1025. i = -1;
  1026. } else { /* (bitnr > b->bm_bits) */
  1027. dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
  1028. i = 0;
  1029. }
  1030. spin_unlock_irqrestore(&b->bm_lock, flags);
  1031. return i;
  1032. }
/* returns number of bits set in the range [s, e] */
int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
	unsigned long flags;
	struct drbd_bitmap *b = mdev->bitmap;
	/* p_addr/page_nr cache the currently mapped page; -1 == none yet */
	unsigned long *p_addr = NULL, page_nr = -1;
	unsigned long bitnr;
	int c = 0;
	size_t w;

	/* If this is called without a bitmap, that is a bug. But just to be
	 * robust in case we screwed up elsewhere, in that case pretend there
	 * was one dirty bit in the requested area, so we won't try to do a
	 * local read there (no bitmap probably implies no disk) */
	ERR_IF(!b) return 1;
	ERR_IF(!b->bm_pages) return 1;

	spin_lock_irqsave(&b->bm_lock, flags);
	if (bm_is_locked(b))
		bm_print_lock_info(mdev);
	for (bitnr = s; bitnr <= e; bitnr++) {
		w = bitnr >> LN2_BPL;	/* word number holding this bit */
		/* remap only when the word falls on a different page */
		if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) {
			page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3);
			if (p_addr)
				bm_unmap(p_addr);
			p_addr = bm_map_paddr(b, w);
		}
		ERR_IF (bitnr >= b->bm_bits) {
			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
		} else {
			/* bit index relative to the start of the mapped page */
			c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
		}
	}
	if (p_addr)
		bm_unmap(p_addr);
	spin_unlock_irqrestore(&b->bm_lock, flags);
	return c;
}
  1070. /* inherently racy...
  1071. * return value may be already out-of-date when this function returns.
  1072. * but the general usage is that this is only use during a cstate when bits are
  1073. * only cleared, not set, and typically only care for the case when the return
  1074. * value is zero, or we already "locked" this "bitmap extent" by other means.
  1075. *
  1076. * enr is bm-extent number, since we chose to name one sector (512 bytes)
  1077. * worth of the bitmap a "bitmap extent".
  1078. *
  1079. * TODO
  1080. * I think since we use it like a reference count, we should use the real
  1081. * reference count of some bitmap extent element from some lru instead...
  1082. *
  1083. */
  1084. int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
  1085. {
  1086. struct drbd_bitmap *b = mdev->bitmap;
  1087. int count, s, e;
  1088. unsigned long flags;
  1089. unsigned long *p_addr, *bm;
  1090. ERR_IF(!b) return 0;
  1091. ERR_IF(!b->bm_pages) return 0;
  1092. spin_lock_irqsave(&b->bm_lock, flags);
  1093. if (bm_is_locked(b))
  1094. bm_print_lock_info(mdev);
  1095. s = S2W(enr);
  1096. e = min((size_t)S2W(enr+1), b->bm_words);
  1097. count = 0;
  1098. if (s < b->bm_words) {
  1099. int n = e-s;
  1100. p_addr = bm_map_paddr(b, s);
  1101. bm = p_addr + MLPP(s);
  1102. while (n--)
  1103. count += hweight_long(*bm++);
  1104. bm_unmap(p_addr);
  1105. } else {
  1106. dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s);
  1107. }
  1108. spin_unlock_irqrestore(&b->bm_lock, flags);
  1109. return count;
  1110. }
/* set all bits covered by the AL-extent al_enr.
 * Returns the number of bits that changed from 0 to 1 (the weight delta). */
unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr, *bm;
	unsigned long weight;	/* bm_set before the change; delta on return */
	int count, s, e, i, do_now;
	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;

	spin_lock_irq(&b->bm_lock);
	if (bm_is_locked(b))
		bm_print_lock_info(mdev);
	weight = b->bm_set;

	s = al_enr * BM_WORDS_PER_AL_EXT;	/* first word of this AL extent */
	e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
	/* assert that s and e are on the same page */
	D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
	      ==  s    >> (PAGE_SHIFT - LN2_BPL + 3));
	count = 0;
	if (s < b->bm_words) {
		i = do_now = e-s;
		p_addr = bm_map_paddr(b, s);
		bm = p_addr + MLPP(s);
		/* count the bits that were already set, then set all of them */
		while (i--) {
			count += hweight_long(*bm);
			*bm = -1UL;
			bm++;
		}
		bm_unmap(p_addr);
		/* newly set bits == total bits written minus previously set */
		b->bm_set += do_now*BITS_PER_LONG - count;
		/* the last words may extend past bm_bits; undo the surplus */
		if (e == b->bm_words)
			b->bm_set -= bm_clear_surplus(b);
	} else {
		dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s);
	}
	weight = b->bm_set - weight;
	spin_unlock_irq(&b->bm_lock);
	return weight;
}