free-space-cache.c

/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
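
/*
 * Illustrative sizing (assuming 4 KiB pages and a 4 KiB sectorsize, the
 * common case): one bitmap occupies a single page, so it holds
 * 4096 * 8 = 32768 bits and therefore describes 32768 * 4 KiB = 128 MiB
 * of block group space while costing only one page of memory plus one
 * struct btrfs_free_space.
 */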

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}
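
/*
 * Worked example for offset_to_bitmap() (hypothetical numbers, again
 * assuming a 128 MiB bytes_per_bitmap): for a block group whose
 * key.objectid is 1024 MiB and an offset of 1024 MiB + 200 MiB, the
 * relative offset of 200 MiB rounds down to 128 MiB, so the function
 * returns 1024 MiB + 128 MiB, the start of the second bitmap-sized
 * window inside the block group.
 */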

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;

				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1GiB of
	 * space at or below 32k, so we need to adjust how much memory we
	 * allow to be used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can
	 * make sure we don't go over our overall goal of
	 * MAX_CACHE_BYTES_PER_GIG as we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
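
/*
 * Rough numbers (assuming a 1 GiB block group, 4 KiB pages and, say, a
 * 48-byte struct btrfs_free_space): max_bytes is 32 KiB.  With one bitmap
 * already in use, bitmap_bytes accounts for two pages (8 KiB), leaving
 * 24 KiB, which is then capped at max_bytes / 2 = 16 KiB, so roughly
 * 16384 / 48, or about 340 extent entries, are allowed before new free
 * space starts being pushed into bitmaps.
 */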

static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}
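
/*
 * Scan a bitmap entry for a run of set bits that is at least *bytes long,
 * starting at or after *offset.  On success the start and length of the run
 * that was found are written back through *offset and *bytes and 0 is
 * returned; otherwise -1 is returned and the output parameters are left
 * untouched.
 */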
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
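
/*
 * Walk the offset-sorted tree looking for the first entry, extent or bitmap,
 * that is large enough for a request of *bytes.  The search starts from the
 * bitmap-sized window containing *offset so that a bitmap covering the
 * requested range is not skipped.  On success *offset and *bytes are updated
 * to describe the space found and the entry is returned; otherwise NULL is
 * returned.
 */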
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
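
/*
 * Try to account the free space described by 'info' in one or more bitmap
 * entries instead of as an extent entry.  Returns 0 if the caller should
 * keep it as an extent entry, 1 if the space was fully recorded in bitmaps
 * (in which case 'info' has been consumed or freed), or a negative errno on
 * allocation failure.
 */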
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
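
/*
 * Remove 'bytes' of free space starting at 'offset' from the cache.  The
 * range may be spread across extent entries and bitmap entries, and may
 * split an existing extent entry in two; the range is expected to already
 * be recorded as free, and removing space the cache does not know about is
 * treated as a bug.
 */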
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root.rb_node = NULL;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}
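
/*
 * Carve 'bytes' of space out of the free space cache, searching from
 * 'offset'.  The return value is the logical start of the space handed out,
 * or 0 if nothing large enough was found.  'empty_size' only pads the search
 * size; the caller is still charged just 'bytes'.
 *
 * Hypothetical caller sketch (every name other than the function itself is
 * made up for illustration):
 *
 *	u64 start = btrfs_find_space_for_alloc(block_group, search_start,
 *					       num_bytes, 0);
 *	if (!start)
 *		try_another_block_group();
 */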
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it, don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}
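
/*
 * Try to build a cluster out of a single bitmap entry: look for at least
 * 'bytes' worth of set bits, scanned in runs of at least 'min_bytes', at or
 * after 'offset'.  On success the cluster is pointed at the bitmap via
 * window_start/points_to_bitmap and 0 is returned; -1 means no suitable run
 * of bits was found.
 */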
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);
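
	/*
	 * As a rough example (hypothetical numbers): for bytes = 1 MiB and
	 * empty_size = 3 MiB, SSD_SPREAD demands 4 MiB contiguous pieces,
	 * metadata while flushing delayed refs demands pieces of at least
	 * 2 MiB, ordinary metadata only 1 MiB (the >> 4 result of 256 KiB is
	 * below 'bytes'), and data block groups demand at least 1 MiB as well.
	 */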

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root.rb_node = NULL;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}