/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
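
/*
 * small helpers for translating between a disk offset / byte count and the
 * bit index / bit count used inside a bitmap entry, and for finding the
 * start offset of the bitmap region that covers a given offset.
 */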

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}
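
/*
 * insert a free space entry into the rbtree indexed by offset. extent
 * entries and bitmap entries may share an offset; the comment in the loop
 * below describes how that tie is broken.
 */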

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
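
/*
 * unlink_free_space/link_free_space remove or add a single entry in the
 * offset rbtree and keep the block group's free_space and free_extents
 * counters in sync with it.
 */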

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes, possible_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
		(sizeof(struct btrfs_free_space) *
		 block_group->extents_thresh);

	if (possible_bytes > max_bytes) {
		int extent_bytes = max_bytes -
			(block_group->total_bitmaps * PAGE_CACHE_SIZE);

		if (extent_bytes <= 0) {
			block_group->extents_thresh = 0;
			return;
		}

		block_group->extents_thresh = extent_bytes /
			(sizeof(struct btrfs_free_space));
	}
}
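
/*
 * clear or set the bits covering [offset, offset + bytes) in a bitmap entry
 * and adjust the byte counts of the entry and the block group to match.
 */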

static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}
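
/*
 * look for a run of free space of at least *bytes in the given bitmap entry,
 * starting at or after *offset. on success *offset and *bytes describe the
 * run that was found and 0 is returned, otherwise -1.
 */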

static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
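
/*
 * walk the offset rbtree starting from the bitmap region that would cover
 * *offset and return the first entry, extent or bitmap, that can satisfy
 * *bytes, updating *offset and *bytes to describe the space found.
 */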

static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}
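
/*
 * turn a pre-allocated free space entry into a bitmap entry covering the
 * bitmap-aligned region around 'offset', link it into the tree and update
 * the bitmap count and extent threshold.
 */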

static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}
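
/*
 * clear [*offset, *offset + *bytes) out of the bitmap entries covering the
 * range, moving on to the next bitmap when the range crosses a bitmap
 * boundary and freeing any bitmap that ends up empty.
 */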

static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		bitmap_info = tree_search_offset(block_group,
						 offset_to_bitmap(block_group,
								  *offset),
						 1, 0);
		if (!bitmap_info)
			return -EINVAL;

		if (!bitmap_info->bitmap)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
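
/*
 * try to store the free space described by 'info' in bitmap entries rather
 * than as an extent entry. returns 0 if the caller should fall back to an
 * extent entry, > 0 if the space was recorded in bitmaps, or a negative
 * error on allocation failure.
 */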

static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}
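
/*
 * add free space back into the cache, merging it with any adjacent extent
 * entries and dropping it into a bitmap when the extent threshold for this
 * block group has been reached.
 */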

int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
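
/*
 * remove [offset, offset + bytes) from the free space cache. the range may
 * match an extent entry exactly, sit inside one, or be spread across one or
 * more bitmap entries.
 */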

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		WARN_ON(1);
		goto out_lock;
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}
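
/*
 * debugging helper: print every free space entry in the block group and
 * count how many of them are at least 'bytes' large.
 */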

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}
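
/*
 * return the total free space tracked for this block group by summing every
 * entry in the offset rbtree.
 */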

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}
	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root.rb_node = NULL;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}
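
/*
 * free every cluster and every free space entry attached to this block
 * group, dropping the tree lock periodically so we don't hold it for too
 * long.
 */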

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}
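
/*
 * allocate 'bytes' from the cache, searching for bytes + empty_size worth of
 * space and trimming the extent or bitmap entry the allocation came from.
 * returns the start offset of the allocation, or 0 if nothing suitable was
 * found.
 */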

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);
	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache. If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}
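
/*
 * allocate 'bytes' from a cluster that points at a bitmap: find the bitmap
 * entry at the cluster's window_start and search it for space at or after
 * min_start. returns the start offset of the allocation, or 0 on failure.
 */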

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	entry = tree_search_offset(block_group, search_start, 0, 0);

	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);
	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);

	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);
	return ret;
}
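
/*
 * try to build a cluster out of a single bitmap entry: look for a window
 * with enough free bits for 'bytes', made up of runs that are each at least
 * 'min_bytes' long. on success the cluster is pointed at the bitmap and 0
 * is returned, otherwise -1.
 */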

static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group. The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large. reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = 0;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root.rb_node = NULL;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}