free-space-cache.c

/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}
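
/*
 * Insert a free space entry into the offset-sorted rbtree.  'bitmap' tells
 * us whether the node being inserted is a bitmap entry, which matters when
 * an extent entry and a bitmap entry share the same offset (see the comment
 * in the loop below).
 */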
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}
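
/*
 * Recompute block_group->extents_thresh, the number of extent entries we
 * allow before new free space is pushed into bitmaps instead of being
 * tracked as individual extent entries.
 */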
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
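
/*
 * Clear the bits covering [offset, offset + bytes) in a bitmap entry and
 * deduct the removed range from the entry's and the block group's free
 * space accounting.
 */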
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}
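
/*
 * Mark [offset, offset + bytes) free in a bitmap entry and add the range to
 * the entry's and the block group's free space accounting.
 */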
static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}
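
/*
 * Look for a run of at least '*bytes' contiguous free bytes in a bitmap
 * entry, starting at or after '*offset'.  On success return 0 and update
 * *offset and *bytes with the start and length of the run found; return -1
 * if no run large enough exists in this bitmap.
 */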
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
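
/*
 * Walk the free space tree, starting from the bitmap region that would cover
 * '*offset', and return the first entry (extent or bitmap) that can satisfy
 * a '*bytes' sized allocation, updating *offset and *bytes to describe the
 * space found.  Returns NULL if nothing large enough is available.
 */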
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}
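
/*
 * Convert 'info' (whose bitmap page has already been allocated by the
 * caller) into an empty bitmap entry covering the bitmap-aligned region
 * around 'offset', link it into the tree and update the bitmap count and
 * extent threshold.
 */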
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}
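
/*
 * Clear [*offset, *offset + *bytes) out of the bitmap entry it starts in,
 * walking into following bitmap entries if the range spans more than one.
 * Returns 0 when everything was removed, -EAGAIN if the caller needs to
 * restart because the rest of the range is covered by an extent entry, and
 * -EINVAL if the space to remove could not be found.
 */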
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
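
/*
 * Try to record the free space described by 'info' in a bitmap rather than
 * as an extent entry.  Returns 0 if the space should just be added as an
 * extent entry, 1 if it was fully absorbed into one or more bitmaps (a new
 * bitmap is created if needed), or a negative errno on allocation failure.
 * The caller holds block_group->tree_lock; it is dropped and retaken around
 * the memory allocations below.
 */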
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}
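
/*
 * Add [offset, offset + bytes) back to the block group's free space cache,
 * merging it with adjacent extent entries where possible.  When there is
 * nothing to merge with, the space may end up in a bitmap instead (see
 * insert_into_bitmap).
 */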
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is, remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
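
/*
 * Remove [offset, offset + bytes) from the free space cache, trimming or
 * splitting extent entries and clearing bitmap bits as needed.  Used by the
 * tree logging code.
 */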
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}
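
/*
 * Debug helper: print every free space entry in the block group and count
 * how many of them are at least 'bytes' in size.
 */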
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}
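
/*
 * Return the total number of free bytes tracked by the block group's free
 * space cache, summed over all extent and bitmap entries.
 */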
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}
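
/*
 * Tear down the in-memory free space cache for a block group: return any
 * clusters to the cache and then free every remaining free space entry.
 */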
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}
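
/*
 * Allocate 'bytes' from the block group's free space cache, searching for a
 * region of at least bytes + empty_size.  Returns the logical start offset
 * of the space handed out, or 0 if nothing large enough was found.
 */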
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}
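
/*
 * Allocate 'bytes' from a cluster that points at a bitmap entry rather than
 * at a set of extent entries.  Returns the start offset of the allocation,
 * or 0 if the cluster no longer belongs to this block group or the bitmap
 * has no suitable run at or after min_start.
 */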
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);
	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while(1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}
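
/*
 * Try to set up 'cluster' on top of a single bitmap entry: look for runs of
 * free bits totalling at least 'bytes', each run at least 'min_bytes' long.
 * On success the cluster's window_start points into the bitmap and
 * points_to_bitmap is set; returns -1 if no suitable runs were found.
 */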
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}