free-space-cache.c

/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

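/*
 * Each bitmap entry covers one page worth of bits, one bit per sector.
 * With 4K pages that is 32768 bits, so a single bitmap describes 128MB
 * of space when the sectorsize is also 4K.
 */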
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}

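/*
 * Link a free space entry into the offset-sorted rbtree.  A bitmap entry
 * and an extent entry may share the same offset; the tie-break below keeps
 * the extent entry first in tree order.
 */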
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

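/*
 * A worked example of the budget below, assuming 4K pages: a 1GB block
 * group gets a 32K budget, which is room for eight bitmap pages or a few
 * hundred extent entries (32K / sizeof(struct btrfs_free_space)) before
 * extents_thresh starts shrinking to make room for bitmaps.
 */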
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes, possible_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
		(sizeof(struct btrfs_free_space) *
		 block_group->extents_thresh);

	if (possible_bytes > max_bytes) {
		int extent_bytes = max_bytes -
			(block_group->total_bitmaps * PAGE_CACHE_SIZE);

		if (extent_bytes <= 0) {
			block_group->extents_thresh = 0;
			return;
		}

		block_group->extents_thresh = extent_bytes /
			(sizeof(struct btrfs_free_space));
	}
}

static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}

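/*
 * Scan a single bitmap for a run of at least *bytes of free space starting
 * at or after *offset.  On success the start and length of the run found
 * are handed back through *offset and *bytes.
 */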
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}

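/*
 * Walk the offset tree starting from the bitmap region that would cover
 * *offset and return the first entry, extent or bitmap, that is large
 * enough to satisfy the request, updating *offset and *bytes to describe
 * the space found.
 */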
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

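/*
 * Clear the requested range out of one or more bitmap entries.  Returns
 * -EAGAIN when the caller needs to retry because the remainder of the
 * range lives in an extent entry or a different bitmap, or -EINVAL when
 * there is nothing after this bitmap left to remove from.
 */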
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *bitmap_info,
			    u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}

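/*
 * Try to record the new free space in an existing bitmap, adding a fresh
 * bitmap entry if none covers the offset.  Returns 0 when the caller should
 * add the space as a regular extent entry instead, 1 when the space was
 * fully accounted in a bitmap, or a negative errno.
 */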
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

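/*
 * Entry point for returning space to the cache: merge with any adjacent
 * extent entries, otherwise either push the range into a bitmap or link a
 * new extent entry for it.
 */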
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);
		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root.rb_node = NULL;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

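/*
 * Allocate 'bytes' out of the free space cache.  Returns the logical offset
 * of the space removed from the cache, or 0 if nothing large enough was
 * found near 'offset'.
 */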
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

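/*
 * Allocate from a cluster that points at a bitmap.  The cluster only
 * records the bitmap's window start, so the bitmap is searched again here
 * and the bits that are handed out get cleared.
 */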
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}

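/*
 * Build a cluster out of a single bitmap entry: look for a window of at
 * least min_bytes of set bits and remember where it starts.  The bits stay
 * set here; btrfs_alloc_from_bitmap() clears them as space is handed out.
 */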
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = 0;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root.rb_node = NULL;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}