free-space-cache.c

/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static void recalculate_thresholds(struct btrfs_block_group_cache
				   *block_group);
static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info);

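/*
 * Look up (or revalidate) the hidden inode that backs a block group's
 * on-disk free space cache.  Returns the inode with an extra reference on
 * success, or an ERR_PTR() on failure; the caller is responsible for the
 * eventual iput().
 */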
struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(root, path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(root, path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	spin_lock(&block_group->lock);
	if (!root->fs_info->closing) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

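/*
 * Create the backing inode for a block group's free space cache: a plain
 * 0600 regular file flagged NOCOMPRESS | PREALLOC | NODATASUM, plus the
 * btrfs_free_space_header item (keyed by BTRFS_FREE_SPACE_OBJECTID with the
 * block group's objectid as the offset) that points back at it.
 */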
int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 objectid;
	int ret;

	ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
	if (ret < 0)
		return ret;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		return ret;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			      BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item,
				    block_group->key.objectid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(root, path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	return 0;
}

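/*
 * Throw away the current contents of a cache inode before it is rewritten:
 * reserve from the orphan block_rsv, zero i_size, drop the page cache pages,
 * and delete the old EXTENT_DATA items within the caller's transaction.
 */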
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	loff_t oldsize;
	int ret = 0;

	trans->block_rsv = root->orphan_block_rsv;
	ret = btrfs_block_rsv_check(trans, root,
				    root->orphan_block_rsv,
				    0, 5);
	if (ret)
		return ret;

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret) {
		WARN_ON(1);
		return ret;
	}

	return btrfs_update_inode(trans, root, inode);
}

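/*
 * Serialize the in-memory free space tree for a block group into its cache
 * inode.  Layout as written below: the first page starts with one checksum
 * per page followed by a u64 generation, then the stream of
 * btrfs_free_space_entry records (extents and bitmap headers); each bitmap's
 * PAGE_CACHE_SIZE payload is written on its own page after the entries.
 * Returns 1 if the cache was written, 0 on any failure (in which case the
 * block group is marked BTRFS_DC_ERROR).
 */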
int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct page *page;
	struct extent_state *cached_state = NULL;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 bytes = 0;
	u32 *crc, *checksums;
	pgoff_t index = 0, last_index = 0;
	unsigned long first_page_offset;
	int num_checksums;
	int entries = 0;
	int bitmaps = 0;
	int ret = 0;

	root = root->fs_info->tree_root;

	INIT_LIST_HEAD(&bitmap_list);

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	if (!i_size_read(inode)) {
		iput(inode);
		return 0;
	}

	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

	/* We need a checksum per page. */
	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
	crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
	if (!crc) {
		iput(inode);
		return 0;
	}

	/* Since the first page has all of our checksums and our generation we
	 * need to calculate the offset into the page that we can start writing
	 * our entries.
	 */
	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);

	node = rb_first(&block_group->free_space_offset);
	if (!node)
		goto out_free;

	/*
	 * Lock all pages first so we can lock the extent safely.
	 *
	 * NOTE: Because we hold the ref the entire time we're going to write to
	 * the page find_get_page should never fail, so we don't do a check
	 * after find_get_page at this point.  Just putting this here so people
	 * know and don't freak out.
	 */
	while (index <= last_index) {
		page = grab_cache_page(inode->i_mapping, index);
		if (!page) {
			pgoff_t i = 0;

			while (i < index) {
				page = find_get_page(inode->i_mapping, i);
				unlock_page(page);
				page_cache_release(page);
				page_cache_release(page);
				i++;
			}
			goto out_free;
		}
		index++;
	}

	index = 0;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	/* Write out the extent entries */
	do {
		struct btrfs_free_space_entry *entry;
		void *addr;
		unsigned long offset = 0;
		unsigned long start_offset = 0;

		if (index == 0) {
			start_offset = first_page_offset;
			offset = start_offset;
		}

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		entry = addr + start_offset;

		memset(addr, 0, PAGE_CACHE_SIZE);
		while (1) {
			struct btrfs_free_space *e;

			e = rb_entry(node, struct btrfs_free_space, offset_index);
			entries++;

			entry->offset = cpu_to_le64(e->offset);
			entry->bytes = cpu_to_le64(e->bytes);
			if (e->bitmap) {
				entry->type = BTRFS_FREE_SPACE_BITMAP;
				list_add_tail(&e->list, &bitmap_list);
				bitmaps++;
			} else {
				entry->type = BTRFS_FREE_SPACE_EXTENT;
			}
			node = rb_next(node);
			if (!node)
				break;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
				       PAGE_CACHE_SIZE - start_offset);
		kunmap(page);

		btrfs_csum_final(*crc, (char *)crc);
		crc++;
		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);

		/*
		 * We need to release our reference we got for grab_cache_page,
		 * except for the first page which will hold our checksums, we
		 * do that below.
		 */
		if (index != 0) {
			unlock_page(page);
			page_cache_release(page);
		}

		page_cache_release(page);

		index++;
	} while (node);

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		void *addr;
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
		kunmap(page);
		btrfs_csum_final(*crc, (char *)crc);
		crc++;
		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
		list_del_init(&entry->list);
		index++;
	}

	/* Zero out the rest of the pages just to make sure */
	while (index <= last_index) {
		void *addr;

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		memset(addr, 0, PAGE_CACHE_SIZE);
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
		bytes += PAGE_CACHE_SIZE;
		index++;
	}

	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);

	/* Write the checksums and trans id to the first page */
	{
		void *addr;
		u64 *gen;

		page = find_get_page(inode->i_mapping, 0);

		addr = kmap(page);
		memcpy(addr, checksums, sizeof(u32) * num_checksums);
		gen = addr + (sizeof(u32) * num_checksums);
		*gen = trans->transid;
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
	}
	BTRFS_I(inode)->generation = trans->transid;

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	filemap_write_and_wait(inode->i_mapping);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
	if (ret < 0) {
		ret = 0;
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
		goto out_free;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != block_group->key.objectid) {
			ret = 0;
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
					 GFP_NOFS);
			btrfs_release_path(root, path);
			goto out_free;
		}
	}
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	ret = 1;

out_free:
	if (ret == 0) {
		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		BTRFS_I(inode)->generation = 0;
	}
	kfree(checksums);
	btrfs_update_inode(trans, root, inode);
	iput(inode);
	return ret;
}

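/*
 * The helpers below convert between logical byte offsets and bit positions
 * inside a bitmap entry.  Worked example (assuming 4KiB pages and a 4KiB
 * sectorsize, which this file does not require): BITS_PER_BITMAP is
 * 4096 * 8 = 32768, so a single bitmap page covers 32768 * 4096 bytes =
 * 128MiB, and offset_to_bitmap() rounds an offset down to the start of that
 * 128MiB window measured from the block group's start.
 */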
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

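/*
 * recalculate_thresholds() below decides how many plain extent entries are
 * allowed before new free space gets pushed into bitmaps.  Worked example
 * (assuming 4KiB pages, which this file does not guarantee): a 1GiB block
 * group is allowed MAX_CACHE_BYTES_PER_GIG = 32KiB of tracking memory;
 * planning for one more bitmap than currently exists costs 4KiB, leaving
 * 28KiB, which is then capped at max_bytes / 2 = 16KiB for extent entries,
 * so extents_thresh ends up as 16KiB / sizeof(struct btrfs_free_space).
 */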
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}

static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}

static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case somethings not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure its actually there.  If its not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}

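/*
 * insert_into_bitmap() below is called with tree_lock held.  It returns 0
 * when the range should stay a plain extent entry (still under
 * extents_thresh and reasonably large, or the block group is smaller than
 * one bitmap's coverage), 1 once the whole range has been marked in one or
 * more bitmaps, and -ENOMEM if allocating a new bitmap fails; note that it
 * drops and retakes tree_lock around the GFP_NOFS allocations.
 */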
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

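/*
 * A minimal calling sketch for btrfs_add_free_space() below (illustrative
 * only; the surrounding block group lookup and error handling are assumed,
 * not shown in this file):
 *
 *	// return a freed extent of 'len' bytes at logical address 'start'
 *	ret = btrfs_add_free_space(block_group, start, len);
 *	if (ret)
 *		// -ENOMEM or an insertion failure; -EEXIST would trip BUG_ON()
 *		...;
 *
 * The function merges the new range with any adjacent extent entries before
 * linking it, or hands it to insert_into_bitmap() when both neighbours are
 * bitmaps (or missing).
 */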
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

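/*
 * btrfs_remove_free_space() below removes [offset, offset + bytes) from the
 * in-memory cache.  The range may sit in an extent entry (which is trimmed
 * or split, re-adding the leading piece via btrfs_add_free_space()), in a
 * bitmap (handled by remove_from_bitmap(), retrying on -EAGAIN), or it may
 * straddle into the following entry; its only caller is tree-log replay.
 */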
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

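/*
 * Sketch of the allocation round-trip for btrfs_find_space_for_alloc() below
 * (illustrative only; the caller, its parameter names and its error handling
 * are assumed, not part of this file):
 *
 *	u64 start;
 *
 *	start = btrfs_find_space_for_alloc(block_group, search_start,
 *					   num_bytes, empty_size);
 *	if (!start)
 *		// nothing large enough in this block group, try another one
 *		...;
 *	// on success the bytes are already removed from the in-memory cache;
 *	// if the allocation is later abandoned, give them back with
 *	// btrfs_add_free_space(block_group, start, num_bytes);
 */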
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}

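/*
 * btrfs_bitmap_cluster() below tries to carve a cluster window out of a
 * single bitmap entry: it scans for a run of at least min_bytes worth of set
 * bits, keeps accumulating runs until bytes worth have been found (resetting
 * if the window spreads too far), and on success points the cluster at the
 * bitmap via window_start / points_to_bitmap instead of moving entries into
 * the cluster's rbtree.
 */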
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, lets fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}
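
/*
 * A rough cluster lifecycle, stitched together from the functions in this
 * file (illustrative only; the allocator that drives it, and the parameter
 * names search_start / num_bytes / empty_size / min_start, are assumptions,
 * not shown here):
 *
 *	struct btrfs_free_cluster cluster;
 *
 *	btrfs_init_free_cluster(&cluster);
 *	if (!btrfs_find_space_cluster(trans, root, block_group, &cluster,
 *				      search_start, num_bytes, empty_size)) {
 *		u64 start;
 *
 *		start = btrfs_alloc_from_cluster(block_group, &cluster,
 *						 num_bytes, min_start);
 *		// start == 0 means the cluster had nothing large enough
 *	}
 *	// when done with it, hand any leftovers back to the cache:
 *	btrfs_return_cluster_to_free_space(block_group, &cluster);
 */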