/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include "ctree.h"

/*
 * Insert @node into the offset-sorted tree.  Returns -EEXIST if an entry
 * already starts at @offset.
 */
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset)
			p = &(*p)->rb_left;
		else if (offset > info->offset)
			p = &(*p)->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

static int tree_insert_bytes(struct rb_root *root, u64 bytes,
			     struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, bytes_index);

		if (bytes < info->bytes)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}
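
/*
 * Note (added for clarity, not in the original source): unlike
 * tree_insert_offset(), this insert never returns -EEXIST.  Entries with
 * equal byte counts simply descend to the right, so, for example, two 8K
 * holes at different offsets can coexist in the size tree; ties between
 * them are broken by offset later, in tree_search_bytes().
 */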

/*
 * Searches the tree for the given offset.
 *
 * fuzzy == 1: this is used for allocations where we are given a hint of where
 * to look for free space.  Because the hint may not be completely on an offset
 * mark, or the hint may no longer point to free space, we need to fudge our
 * results a bit.  So we look for free space starting at or after offset with
 * at least bytes size.  We prefer to find as close to the given offset as we
 * can.  Also, if the offset is within a free space range, then we will return
 * the free space that contains the given offset, which means we can return a
 * free space chunk with an offset before the provided offset.
 *
 * fuzzy == 0: this is just a normal tree search.  Give us the free space that
 * starts at the given offset and is at least bytes in size; if it's not there,
 * return NULL.
 */
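/*
 * Worked example (hypothetical entries, added for illustration): suppose the
 * tree holds free ranges [0, 16K) and [32K, 96K).  An exact search
 * (fuzzy == 0) for offset 32K, bytes 4K returns the second entry, while a
 * search for offset 20K returns NULL because no entry starts exactly there.
 * A fuzzy search (fuzzy == 1) for offset 20K, bytes 4K returns the
 * [32K, 96K) entry instead, the closest big-enough chunk at or after the
 * hint, and a fuzzy search for offset 40K returns the same entry because
 * 40K falls inside it.
 */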
static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
						   u64 offset, u64 bytes,
						   int fuzzy)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_free_space *entry, *ret = NULL;

	while (n) {
		entry = rb_entry(n, struct btrfs_free_space, offset_index);

		if (offset < entry->offset) {
			if (fuzzy &&
			    (!ret || entry->offset < ret->offset) &&
			    (bytes <= entry->bytes))
				ret = entry;
			n = n->rb_left;
		} else if (offset > entry->offset) {
			if (fuzzy &&
			    (entry->offset + entry->bytes - 1) >= offset &&
			    bytes <= entry->bytes) {
				ret = entry;
				break;
			}
			n = n->rb_right;
		} else {
			if (bytes > entry->bytes) {
				n = n->rb_right;
				continue;
			}
			ret = entry;
			break;
		}
	}

	return ret;
}

/*
 * Return a chunk of at least bytes size, as close to offset as we can get.
 */
static struct btrfs_free_space *tree_search_bytes(struct rb_root *root,
						  u64 offset, u64 bytes)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_free_space *entry, *ret = NULL;

	while (n) {
		entry = rb_entry(n, struct btrfs_free_space, bytes_index);

		if (bytes < entry->bytes) {
			/*
			 * We prefer to get a hole size as close to the size we
			 * are asking for so we don't take small slivers out of
			 * huge holes, but we also want to get as close to the
			 * offset as possible so we don't have a whole lot of
			 * fragmentation.
			 */
			if (offset <= entry->offset) {
				if (!ret)
					ret = entry;
				else if (entry->bytes < ret->bytes)
					ret = entry;
				else if (entry->offset < ret->offset)
					ret = entry;
			}
			n = n->rb_left;
		} else if (bytes > entry->bytes) {
			n = n->rb_right;
		} else {
			/*
			 * Ok, we may have multiple chunks of the wanted size,
			 * so we don't want to take the first one we find; we
			 * want to take the one closest to our given offset, so
			 * keep searching just in case there's a better match.
			 */
			n = n->rb_right;
			if (offset > entry->offset)
				continue;
			else if (!ret || entry->offset < ret->offset)
				ret = entry;
		}
	}

	return ret;
}
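
/*
 * Illustrative example (hypothetical entries, not from the original source):
 * if the size tree holds two 8K holes, one at offset 0 and one at offset
 * 64K, a search for 8K with an offset hint of 60K skips the entry at 0
 * (it starts before the hint) and returns the 8K hole at 64K, matching the
 * size exactly while staying close to the requested offset.
 */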

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	rb_erase(&info->bytes_index, &block_group->free_space_bytes);
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index);
	if (ret)
		return ret;

	ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes,
				&info->bytes_index);
	if (ret)
		return ret;

	return ret;
}

int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info;
	struct btrfs_free_space *left_info;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * First we want to see if there is free space adjacent to the range we
	 * are adding.  If there is, remove that struct and add a new one to
	 * cover the entire range.
	 */
	right_info = tree_search_offset(&block_group->free_space_offset,
					offset + bytes, 0, 0);
	left_info = tree_search_offset(&block_group->free_space_offset,
				       offset - 1, 0, 1);

	if (right_info) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);

	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_ERR "btrfs: unable to add free space: %d\n", ret);
		if (ret == -EEXIST)
			BUG();
	}

	return ret;
}
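
/*
 * Worked example (hypothetical numbers, for illustration): with existing free
 * ranges [0, 4K) and [8K, 12K), btrfs_add_free_space(block_group, 4K, 4K)
 * finds [8K, 12K) as right_info (it starts at offset + bytes) and [0, 4K) as
 * left_info (it ends at offset), unlinks both, and links one merged entry
 * covering [0, 12K), so adjacent free space never stays fragmented in the
 * cache.
 */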

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

	info = tree_search_offset(&block_group->free_space_offset, offset, 0,
				  1);
	if (info && info->offset == offset) {
		if (info->bytes < bytes) {
			printk(KERN_ERR "Found free space at %llu, size %llu, "
			       "trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			spin_unlock(&block_group->tree_lock);
			goto out;
		}
		unlink_free_space(block_group, info);

		if (info->bytes == bytes) {
			kfree(info);
			spin_unlock(&block_group->tree_lock);
			goto out;
		}

		info->offset += bytes;
		info->bytes -= bytes;

		ret = link_free_space(block_group, info);
		spin_unlock(&block_group->tree_lock);
		BUG_ON(ret);
	} else if (info && info->offset < offset &&
		   info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * We're freeing space in the middle of the info; this can
		 * happen during tree log replay.
		 *
		 * First unlink the old info and then insert it again after
		 * the hole we're creating.
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;
			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			BUG_ON(ret);
		} else {
			/*
			 * The hole we're creating ends at the end of the
			 * info struct, so just free the info.
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);
		/*
		 * Step two: insert a new info struct to cover anything
		 * before the hole.
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		BUG_ON(ret);
	} else {
		spin_unlock(&block_group->tree_lock);
		if (!info) {
			printk(KERN_ERR "couldn't find space %llu to free\n",
			       (unsigned long long)offset);
			printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
			       block_group->cached,
			       (unsigned long long)block_group->key.objectid,
			       (unsigned long long)block_group->key.offset);
			btrfs_dump_free_space(block_group, bytes);
		} else if (info) {
			printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
			       "but wanted offset=%llu bytes=%llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)offset,
			       (unsigned long long)bytes);
		}
		WARN_ON(1);
	}
out:
	return ret;
}
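
/*
 * Worked example (hypothetical numbers, for illustration): if the cache holds
 * a single free range [0, 64K) and btrfs_remove_free_space(block_group, 16K,
 * 8K) is called, the middle-of-the-range branch fires: the old entry is
 * unlinked, relinked as [24K, 64K), and the follow-up btrfs_add_free_space()
 * call puts back [0, 16K), leaving two entries on either side of the 8K hole.
 * This is the tree log replay case described in the comment above.
 */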

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_ERR "entry offset %llu, bytes %llu\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes);
	}
	printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
	       count, (unsigned long long)bytes);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	spin_lock(&block_group->tree_lock);
	while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, bytes_index);
		unlink_free_space(block_group, info);
		kfree(info);
		/*
		 * Freeing a large cache can take a while; drop the lock and
		 * reschedule periodically so we don't hog the CPU.
		 */
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}
	spin_unlock(&block_group->tree_lock);
}

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = tree_search_offset(&block_group->free_space_offset, offset,
				   bytes + empty_size, 1);
	if (!entry)
		entry = tree_search_bytes(&block_group->free_space_bytes,
					  offset, bytes + empty_size);
	if (entry) {
		unlink_free_space(block_group, entry);
		ret = entry->offset;
		entry->offset += bytes;
		entry->bytes -= bytes;

		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}
	spin_unlock(&block_group->tree_lock);

	return ret;
}
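
/*
 * Usage sketch (illustrative, not taken from the in-kernel callers): a caller
 * that wants 4K of space near offset 128K, with an extra 4K cushion, might do
 *
 *	u64 start = btrfs_find_space_for_alloc(block_group, 128 * 1024,
 *					       4096, 4096);
 *
 * The lookup requires an entry of at least bytes + empty_size (8K here), but
 * only the first bytes (4K) are trimmed off the entry that is found; the
 * remainder is relinked into both trees.  A return value of 0 means no
 * suitable entry existed.
 */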