btnode.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. /*
  2. * btnode.c - NILFS B-tree node cache
  3. *
  4. * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  19. *
  20. * This file was originally written by Seiji Kihara <kihara@osrg.net>
  21. * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for
  22. * stabilization and simplification.
  23. *
  24. */
  25. #include <linux/types.h>
  26. #include <linux/buffer_head.h>
  27. #include <linux/mm.h>
  28. #include <linux/backing-dev.h>
  29. #include "nilfs.h"
  30. #include "mdt.h"
  31. #include "dat.h"
  32. #include "page.h"
  33. #include "btnode.h"
  34. void nilfs_btnode_cache_init_once(struct address_space *btnc)
  35. {
  36. INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC);
  37. spin_lock_init(&btnc->tree_lock);
  38. INIT_LIST_HEAD(&btnc->private_list);
  39. spin_lock_init(&btnc->private_lock);
  40. spin_lock_init(&btnc->i_mmap_lock);
  41. INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap);
  42. INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
  43. }
  44. static struct address_space_operations def_btnode_aops;
  45. void nilfs_btnode_cache_init(struct address_space *btnc)
  46. {
  47. btnc->host = NULL; /* can safely set to host inode ? */
  48. btnc->flags = 0;
  49. mapping_set_gfp_mask(btnc, GFP_NOFS);
  50. btnc->assoc_mapping = NULL;
  51. btnc->backing_dev_info = &default_backing_dev_info;
  52. btnc->a_ops = &def_btnode_aops;
  53. }
  54. void nilfs_btnode_cache_clear(struct address_space *btnc)
  55. {
  56. invalidate_mapping_pages(btnc, 0, -1);
  57. truncate_inode_pages(btnc, 0);
  58. }
  59. int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
  60. sector_t pblocknr, struct buffer_head **pbh,
  61. int newblk)
  62. {
  63. struct buffer_head *bh;
  64. struct inode *inode = NILFS_BTNC_I(btnc);
  65. int err;
  66. bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
  67. if (unlikely(!bh))
  68. return -ENOMEM;
  69. err = -EEXIST; /* internal code */
  70. if (newblk) {
  71. if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
  72. buffer_dirty(bh))) {
  73. brelse(bh);
  74. BUG();
  75. }
  76. bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
  77. bh->b_blocknr = blocknr;
  78. set_buffer_mapped(bh);
  79. set_buffer_uptodate(bh);
  80. goto found;
  81. }
  82. if (buffer_uptodate(bh) || buffer_dirty(bh))
  83. goto found;
  84. if (pblocknr == 0) {
  85. pblocknr = blocknr;
  86. if (inode->i_ino != NILFS_DAT_INO) {
  87. struct inode *dat =
  88. nilfs_dat_inode(NILFS_I_NILFS(inode));
  89. /* blocknr is a virtual block number */
  90. err = nilfs_dat_translate(dat, blocknr, &pblocknr);
  91. if (unlikely(err)) {
  92. brelse(bh);
  93. goto out_locked;
  94. }
  95. }
  96. }
  97. lock_buffer(bh);
  98. if (buffer_uptodate(bh)) {
  99. unlock_buffer(bh);
  100. err = -EEXIST; /* internal code */
  101. goto found;
  102. }
  103. set_buffer_mapped(bh);
  104. bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
  105. bh->b_blocknr = pblocknr; /* set block address for read */
  106. bh->b_end_io = end_buffer_read_sync;
  107. get_bh(bh);
  108. submit_bh(READ, bh);
  109. bh->b_blocknr = blocknr; /* set back to the given block address */
  110. err = 0;
  111. found:
  112. *pbh = bh;
  113. out_locked:
  114. unlock_page(bh->b_page);
  115. page_cache_release(bh->b_page);
  116. return err;
  117. }
  118. int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr,
  119. sector_t pblocknr, struct buffer_head **pbh, int newblk)
  120. {
  121. struct buffer_head *bh;
  122. int err;
  123. err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk);
  124. if (err == -EEXIST) /* internal code (cache hit) */
  125. return 0;
  126. if (unlikely(err))
  127. return err;
  128. bh = *pbh;
  129. wait_on_buffer(bh);
  130. if (!buffer_uptodate(bh)) {
  131. brelse(bh);
  132. return -EIO;
  133. }
  134. return 0;
  135. }
  136. /**
  137. * nilfs_btnode_delete - delete B-tree node buffer
  138. * @bh: buffer to be deleted
  139. *
  140. * nilfs_btnode_delete() invalidates the specified buffer and delete the page
  141. * including the buffer if the page gets unbusy.
  142. */
  143. void nilfs_btnode_delete(struct buffer_head *bh)
  144. {
  145. struct address_space *mapping;
  146. struct page *page = bh->b_page;
  147. pgoff_t index = page_index(page);
  148. int still_dirty;
  149. page_cache_get(page);
  150. lock_page(page);
  151. wait_on_page_writeback(page);
  152. nilfs_forget_buffer(bh);
  153. still_dirty = PageDirty(page);
  154. mapping = page->mapping;
  155. unlock_page(page);
  156. page_cache_release(page);
  157. if (!still_dirty && mapping)
  158. invalidate_inode_pages2_range(mapping, index, index);
  159. }
  160. /**
  161. * nilfs_btnode_prepare_change_key
  162. * prepare to move contents of the block for old key to one of new key.
  163. * the old buffer will not be removed, but might be reused for new buffer.
  164. * it might return -ENOMEM because of memory allocation errors,
  165. * and might return -EIO because of disk read errors.
  166. */
  167. int nilfs_btnode_prepare_change_key(struct address_space *btnc,
  168. struct nilfs_btnode_chkey_ctxt *ctxt)
  169. {
  170. struct buffer_head *obh, *nbh;
  171. struct inode *inode = NILFS_BTNC_I(btnc);
  172. __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
  173. int err;
  174. if (oldkey == newkey)
  175. return 0;
  176. obh = ctxt->bh;
  177. ctxt->newbh = NULL;
  178. if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
  179. lock_page(obh->b_page);
  180. /*
  181. * We cannot call radix_tree_preload for the kernels older
  182. * than 2.6.23, because it is not exported for modules.
  183. */
  184. err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
  185. if (err)
  186. goto failed_unlock;
  187. /* BUG_ON(oldkey != obh->b_page->index); */
  188. if (unlikely(oldkey != obh->b_page->index))
  189. NILFS_PAGE_BUG(obh->b_page,
  190. "invalid oldkey %lld (newkey=%lld)",
  191. (unsigned long long)oldkey,
  192. (unsigned long long)newkey);
  193. retry:
  194. spin_lock_irq(&btnc->tree_lock);
  195. err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
  196. spin_unlock_irq(&btnc->tree_lock);
  197. /*
  198. * Note: page->index will not change to newkey until
  199. * nilfs_btnode_commit_change_key() will be called.
  200. * To protect the page in intermediate state, the page lock
  201. * is held.
  202. */
  203. radix_tree_preload_end();
  204. if (!err)
  205. return 0;
  206. else if (err != -EEXIST)
  207. goto failed_unlock;
  208. err = invalidate_inode_pages2_range(btnc, newkey, newkey);
  209. if (!err)
  210. goto retry;
  211. /* fallback to copy mode */
  212. unlock_page(obh->b_page);
  213. }
  214. err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1);
  215. if (likely(!err)) {
  216. BUG_ON(nbh == obh);
  217. ctxt->newbh = nbh;
  218. }
  219. return err;
  220. failed_unlock:
  221. unlock_page(obh->b_page);
  222. return err;
  223. }
  224. /**
  225. * nilfs_btnode_commit_change_key
  226. * commit the change_key operation prepared by prepare_change_key().
  227. */
  228. void nilfs_btnode_commit_change_key(struct address_space *btnc,
  229. struct nilfs_btnode_chkey_ctxt *ctxt)
  230. {
  231. struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
  232. __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
  233. struct page *opage;
  234. if (oldkey == newkey)
  235. return;
  236. if (nbh == NULL) { /* blocksize == pagesize */
  237. opage = obh->b_page;
  238. if (unlikely(oldkey != opage->index))
  239. NILFS_PAGE_BUG(opage,
  240. "invalid oldkey %lld (newkey=%lld)",
  241. (unsigned long long)oldkey,
  242. (unsigned long long)newkey);
  243. if (!test_set_buffer_dirty(obh) && TestSetPageDirty(opage))
  244. BUG();
  245. spin_lock_irq(&btnc->tree_lock);
  246. radix_tree_delete(&btnc->page_tree, oldkey);
  247. radix_tree_tag_set(&btnc->page_tree, newkey,
  248. PAGECACHE_TAG_DIRTY);
  249. spin_unlock_irq(&btnc->tree_lock);
  250. opage->index = obh->b_blocknr = newkey;
  251. unlock_page(opage);
  252. } else {
  253. nilfs_copy_buffer(nbh, obh);
  254. nilfs_btnode_mark_dirty(nbh);
  255. nbh->b_blocknr = newkey;
  256. ctxt->bh = nbh;
  257. nilfs_btnode_delete(obh); /* will decrement bh->b_count */
  258. }
  259. }
  260. /**
  261. * nilfs_btnode_abort_change_key
  262. * abort the change_key operation prepared by prepare_change_key().
  263. */
  264. void nilfs_btnode_abort_change_key(struct address_space *btnc,
  265. struct nilfs_btnode_chkey_ctxt *ctxt)
  266. {
  267. struct buffer_head *nbh = ctxt->newbh;
  268. __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
  269. if (oldkey == newkey)
  270. return;
  271. if (nbh == NULL) { /* blocksize == pagesize */
  272. spin_lock_irq(&btnc->tree_lock);
  273. radix_tree_delete(&btnc->page_tree, newkey);
  274. spin_unlock_irq(&btnc->tree_lock);
  275. unlock_page(ctxt->bh->b_page);
  276. } else
  277. brelse(nbh);
  278. }