page.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. /*
  2. * page.c - buffer/page management specific to NILFS
  3. *
  4. * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  19. *
  20. * Written by Ryusuke Konishi <ryusuke@osrg.net>,
  21. * Seiji Kihara <kihara@osrg.net>.
  22. */
  23. #include <linux/pagemap.h>
  24. #include <linux/writeback.h>
  25. #include <linux/swap.h>
  26. #include <linux/bitops.h>
  27. #include <linux/page-flags.h>
  28. #include <linux/list.h>
  29. #include <linux/highmem.h>
  30. #include <linux/pagevec.h>
  31. #include <linux/gfp.h>
  32. #include "nilfs.h"
  33. #include "page.h"
  34. #include "mdt.h"
  /*
   * Buffer state bits that survive a copy: nilfs_copy_buffer() and
   * nilfs_copy_page() mask the source b_state with this value, so any state
   * bit not listed here is dropped from the destination buffer head.
   */
  #define NILFS_BUFFER_INHERENT_BITS              \
          ((1UL << BH_Uptodate) |                 \
           (1UL << BH_Mapped) |                   \
           (1UL << BH_NILFS_Node) |               \
           (1UL << BH_NILFS_Volatile) |           \
           (1UL << BH_NILFS_Allocated) |          \
           (1UL << BH_NILFS_Checked))
  39. static struct buffer_head *
  40. __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
  41. int blkbits, unsigned long b_state)
  42. {
  43. unsigned long first_block;
  44. struct buffer_head *bh;
  45. if (!page_has_buffers(page))
  46. create_empty_buffers(page, 1 << blkbits, b_state);
  47. first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
  48. bh = nilfs_page_get_nth_block(page, block - first_block);
  49. touch_buffer(bh);
  50. wait_on_buffer(bh);
  51. return bh;
  52. }
  53. /*
  54. * Since the page cache of B-tree node pages or data page cache of pseudo
  55. * inodes does not have a valid mapping->host pointer, calling
  56. * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
  57. * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
  58. * To avoid this problem, the old style mark_buffer_dirty() is used instead.
  59. */
  60. void nilfs_mark_buffer_dirty(struct buffer_head *bh)
  61. {
  62. if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
  63. __set_page_dirty_nobuffers(bh->b_page);
  64. }
  65. struct buffer_head *nilfs_grab_buffer(struct inode *inode,
  66. struct address_space *mapping,
  67. unsigned long blkoff,
  68. unsigned long b_state)
  69. {
  70. int blkbits = inode->i_blkbits;
  71. pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
  72. struct page *page;
  73. struct buffer_head *bh;
  74. page = grab_cache_page(mapping, index);
  75. if (unlikely(!page))
  76. return NULL;
  77. bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
  78. if (unlikely(!bh)) {
  79. unlock_page(page);
  80. page_cache_release(page);
  81. return NULL;
  82. }
  83. return bh;
  84. }
  85. /**
  86. * nilfs_forget_buffer - discard dirty state
  87. * @inode: owner inode of the buffer
  88. * @bh: buffer head of the buffer to be discarded
  89. */
  90. void nilfs_forget_buffer(struct buffer_head *bh)
  91. {
  92. struct page *page = bh->b_page;
  93. lock_buffer(bh);
  94. clear_buffer_nilfs_volatile(bh);
  95. clear_buffer_nilfs_checked(bh);
  96. clear_buffer_nilfs_redirected(bh);
  97. clear_buffer_dirty(bh);
  98. if (nilfs_page_buffers_clean(page))
  99. __nilfs_clear_page_dirty(page);
  100. clear_buffer_uptodate(bh);
  101. clear_buffer_mapped(bh);
  102. bh->b_blocknr = -1;
  103. ClearPageUptodate(page);
  104. ClearPageMappedToDisk(page);
  105. unlock_buffer(bh);
  106. brelse(bh);
  107. }
  108. /**
  109. * nilfs_copy_buffer -- copy buffer data and flags
  110. * @dbh: destination buffer
  111. * @sbh: source buffer
  112. */
  113. void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
  114. {
  115. void *kaddr0, *kaddr1;
  116. unsigned long bits;
  117. struct page *spage = sbh->b_page, *dpage = dbh->b_page;
  118. struct buffer_head *bh;
  119. kaddr0 = kmap_atomic(spage, KM_USER0);
  120. kaddr1 = kmap_atomic(dpage, KM_USER1);
  121. memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
  122. kunmap_atomic(kaddr1, KM_USER1);
  123. kunmap_atomic(kaddr0, KM_USER0);
  124. dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
  125. dbh->b_blocknr = sbh->b_blocknr;
  126. dbh->b_bdev = sbh->b_bdev;
  127. bh = dbh;
  128. bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
  129. while ((bh = bh->b_this_page) != dbh) {
  130. lock_buffer(bh);
  131. bits &= bh->b_state;
  132. unlock_buffer(bh);
  133. }
  134. if (bits & (1UL << BH_Uptodate))
  135. SetPageUptodate(dpage);
  136. else
  137. ClearPageUptodate(dpage);
  138. if (bits & (1UL << BH_Mapped))
  139. SetPageMappedToDisk(dpage);
  140. else
  141. ClearPageMappedToDisk(dpage);
  142. }
  143. /**
  144. * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
  145. * @page: page to be checked
  146. *
  147. * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
  148. * Otherwise, it returns non-zero value.
  149. */
  150. int nilfs_page_buffers_clean(struct page *page)
  151. {
  152. struct buffer_head *bh, *head;
  153. bh = head = page_buffers(page);
  154. do {
  155. if (buffer_dirty(bh))
  156. return 0;
  157. bh = bh->b_this_page;
  158. } while (bh != head);
  159. return 1;
  160. }
  161. void nilfs_page_bug(struct page *page)
  162. {
  163. struct address_space *m;
  164. unsigned long ino = 0;
  165. if (unlikely(!page)) {
  166. printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
  167. return;
  168. }
  169. m = page->mapping;
  170. if (m) {
  171. struct inode *inode = NILFS_AS_I(m);
  172. if (inode != NULL)
  173. ino = inode->i_ino;
  174. }
  175. printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
  176. "mapping=%p ino=%lu\n",
  177. page, atomic_read(&page->_count),
  178. (unsigned long long)page->index, page->flags, m, ino);
  179. if (page_has_buffers(page)) {
  180. struct buffer_head *bh, *head;
  181. int i = 0;
  182. bh = head = page_buffers(page);
  183. do {
  184. printk(KERN_CRIT
  185. " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
  186. i++, bh, atomic_read(&bh->b_count),
  187. (unsigned long long)bh->b_blocknr, bh->b_state);
  188. bh = bh->b_this_page;
  189. } while (bh != head);
  190. }
  191. }
  192. /**
  193. * nilfs_alloc_private_page - allocate a private page with buffer heads
  194. *
  195. * Return Value: On success, a pointer to the allocated page is returned.
  196. * On error, NULL is returned.
  197. */
  198. struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
  199. unsigned long state)
  200. {
  201. struct buffer_head *bh, *head, *tail;
  202. struct page *page;
  203. page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
  204. if (unlikely(!page))
  205. return NULL;
  206. lock_page(page);
  207. head = alloc_page_buffers(page, size, 0);
  208. if (unlikely(!head)) {
  209. unlock_page(page);
  210. __free_page(page);
  211. return NULL;
  212. }
  213. bh = head;
  214. do {
  215. bh->b_state = (1UL << BH_NILFS_Allocated) | state;
  216. tail = bh;
  217. bh->b_bdev = bdev;
  218. bh = bh->b_this_page;
  219. } while (bh);
  220. tail->b_this_page = head;
  221. attach_page_buffers(page, head);
  222. return page;
  223. }
  224. void nilfs_free_private_page(struct page *page)
  225. {
  226. BUG_ON(!PageLocked(page));
  227. BUG_ON(page->mapping);
  228. if (page_has_buffers(page) && !try_to_free_buffers(page))
  229. NILFS_PAGE_BUG(page, "failed to free page");
  230. unlock_page(page);
  231. __free_page(page);
  232. }
  233. /**
  234. * nilfs_copy_page -- copy the page with buffers
  235. * @dst: destination page
  236. * @src: source page
  237. * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
  238. *
  239. * This function is for both data pages and btnode pages. The dirty flag
  240. * should be treated by caller. The page must not be under i/o.
  241. * Both src and dst page must be locked
  242. */
  243. static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
  244. {
  245. struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
  246. unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
  247. BUG_ON(PageWriteback(dst));
  248. sbh = sbufs = page_buffers(src);
  249. if (!page_has_buffers(dst))
  250. create_empty_buffers(dst, sbh->b_size, 0);
  251. if (copy_dirty)
  252. mask |= (1UL << BH_Dirty);
  253. dbh = dbufs = page_buffers(dst);
  254. do {
  255. lock_buffer(sbh);
  256. lock_buffer(dbh);
  257. dbh->b_state = sbh->b_state & mask;
  258. dbh->b_blocknr = sbh->b_blocknr;
  259. dbh->b_bdev = sbh->b_bdev;
  260. sbh = sbh->b_this_page;
  261. dbh = dbh->b_this_page;
  262. } while (dbh != dbufs);
  263. copy_highpage(dst, src);
  264. if (PageUptodate(src) && !PageUptodate(dst))
  265. SetPageUptodate(dst);
  266. else if (!PageUptodate(src) && PageUptodate(dst))
  267. ClearPageUptodate(dst);
  268. if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
  269. SetPageMappedToDisk(dst);
  270. else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
  271. ClearPageMappedToDisk(dst);
  272. do {
  273. unlock_buffer(sbh);
  274. unlock_buffer(dbh);
  275. sbh = sbh->b_this_page;
  276. dbh = dbh->b_this_page;
  277. } while (dbh != dbufs);
  278. }
  279. int nilfs_copy_dirty_pages(struct address_space *dmap,
  280. struct address_space *smap)
  281. {
  282. struct pagevec pvec;
  283. unsigned int i;
  284. pgoff_t index = 0;
  285. int err = 0;
  286. pagevec_init(&pvec, 0);
  287. repeat:
  288. if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
  289. PAGEVEC_SIZE))
  290. return 0;
  291. for (i = 0; i < pagevec_count(&pvec); i++) {
  292. struct page *page = pvec.pages[i], *dpage;
  293. lock_page(page);
  294. if (unlikely(!PageDirty(page)))
  295. NILFS_PAGE_BUG(page, "inconsistent dirty state");
  296. dpage = grab_cache_page(dmap, page->index);
  297. if (unlikely(!dpage)) {
  298. /* No empty page is added to the page cache */
  299. err = -ENOMEM;
  300. unlock_page(page);
  301. break;
  302. }
  303. if (unlikely(!page_has_buffers(page)))
  304. NILFS_PAGE_BUG(page,
  305. "found empty page in dat page cache");
  306. nilfs_copy_page(dpage, page, 1);
  307. __set_page_dirty_nobuffers(dpage);
  308. unlock_page(dpage);
  309. page_cache_release(dpage);
  310. unlock_page(page);
  311. }
  312. pagevec_release(&pvec);
  313. cond_resched();
  314. if (likely(!err))
  315. goto repeat;
  316. return err;
  317. }
  318. /**
  319. * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
  320. * @dmap: destination page cache
  321. * @smap: source page cache
  322. *
  323. * No pages must no be added to the cache during this process.
  324. * This must be ensured by the caller.
  325. */
  326. void nilfs_copy_back_pages(struct address_space *dmap,
  327. struct address_space *smap)
  328. {
  329. struct pagevec pvec;
  330. unsigned int i, n;
  331. pgoff_t index = 0;
  332. int err;
  333. pagevec_init(&pvec, 0);
  334. repeat:
  335. n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
  336. if (!n)
  337. return;
  338. index = pvec.pages[n - 1]->index + 1;
  339. for (i = 0; i < pagevec_count(&pvec); i++) {
  340. struct page *page = pvec.pages[i], *dpage;
  341. pgoff_t offset = page->index;
  342. lock_page(page);
  343. dpage = find_lock_page(dmap, offset);
  344. if (dpage) {
  345. /* override existing page on the destination cache */
  346. WARN_ON(PageDirty(dpage));
  347. nilfs_copy_page(dpage, page, 0);
  348. unlock_page(dpage);
  349. page_cache_release(dpage);
  350. } else {
  351. struct page *page2;
  352. /* move the page to the destination cache */
  353. spin_lock_irq(&smap->tree_lock);
  354. page2 = radix_tree_delete(&smap->page_tree, offset);
  355. WARN_ON(page2 != page);
  356. smap->nrpages--;
  357. spin_unlock_irq(&smap->tree_lock);
  358. spin_lock_irq(&dmap->tree_lock);
  359. err = radix_tree_insert(&dmap->page_tree, offset, page);
  360. if (unlikely(err < 0)) {
  361. WARN_ON(err == -EEXIST);
  362. page->mapping = NULL;
  363. page_cache_release(page); /* for cache */
  364. } else {
  365. page->mapping = dmap;
  366. dmap->nrpages++;
  367. if (PageDirty(page))
  368. radix_tree_tag_set(&dmap->page_tree,
  369. offset,
  370. PAGECACHE_TAG_DIRTY);
  371. }
  372. spin_unlock_irq(&dmap->tree_lock);
  373. }
  374. unlock_page(page);
  375. }
  376. pagevec_release(&pvec);
  377. cond_resched();
  378. goto repeat;
  379. }
  380. void nilfs_clear_dirty_pages(struct address_space *mapping)
  381. {
  382. struct pagevec pvec;
  383. unsigned int i;
  384. pgoff_t index = 0;
  385. pagevec_init(&pvec, 0);
  386. while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
  387. PAGEVEC_SIZE)) {
  388. for (i = 0; i < pagevec_count(&pvec); i++) {
  389. struct page *page = pvec.pages[i];
  390. struct buffer_head *bh, *head;
  391. lock_page(page);
  392. ClearPageUptodate(page);
  393. ClearPageMappedToDisk(page);
  394. bh = head = page_buffers(page);
  395. do {
  396. lock_buffer(bh);
  397. clear_buffer_dirty(bh);
  398. clear_buffer_nilfs_volatile(bh);
  399. clear_buffer_nilfs_checked(bh);
  400. clear_buffer_nilfs_redirected(bh);
  401. clear_buffer_uptodate(bh);
  402. clear_buffer_mapped(bh);
  403. unlock_buffer(bh);
  404. bh = bh->b_this_page;
  405. } while (bh != head);
  406. __nilfs_clear_page_dirty(page);
  407. unlock_page(page);
  408. }
  409. pagevec_release(&pvec);
  410. cond_resched();
  411. }
  412. }
  413. unsigned nilfs_page_count_clean_buffers(struct page *page,
  414. unsigned from, unsigned to)
  415. {
  416. unsigned block_start, block_end;
  417. struct buffer_head *bh, *head;
  418. unsigned nc = 0;
  419. for (bh = head = page_buffers(page), block_start = 0;
  420. bh != head || !block_start;
  421. block_start = block_end, bh = bh->b_this_page) {
  422. block_end = block_start + bh->b_size;
  423. if (block_end > from && block_start < to && !buffer_dirty(bh))
  424. nc++;
  425. }
  426. return nc;
  427. }
  428. void nilfs_mapping_init_once(struct address_space *mapping)
  429. {
  430. memset(mapping, 0, sizeof(*mapping));
  431. INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
  432. spin_lock_init(&mapping->tree_lock);
  433. INIT_LIST_HEAD(&mapping->private_list);
  434. spin_lock_init(&mapping->private_lock);
  435. spin_lock_init(&mapping->i_mmap_lock);
  436. INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
  437. INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
  438. }
  439. void nilfs_mapping_init(struct address_space *mapping,
  440. struct backing_dev_info *bdi,
  441. const struct address_space_operations *aops)
  442. {
  443. mapping->host = NULL;
  444. mapping->flags = 0;
  445. mapping_set_gfp_mask(mapping, GFP_NOFS);
  446. mapping->assoc_mapping = NULL;
  447. mapping->backing_dev_info = bdi;
  448. mapping->a_ops = aops;
  449. }
  450. /*
  451. * NILFS2 needs clear_page_dirty() in the following two cases:
  452. *
  453. * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
  454. * page dirty flags when it copies back pages from the shadow cache
  455. * (gcdat->{i_mapping,i_btnode_cache}) to its original cache
  456. * (dat->{i_mapping,i_btnode_cache}).
  457. *
  458. * 2) Some B-tree operations like insertion or deletion may dispose buffers
  459. * in dirty state, and this needs to cancel the dirty state of their pages.
  460. */
  461. int __nilfs_clear_page_dirty(struct page *page)
  462. {
  463. struct address_space *mapping = page->mapping;
  464. if (mapping) {
  465. spin_lock_irq(&mapping->tree_lock);
  466. if (test_bit(PG_dirty, &page->flags)) {
  467. radix_tree_tag_clear(&mapping->page_tree,
  468. page_index(page),
  469. PAGECACHE_TAG_DIRTY);
  470. spin_unlock_irq(&mapping->tree_lock);
  471. return clear_page_dirty_for_io(page);
  472. }
  473. spin_unlock_irq(&mapping->tree_lock);
  474. return 0;
  475. }
  476. return TestClearPageDirty(page);
  477. }
  478. /**
  479. * nilfs_find_uncommitted_extent - find extent of uncommitted data
  480. * @inode: inode
  481. * @start_blk: start block offset (in)
  482. * @blkoff: start offset of the found extent (out)
  483. *
  484. * This function searches an extent of buffers marked "delayed" which
  485. * starts from a block offset equal to or larger than @start_blk. If
  486. * such an extent was found, this will store the start offset in
  487. * @blkoff and return its length in blocks. Otherwise, zero is
  488. * returned.
  489. */
  490. unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
  491. sector_t start_blk,
  492. sector_t *blkoff)
  493. {
  494. unsigned int i;
  495. pgoff_t index;
  496. unsigned int nblocks_in_page;
  497. unsigned long length = 0;
  498. sector_t b;
  499. struct pagevec pvec;
  500. struct page *page;
  501. if (inode->i_mapping->nrpages == 0)
  502. return 0;
  503. index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
  504. nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
  505. pagevec_init(&pvec, 0);
  506. repeat:
  507. pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
  508. pvec.pages);
  509. if (pvec.nr == 0)
  510. return length;
  511. if (length > 0 && pvec.pages[0]->index > index)
  512. goto out;
  513. b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
  514. i = 0;
  515. do {
  516. page = pvec.pages[i];
  517. lock_page(page);
  518. if (page_has_buffers(page)) {
  519. struct buffer_head *bh, *head;
  520. bh = head = page_buffers(page);
  521. do {
  522. if (b < start_blk)
  523. continue;
  524. if (buffer_delay(bh)) {
  525. if (length == 0)
  526. *blkoff = b;
  527. length++;
  528. } else if (length > 0) {
  529. goto out_locked;
  530. }
  531. } while (++b, bh = bh->b_this_page, bh != head);
  532. } else {
  533. if (length > 0)
  534. goto out_locked;
  535. b += nblocks_in_page;
  536. }
  537. unlock_page(page);
  538. } while (++i < pagevec_count(&pvec));
  539. index = page->index + 1;
  540. pagevec_release(&pvec);
  541. cond_resched();
  542. goto repeat;
  543. out_locked:
  544. unlock_page(page);
  545. out:
  546. pagevec_release(&pvec);
  547. return length;
  548. }