/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "trans.h"
#include "rgrp.h"
#include "super.h"
#include "util.h"
#include "glops.h"
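
/**
 * gfs2_page_add_databufs - Add a page's data buffers to the current transaction
 * @ip: The inode
 * @page: The page
 * @from: The first byte of the range of interest
 * @to: The last byte of the range of interest
 *
 * Walks the buffer heads attached to @page and adds each buffer which
 * overlaps the byte range @from..@to to the current transaction. For
 * journaled data the buffer is marked uptodate first, so that its
 * contents are valid when it is written to the log.
 */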
static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
				   unsigned int from, unsigned int to)
{
	struct buffer_head *head = page_buffers(page);
	unsigned int bsize = head->b_size;
	struct buffer_head *bh;
	unsigned int start, end;

	for (bh = head, start = 0; bh != head || !start;
	     bh = bh->b_this_page, start = end) {
		end = start + bsize;
		if (end <= from || start >= to)
			continue;
		if (gfs2_is_jdata(ip))
			set_buffer_uptodate(bh);
		gfs2_trans_add_bh(ip->i_gl, bh, 0);
	}
}

/**
 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * Returns: errno
 */

static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
				  struct buffer_head *bh_result, int create)
{
	int error;

	error = gfs2_block_map(inode, lblock, bh_result, 0);
	if (error)
		return error;
	if (!buffer_mapped(bh_result))
		return -EIO;
	return 0;
}
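
/*
 * Like gfs2_get_block_noalloc() above, but for the direct I/O path: a
 * hole is passed back to the caller as an unmapped buffer rather than
 * being treated as an error, and no allocation is ever performed
 * (gfs2_block_map() is called with create == 0 regardless of @create).
 */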
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
				 struct buffer_head *bh_result, int create)
{
	return gfs2_block_map(inode, lblock, bh_result, 0);
}

/**
 * gfs2_writepage_common - Common bits of writepage
 * @page: The page to be written
 * @wbc: The writeback control
 *
 * Returns: 1 if the page should go on to be written, or zero if it was
 *          dealt with here (redirtied or invalidated and unlocked).
 */

static int gfs2_writepage_common(struct page *page,
				 struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;

	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
		goto out;
	if (current->journal_info)
		goto redirty;
	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index > end_index || (page->index == end_index && !offset)) {
		page->mapping->a_ops->invalidatepage(page, 0);
		goto out;
	}
	return 1;
redirty:
	redirty_page_for_writepage(wbc, page);
out:
	unlock_page(page);
	return 0;
}

/**
 * gfs2_writeback_writepage - Write page for writeback mappings
 * @page: The page
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_writeback_writepage(struct page *page,
				    struct writeback_control *wbc)
{
	int ret;

	ret = gfs2_writepage_common(page, wbc);
	if (ret <= 0)
		return ret;

	return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
}

/**
 * gfs2_ordered_writepage - Write page for ordered data files
 * @page: The page to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_ordered_writepage(struct page *page,
				  struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret;

	ret = gfs2_writepage_common(page, wbc);
	if (ret <= 0)
		return ret;

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, inode->i_sb->s_blocksize,
				     (1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1);
	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
}

/**
 * __gfs2_jdata_writepage - The core of jdata writepage
 * @page: The page to write
 * @wbc: The writeback control
 *
 * This is shared between writepage and writepages and implements the
 * core of the writepage operation. If a transaction is required then
 * PageChecked will have been set and the transaction will have
 * already been started before this is called.
 */

static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (PageChecked(page)) {
		ClearPageChecked(page);
		if (!page_has_buffers(page)) {
			create_empty_buffers(page, inode->i_sb->s_blocksize,
					     (1 << BH_Dirty)|(1 << BH_Uptodate));
		}
		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
	}
	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
}

/**
 * gfs2_jdata_writepage - Write complete page
 * @page: Page to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int ret;
	int done_trans = 0;

	if (PageChecked(page)) {
		if (wbc->sync_mode != WB_SYNC_ALL)
			goto out_ignore;
		ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
		if (ret)
			goto out_ignore;
		done_trans = 1;
	}
	ret = gfs2_writepage_common(page, wbc);
	if (ret > 0)
		ret = __gfs2_jdata_writepage(page, wbc);
	if (done_trans)
		gfs2_trans_end(sdp);
	return ret;

out_ignore:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}

/**
 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: Write-back control
 *
 * For the data=writeback case we can already ignore buffer heads
 * and write whole extents at once. This is a big reduction in the
 * number of I/O requests we send and the bmap calls we make in this case.
 */

static int gfs2_writeback_writepages(struct address_space *mapping,
				     struct writeback_control *wbc)
{
	return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
}

/**
 * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
 * @mapping: The mapping
 * @wbc: The writeback control
 * @pvec: The vector of pages
 * @nr_pages: The number of pages to write
 * @end: The last page index to consider
 *
 * Returns: non-zero if loop should terminate, zero otherwise
 */
static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
	int i;
	int ret;
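
	/*
	 * Reserve journal space for the worst case: every block of every
	 * page may need logging. With 4KB pages and a 4KB block size, for
	 * example, nrblocks above works out to one block per page.
	 */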
	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for (i = 0; i < nr_pages; i++) {
		struct page *page = pvec->pages[i];

		lock_page(page);

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			continue;
		}

		if (!wbc->range_cyclic && page->index > end) {
			ret = 1;
			unlock_page(page);
			continue;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
		    !clear_page_dirty_for_io(page)) {
			unlock_page(page);
			continue;
		}

		/* Is the page fully outside i_size? (truncate in progress) */
		if (page->index > end_index || (page->index == end_index && !offset)) {
			page->mapping->a_ops->invalidatepage(page, 0);
			unlock_page(page);
			continue;
		}

		ret = __gfs2_jdata_writepage(page, wbc);

		if (ret || (--(wbc->nr_to_write) <= 0))
			ret = 1;
	}
	gfs2_trans_end(sdp);
	return ret;
}

/**
 * gfs2_write_cache_jdata - Like write_cache_pages but different
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * The reason that we use our own function here is that we need to
 * start transactions before we grab page locks. This allows us
 * to get the ordering right.
 */
static int gfs2_write_cache_jdata(struct address_space *mapping,
				  struct writeback_control *wbc)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;
	int scanned = 0;
	int range_whole = 0;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		scanned = 1;
	}

retry:
	while (!done && (index <= end) &&
	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
					      PAGECACHE_TAG_DIRTY,
					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
		scanned = 1;
		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
		if (ret)
			done = 1;
		if (ret > 0)
			ret = 0;
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = 1;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;
	return ret;
}

/**
 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * Returns: errno
 */

static int gfs2_jdata_writepages(struct address_space *mapping,
				 struct writeback_control *wbc)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
	int ret;

	ret = gfs2_write_cache_jdata(mapping, wbc);
	if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
		gfs2_log_flush(sdp, ip->i_gl);
		ret = gfs2_write_cache_jdata(mapping, wbc);
	}
	return ret;
}

/**
 * stuffed_readpage - Fill in a Linux page with stuffed file data
 * @ip: the inode
 * @page: the page
 *
 * Returns: errno
 */

static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *dibh;
	u64 dsize = i_size_read(&ip->i_inode);
	void *kaddr;
	int error;

	/*
	 * Due to the order of unstuffing files and ->fault(), we can be
	 * asked for a zero page in the case of a stuffed file being extended,
	 * so we need to supply one here. It doesn't happen often.
	 */
	if (unlikely(page->index)) {
		zero_user(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		return 0;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;
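
	/*
	 * A stuffed file's data lives in the dinode block itself, directly
	 * after the on-disk inode header, so the copy is capped at
	 * b_size - sizeof(struct gfs2_dinode) bytes; the rest of the page
	 * is zero-filled.
	 */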
	kaddr = kmap_atomic(page, KM_USER0);
	if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
		dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
	memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
	kunmap_atomic(kaddr, KM_USER0);
	flush_dcache_page(page);
	brelse(dibh);
	SetPageUptodate(page);

	return 0;
}

/**
 * __gfs2_readpage - readpage
 * @file: The file to read a page for
 * @page: The page to read
 *
 * This is the core of gfs2's readpage. It's used by the internal file
 * reading code as in that case we already hold the glock. It's also
 * called by gfs2_readpage() once the required lock has been granted.
 */

static int __gfs2_readpage(void *file, struct page *page)
{
	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	int error;

	if (gfs2_is_stuffed(ip)) {
		error = stuffed_readpage(ip, page);
		unlock_page(page);
	} else {
		error = mpage_readpage(page, gfs2_block_map);
	}

	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return -EIO;

	return error;
}

/**
 * gfs2_readpage - read a page of a file
 * @file: The file to read
 * @page: The page of the file
 *
 * This deals with the locking required. We have to unlock and
 * relock the page in order to get the locking in the right
 * order.
 */

static int gfs2_readpage(struct file *file, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_holder gh;
	int error;

	unlock_page(page);
	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	error = gfs2_glock_nq(&gh);
	if (unlikely(error))
		goto out;
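	/*
	 * While the page was unlocked it may have been truncated or read
	 * by someone else; returning AOP_TRUNCATED_PAGE asks the caller to
	 * look the page up again and retry in that case.
	 */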
	error = AOP_TRUNCATED_PAGE;
	lock_page(page);
	if (page->mapping == mapping && !PageUptodate(page))
		error = __gfs2_readpage(file, page);
	else
		unlock_page(page);
	gfs2_glock_dq(&gh);
out:
	gfs2_holder_uninit(&gh);
	if (error && error != AOP_TRUNCATED_PAGE)
		lock_page(page);
	return error;
}

/**
 * gfs2_internal_read - read an internal file
 * @ip: The gfs2 inode
 * @ra_state: The readahead state (or NULL for no readahead)
 * @buf: The buffer to fill
 * @pos: The file position
 * @size: The amount to read
 *
 * Returns: @size on success, or errno
 */

int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
		       char *buf, loff_t *pos, unsigned size)
{
	struct address_space *mapping = ip->i_inode.i_mapping;
	unsigned long index = *pos / PAGE_CACHE_SIZE;
	unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
	unsigned copied = 0;
	unsigned amt;
	struct page *page;
	void *p;

	do {
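		/*
		 * Copy at most to the end of the current page; offset is
		 * only ever non-zero on the first pass around the loop.
		 */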
		amt = size - copied;
		if (offset + size > PAGE_CACHE_SIZE)
			amt = PAGE_CACHE_SIZE - offset;
		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
		if (IS_ERR(page))
			return PTR_ERR(page);
		p = kmap_atomic(page, KM_USER0);
		memcpy(buf + copied, p + offset, amt);
		kunmap_atomic(p, KM_USER0);
		mark_page_accessed(page);
		page_cache_release(page);
		copied += amt;
		index++;
		offset = 0;
	} while(copied < size);

	(*pos) += size;
	return size;
}

/**
 * gfs2_readpages - Read a bunch of pages at once
 * @file: The file being read
 * @mapping: The mapping to read from
 * @pages: The list of pages to read
 * @nr_pages: The number of pages
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore any things
 *    which are slightly inconvenient (such as locking conflicts between
 *    the page lock and the glock) and return having done no I/O. It's
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
 * 2. We don't handle stuffed files here; we let readpage do the honours.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
 */
static int gfs2_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_holder gh;
	int ret;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (unlikely(ret))
		goto out_uninit;
	if (!gfs2_is_stuffed(ip))
		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		ret = -EIO;
	return ret;
}

/**
 * gfs2_write_begin - Begin to write to a file
 * @file: The file to write to
 * @mapping: The mapping in which to write
 * @pos: The file offset at which to start writing
 * @len: Length of the write
 * @flags: Various flags
 * @pagep: Pointer to return the page
 * @fsdata: Pointer to return fs data (unused by GFS2)
 *
 * Returns: errno
 */

static int gfs2_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	int alloc_required;
	int error = 0;
	struct gfs2_alloc *al;
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
	unsigned to = from + len;
	struct page *page;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (unlikely(error))
		goto out_uninit;
	if (&ip->i_inode == sdp->sd_rindex) {
		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &m_ip->i_gh);
		if (unlikely(error)) {
			gfs2_glock_dq(&ip->i_gh);
			goto out_uninit;
		}
	}

	alloc_required = gfs2_write_alloc_required(ip, pos, len);
	if (alloc_required || gfs2_is_jdata(ip))
		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);

	if (alloc_required) {
		al = gfs2_alloc_get(ip);
		if (!al) {
			error = -ENOMEM;
			goto out_unlock;
		}

		error = gfs2_quota_lock_check(ip);
		if (error)
			goto out_alloc_put;

		al->al_requested = data_blocks + ind_blocks;
		error = gfs2_inplace_reserve(ip);
		if (error)
			goto out_qunlock;
	}
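
	/*
	 * Work out the size of the transaction: one block for the dinode,
	 * plus any indirect blocks, plus the data blocks themselves when
	 * they are journaled (jdata), plus statfs/quota changes when blocks
	 * are being allocated, and two extra statfs changes for writes to
	 * the rindex (which also update the master statfs file).
	 */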
	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks)
		rblocks += RES_STATFS + RES_QUOTA;
	if (&ip->i_inode == sdp->sd_rindex)
		rblocks += 2 * RES_STATFS;

	error = gfs2_trans_begin(sdp, rblocks,
				 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
	if (error)
		goto out_trans_fail;

	error = -ENOMEM;
	flags |= AOP_FLAG_NOFS;
	page = grab_cache_page_write_begin(mapping, index, flags);
	*pagep = page;
	if (unlikely(!page))
		goto out_endtrans;

	if (gfs2_is_stuffed(ip)) {
		error = 0;
		if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
			error = gfs2_unstuff_dinode(ip, page);
			if (error == 0)
				goto prepare_write;
		} else if (!PageUptodate(page)) {
			error = stuffed_readpage(ip, page);
		}
		goto out;
	}

prepare_write:
	error = block_prepare_write(page, from, to, gfs2_block_map);
out:
	if (error == 0)
		return 0;

	page_cache_release(page);

	/*
	 * XXX(truncate): the call below should probably be replaced with
	 * a call to the gfs2-specific truncate blocks helper to actually
	 * release disk blocks..
	 */
	if (pos + len > ip->i_inode.i_size)
		truncate_setsize(&ip->i_inode, ip->i_inode.i_size);
out_endtrans:
	gfs2_trans_end(sdp);
out_trans_fail:
	if (alloc_required) {
		gfs2_inplace_release(ip);
out_qunlock:
		gfs2_quota_unlock(ip);
out_alloc_put:
		gfs2_alloc_put(ip);
	}
out_unlock:
	if (&ip->i_inode == sdp->sd_rindex) {
		gfs2_glock_dq(&m_ip->i_gh);
		gfs2_holder_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}

/**
 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
 * @inode: the rindex inode
 */

static void adjust_fs_space(struct inode *inode)
{
	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct buffer_head *m_bh, *l_bh;
	u64 fs_total, new_free;

	/* Total up the file system space, according to the latest rindex. */
	fs_total = gfs2_ri_total(sdp);
	if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
		return;

	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
	else
		new_free = 0;
	spin_unlock(&sdp->sd_statfs_spin);
	fs_warn(sdp, "File system extended by %llu blocks.\n",
		(unsigned long long)new_free);
	gfs2_statfs_change(sdp, new_free, new_free, 0);

	if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
		goto out;
	update_statfs(sdp, m_bh, l_bh);
	brelse(l_bh);
out:
	brelse(m_bh);
}

/**
 * gfs2_stuffed_write_end - Write end for stuffed files
 * @inode: The inode
 * @dibh: The buffer_head containing the on-disk inode
 * @pos: The file position
 * @len: The length of the write
 * @copied: How much was actually copied by the VFS
 * @page: The page
 *
 * This copies the data from the page into the inode block after
 * the inode data structure itself.
 *
 * Returns: The number of bytes copied
 */

static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
				  loff_t pos, unsigned len, unsigned copied,
				  struct page *page)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	u64 to = pos + copied;
	void *kaddr;
	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
	struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;

	BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
	kaddr = kmap_atomic(page, KM_USER0);
	memcpy(buf + pos, kaddr + pos, copied);
	memset(kaddr + pos + copied, 0, len - copied);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	unlock_page(page);
	page_cache_release(page);

	if (copied) {
		if (inode->i_size < to) {
			i_size_write(inode, to);
			ip->i_disksize = inode->i_size;
		}
		gfs2_dinode_out(ip, di);
		mark_inode_dirty(inode);
	}

	if (inode == sdp->sd_rindex) {
		adjust_fs_space(inode);
		ip->i_gh.gh_flags |= GL_NOCACHE;
	}

	brelse(dibh);
	gfs2_trans_end(sdp);
	if (inode == sdp->sd_rindex) {
		gfs2_glock_dq(&m_ip->i_gh);
		gfs2_holder_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq(&ip->i_gh);
	gfs2_holder_uninit(&ip->i_gh);
	return copied;
}

/**
 * gfs2_write_end
 * @file: The file to write to
 * @mapping: The address space to write to
 * @pos: The file position
 * @len: The length of the data
 * @copied: How much was actually copied by the VFS
 * @page: The page that has been written
 * @fsdata: The fsdata (unused in GFS2)
 *
 * The main write_end function for GFS2. We have a separate one for
 * stuffed files as they are slightly different, otherwise we just
 * put our locking around the VFS provided functions.
 *
 * Returns: errno
 */

static int gfs2_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct buffer_head *dibh;
	struct gfs2_alloc *al = ip->i_alloc;
	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
	unsigned int to = from + len;
	int ret;

	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(ret)) {
		unlock_page(page);
		page_cache_release(page);
		goto failed;
	}

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);

	if (gfs2_is_stuffed(ip))
		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);

	if (!gfs2_is_writeback(ip))
		gfs2_page_add_databufs(ip, page, from, to);

	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
	if (ret > 0) {
		if (inode->i_size > ip->i_disksize)
			ip->i_disksize = inode->i_size;
		gfs2_dinode_out(ip, dibh->b_data);
		mark_inode_dirty(inode);
	}

	if (inode == sdp->sd_rindex) {
		adjust_fs_space(inode);
		ip->i_gh.gh_flags |= GL_NOCACHE;
	}

	brelse(dibh);
	gfs2_trans_end(sdp);
failed:
	if (al) {
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
		gfs2_alloc_put(ip);
	}
	if (inode == sdp->sd_rindex) {
		gfs2_glock_dq(&m_ip->i_gh);
		gfs2_holder_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq(&ip->i_gh);
	gfs2_holder_uninit(&ip->i_gh);
	return ret;
}

/**
 * gfs2_set_page_dirty - Page dirtying function
 * @page: The page to dirty
 *
 * Returns: 1 if it dirtied the page, or 0 otherwise
 */

static int gfs2_set_page_dirty(struct page *page)
{
	SetPageChecked(page);
	return __set_page_dirty_buffers(page);
}

/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
 *
 * Returns: The disk address for the block or 0 on hole or error
 */

static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_holder i_gh;
	sector_t dblock = 0;
	int error;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
	if (error)
		return 0;

	if (!gfs2_is_stuffed(ip))
		dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);

	gfs2_glock_dq_uninit(&i_gh);

	return dblock;
}
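
/*
 * gfs2_discard - Detach a buffer from the journal before it is invalidated.
 *
 * Clears the buffer's dirty state and, if it has journal bookkeeping
 * attached, either unlinks it from the log element list (when it is
 * unpinned) or hands it to gfs2_remove_from_journal(). The mapped/req/new
 * state is then stripped so the buffer looks freshly allocated.
 */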
static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	lock_buffer(bh);
	gfs2_log_lock(sdp);
	clear_buffer_dirty(bh);
	bd = bh->b_private;
	if (bd) {
		if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
			list_del_init(&bd->bd_le.le_list);
		else
			gfs2_remove_from_journal(bh, current->journal_info, 0);
	}
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	gfs2_log_unlock(sdp);
	unlock_buffer(bh);
}
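
/*
 * gfs2_invalidatepage - Invalidate the part of a page from @offset onwards.
 *
 * Any buffer which starts at or beyond @offset is discarded from the
 * journal; when the whole page is being invalidated (@offset == 0) the
 * PageChecked flag is cleared and an attempt is made to release the
 * page's buffers as well.
 */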
static void gfs2_invalidatepage(struct page *page, unsigned long offset)
{
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	struct buffer_head *bh, *head;
	unsigned long pos = 0;

	BUG_ON(!PageLocked(page));
	if (offset == 0)
		ClearPageChecked(page);
	if (!page_has_buffers(page))
		goto out;

	bh = head = page_buffers(page);
	do {
		if (offset <= pos)
			gfs2_discard(sdp, bh);
		pos += bh->b_size;
		bh = bh->b_this_page;
	} while (bh != head);
out:
	if (offset == 0)
		try_to_release_page(page, 0);
}

/**
 * gfs2_ok_for_dio - check that dio is valid on this file
 * @ip: The inode
 * @rw: READ or WRITE
 * @offset: The offset at which we are reading or writing
 *
 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
 *          1 (to accept the i/o request)
 */

static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
{
	/*
	 * Should we return an error here? I can't see that O_DIRECT for
	 * a stuffed file makes any sense. For now we'll silently fall
	 * back to buffered I/O.
	 */
	if (gfs2_is_stuffed(ip))
		return 0;

	if (offset >= i_size_read(&ip->i_inode))
		return 0;

	return 1;
}
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int rv;

	/*
	 * Deferred lock, even if it's a write, since we do no allocation on
	 * this path. All we need to change is atime, and this lock mode
	 * ensures that other nodes have flushed their buffered read caches
	 * (i.e. their page cache entries for this inode). We do not,
	 * unfortunately, have the option of only flushing a range like
	 * the VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
	rv = gfs2_glock_nq(&gh);
	if (rv)
		return rv;
	rv = gfs2_ok_for_dio(ip, rw, offset);
	if (rv != 1)
		goto out; /* dio not valid, fall back to buffered i/o */

	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				  offset, nr_segs, gfs2_get_block_direct,
				  NULL, NULL, 0);
out:
	gfs2_glock_dq_m(1, &gh);
	gfs2_holder_uninit(&gh);
	return rv;
}

/**
 * gfs2_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Call try_to_free_buffers() if the buffers in this page can be
 * released.
 *
 * Returns: 0 if the buffers are still in use, otherwise the result of
 *          try_to_free_buffers()
 */

int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct address_space *mapping = page->mapping;
	struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
	struct buffer_head *bh, *head;
	struct gfs2_bufdata *bd;

	if (!page_has_buffers(page))
		return 0;

	gfs2_log_lock(sdp);
	head = bh = page_buffers(page);
	do {
		if (atomic_read(&bh->b_count))
			goto cannot_release;
		bd = bh->b_private;
		if (bd && bd->bd_ail)
			goto cannot_release;
		gfs2_assert_warn(sdp, !buffer_pinned(bh));
		gfs2_assert_warn(sdp, !buffer_dirty(bh));
		bh = bh->b_this_page;
	} while (bh != head);
	gfs2_log_unlock(sdp);

	head = bh = page_buffers(page);
	do {
		gfs2_log_lock(sdp);
		bd = bh->b_private;
		if (bd) {
			gfs2_assert_warn(sdp, bd->bd_bh == bh);
			gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
			if (!list_empty(&bd->bd_le.le_list)) {
				if (!buffer_pinned(bh))
					list_del_init(&bd->bd_le.le_list);
				else
					bd = NULL;
			}
			if (bd)
				bd->bd_bh = NULL;
			bh->b_private = NULL;
		}
		gfs2_log_unlock(sdp);
		if (bd)
			kmem_cache_free(gfs2_bufdata_cachep, bd);

		bh = bh->b_this_page;
	} while (bh != head);

	return try_to_free_buffers(page);

cannot_release:
	gfs2_log_unlock(sdp);
	return 0;
}

static const struct address_space_operations gfs2_writeback_aops = {
	.writepage = gfs2_writeback_writepage,
	.writepages = gfs2_writeback_writepages,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.sync_page = block_sync_page,
	.write_begin = gfs2_write_begin,
	.write_end = gfs2_write_end,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.direct_IO = gfs2_direct_IO,
	.migratepage = buffer_migrate_page,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};

static const struct address_space_operations gfs2_ordered_aops = {
	.writepage = gfs2_ordered_writepage,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.sync_page = block_sync_page,
	.write_begin = gfs2_write_begin,
	.write_end = gfs2_write_end,
	.set_page_dirty = gfs2_set_page_dirty,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.direct_IO = gfs2_direct_IO,
	.migratepage = buffer_migrate_page,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};

static const struct address_space_operations gfs2_jdata_aops = {
	.writepage = gfs2_jdata_writepage,
	.writepages = gfs2_jdata_writepages,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.sync_page = block_sync_page,
	.write_begin = gfs2_write_begin,
	.write_end = gfs2_write_end,
	.set_page_dirty = gfs2_set_page_dirty,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};
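
/*
 * gfs2_set_aops - Pick the address_space operations matching the inode's
 * data journaling mode: writeback, ordered (the default), or jdata. Note
 * that ordered and jdata mappings hook set_page_dirty to tag pages with
 * PageChecked, and that jdata mappings do not support direct I/O.
 */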
void gfs2_set_aops(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	if (gfs2_is_writeback(ip))
		inode->i_mapping->a_ops = &gfs2_writeback_aops;
	else if (gfs2_is_ordered(ip))
		inode->i_mapping->a_ops = &gfs2_ordered_aops;
	else if (gfs2_is_jdata(ip))
		inode->i_mapping->a_ops = &gfs2_jdata_aops;
	else
		BUG();
}