
/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>

#include <asm/uaccess.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"
#include "pnfs.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
				  struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
static const struct rpc_call_ops nfs_commit_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;

struct nfs_commit_data *nfs_commitdata_alloc(void)
{
	struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
	}
	return p;
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);

void nfs_commit_free(struct nfs_commit_data *p)
{
	mempool_free(p, nfs_commit_mempool);
}
EXPORT_SYMBOL_GPL(nfs_commit_free);

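/*
 * Allocate a write-data structure large enough for @pagecount pages.
 * Small requests use the page_array embedded in the structure; larger
 * ones fall back to a separately allocated page vector.
 */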
struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
{
	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_wdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

void nfs_writedata_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_wdata_mempool);
}

void nfs_writedata_release(struct nfs_write_data *wdata)
{
	put_nfs_open_context(wdata->args.context);
	nfs_writedata_free(wdata);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

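/*
 * Return a referenced pointer to the write request attached to @page,
 * or NULL if there is none. The caller must hold inode->i_lock.
 */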
static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page)) {
		req = (struct nfs_page *)page_private(page);
		if (req != NULL)
			kref_get(&req->wb_kref);
	}
	return req;
}

static struct nfs_page *nfs_page_find_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req = NULL;

	spin_lock(&inode->i_lock);
	req = nfs_page_find_request_locked(page);
	spin_unlock(&inode->i_lock);
	return req;
}

/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page->mapping->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
	if (i_size > 0 && page->index < end_index)
		goto out;
	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
	SetPageError(page);
	nfs_zap_mapping(page->mapping->host, page->mapping);
}

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (PageUptodate(page))
		return;
	if (base != 0)
		return;
	if (count != nfs_page_length(page))
		return;
	SetPageUptodate(page);
}

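/*
 * Map writeback_control hints onto the FLUSH_* flags used by the NFS
 * write paths: memory-reclaim writeback is sent at high RPC priority as
 * stable writes; kupdate/background writeback runs at low priority and
 * requests a stable write only if no commit is pending (FLUSH_COND_STABLE).
 */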
static int wb_priority(struct writeback_control *wbc)
{
	if (wbc->for_reclaim)
		return FLUSH_HIGHPRI | FLUSH_STABLE;
	if (wbc->for_kupdate || wbc->for_background)
		return FLUSH_LOWPRI | FLUSH_COND_STABLE;
	return FLUSH_COND_STABLE;
}

/*
 * NFS congestion control
 */
int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

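/*
 * The congestion thresholds above are expressed in pages:
 * NFS_CONGESTION_ON_THRESH converts nfs_congestion_kb into a page count,
 * and the OFF threshold is three quarters of that (ON - ON/4). The gap
 * provides hysteresis, so the bdi congestion flag set below is only
 * cleared once writeback has drained well below the level that set it.
 */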
static int nfs_set_page_writeback(struct page *page)
{
	int ret = test_set_page_writeback(page);

	if (!ret) {
		struct inode *inode = page->mapping->host;
		struct nfs_server *nfss = NFS_SERVER(inode);

		page_cache_get(page);
		if (atomic_long_inc_return(&nfss->writeback) >
				NFS_CONGESTION_ON_THRESH) {
			set_bdi_congested(&nfss->backing_dev_info,
						BLK_RW_ASYNC);
		}
	}
	return ret;
}

static void nfs_end_page_writeback(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_server *nfss = NFS_SERVER(inode);

	end_page_writeback(page);
	page_cache_release(page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}

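/*
 * Find the write request attached to @page and lock it, waiting for an
 * already-locked request unless @nonblock is set. Returns the locked,
 * referenced request, NULL if the page has none, or an ERR_PTR if the
 * wait failed or blocking was not allowed.
 */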
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			break;
		if (nfs_lock_request_dontget(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_lock_request_dontget() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		if (!nonblock)
			ret = nfs_wait_on_request(req);
		else
			ret = -EAGAIN;
		nfs_release_request(req);
		if (ret != 0)
			return ERR_PTR(ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return req;
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page, bool nonblock)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_find_and_lock_request(page, nonblock);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = nfs_set_page_writeback(page);
	BUG_ON(ret != 0);
	BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));

	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
		ret = pgio->pg_error;
	}
out:
	return ret;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
	struct inode *inode = page->mapping->host;
	int ret;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

	nfs_pageio_cond_complete(pgio, page->index);
	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
	if (ret == -EAGAIN) {
		redirty_page_for_writepage(wbc, page);
		ret = 0;
	}
	return ret;
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	int err;

	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	unsigned long *bitlock = &NFS_I(inode)->flags;
	struct nfs_pageio_descriptor pgio;
	int err;

	/* Stop dirtying of new pages while we sync */
	err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (err)
		goto out_err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	nfs_pageio_complete(&pgio);

	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
	smp_mb__after_clear_bit();
	wake_up_bit(bitlock, NFS_INO_FLUSHING);

	if (err < 0)
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

/*
 * Insert a write request into an inode
 */
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	/* Lock the request! */
	nfs_lock_request_dontget(req);

	spin_lock(&inode->i_lock);
	if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
		inode->i_version++;
	set_bit(PG_MAPPED, &req->wb_flags);
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	kref_get(&req->wb_kref);
	spin_unlock(&inode->i_lock);
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON(!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	clear_bit(PG_MAPPED, &req->wb_flags);
	nfsi->npages--;
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	__set_page_dirty_nobuffers(req->wb_page);
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/**
 * nfs_request_add_commit_list - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @head: commit list head
 *
 * This sets the PG_CLEAN bit, updates the inode global count of
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must _not_ hold the inode->i_lock, but must be
 * holding the nfs_page lock.
 */
void
nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	set_bit(PG_CLEAN, &(req)->wb_flags);
	spin_lock(&inode->i_lock);
	nfs_list_add_request(req, head);
	NFS_I(inode)->ncommit++;
	spin_unlock(&inode->i_lock);
	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
 * nfs_request_remove_commit_list - Remove request from a commit list
 * @req: pointer to a nfs_page
 *
 * This clears the PG_CLEAN bit, and updates the inode global count of
 * number of outstanding requests requiring a commit.
 * It does not update the MM page stats.
 *
 * The caller _must_ hold the inode->i_lock and the nfs_page lock.
 */
void
nfs_request_remove_commit_list(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
		return;
	nfs_list_remove_request(req);
	NFS_I(inode)->ncommit--;
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);

/*
 * Add a request to the inode's commit list.
 */
static void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	if (pnfs_mark_request_commit(req, lseg))
		return;
	nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
}

static void
nfs_clear_page_commit(struct page *page)
{
	dec_zone_page_state(page, NR_UNSTABLE_NFS);
	dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
}

static void
nfs_clear_request_commit(struct nfs_page *req)
{
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		struct inode *inode = req->wb_context->dentry->d_inode;

		if (!pnfs_clear_request_commit(req)) {
			spin_lock(&inode->i_lock);
			nfs_request_remove_commit_list(req);
			spin_unlock(&inode->i_lock);
		}
		nfs_clear_page_commit(req->wb_page);
	}
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	if (data->verf.committed == NFS_DATA_SYNC)
		return data->lseg == NULL;
	else
		return data->verf.committed != NFS_FILE_SYNC;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req,
				  struct nfs_write_data *data)
{
	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
		nfs_mark_request_commit(req, data->lseg);
		return 1;
	}
	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
		nfs_mark_request_dirty(req);
		return 1;
	}
	return 0;
}

#else
static void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
{
}

static void
nfs_clear_request_commit(struct nfs_page *req)
{
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return 0;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req,
				  struct nfs_write_data *data)
{
	return 0;
}
#endif

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
{
	return nfsi->ncommit > 0;
}

/* i_lock held by caller */
static int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
		spinlock_t *lock)
{
	struct nfs_page *req, *tmp;
	int ret = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
		if (!nfs_lock_request(req))
			continue;
		/* cond_resched_lock() may drop the lock, so resync tmp */
		if (cond_resched_lock(lock))
			list_safe_reset_next(req, tmp, wb_list);
		nfs_request_remove_commit_list(req);
		nfs_list_add_request(req, dst);
		ret++;
		if (ret == max)
			break;
	}
	return ret;
}

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int ret = 0;

	spin_lock(&inode->i_lock);
	if (nfsi->ncommit > 0) {
		const int max = INT_MAX;

		ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
				&inode->i_lock);
		ret += pnfs_scan_commit_lists(inode, max - ret,
				&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return ret;
}
#else
static inline int nfs_need_commit(struct nfs_inode *nfsi)
{
	return 0;
}

static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
{
	return 0;
}
#endif

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_lock_request_dontget(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	if (req)
		nfs_clear_request_commit(req);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page	*req;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, inode, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	nfs_inode_add_request(inode, req);
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page	*req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
	nfs_mark_request_dirty(req);
	nfs_unlock_request(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_page	*req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_request(page);
		if (req == NULL)
			return 0;
		do_flush = req->wb_page != page || req->wb_context != ctx ||
			req->wb_lock_context->lockowner != current->files ||
			req->wb_lock_context->pid != current->tgid;
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page->mapping->host, page);
	} while (status == 0);
	return status;
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	return PageUptodate(page) &&
		!(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode	*inode = page->mapping->host;
	int		status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name, count,
		(long long)(page_offset(page) + offset));

	/* If we're not using byte range locks, and we know the page
	 * is up to date, it may be more efficient to extend the write
	 * to cover the entire page in order to avoid fragmentation
	 * inefficiencies.
	 */
	if (nfs_write_pageuptodate(page, inode) &&
			inode->i_flock == NULL &&
			!(file->f_flags & O_DSYNC)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(page);
	else
		__set_page_dirty_nobuffers(page);

	dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
			status, (long long)i_size_read(inode));
	return status;
}

static void nfs_writepage_release(struct nfs_page *req,
				  struct nfs_write_data *data)
{
	struct page *page = req->wb_page;

	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
		nfs_inode_remove_request(req);
	nfs_unlock_request(req);
	nfs_end_page_writeback(page);
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
	case FLUSH_HIGHPRI:
		return RPC_PRIORITY_HIGH;
	case FLUSH_LOWPRI:
		return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

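/*
 * Set up and launch the RPC WRITE task for @data. The task always runs
 * asynchronously; if FLUSH_SYNC is set in @how, wait for it to complete
 * and return its status, otherwise return once it has been queued.
 */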
int nfs_initiate_write(struct nfs_write_data *data,
		       struct rpc_clnt *clnt,
		       const struct rpc_call_ops *call_ops,
		       int how)
{
	struct inode *inode = data->inode;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.rpc_client = clnt,
		.task = &data->task,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC,
		.priority = priority,
	};
	int ret = 0;

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->write_setup(data, &msg);

	dprintk("NFS: %5u initiated write call "
		"(req %s/%lld, %u bytes @ offset %llu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		data->args.count,
		(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task)) {
		ret = PTR_ERR(task);
		goto out;
	}
	if (how & FLUSH_SYNC) {
		ret = rpc_wait_for_completion_task(task);
		if (ret == 0)
			ret = task->tk_status;
	}
	rpc_put_task(task);
out:
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_initiate_write);

/*
 * Set up the argument/result storage required for the RPC call.
 */
static void nfs_write_rpcsetup(struct nfs_page *req,
		struct nfs_write_data *data,
		unsigned int count, unsigned int offset,
		int how)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->req = req;
	data->inode = inode = req->wb_context->dentry->d_inode;
	data->cred = req->wb_context->cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	/* pnfs_set_layoutcommit needs this */
	data->mds_offset = data->args.offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;
	data->args.stable = NFS_UNSTABLE;
	switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
	case 0:
		break;
	case FLUSH_COND_STABLE:
		if (nfs_need_commit(NFS_I(inode)))
			break;
		/* fall through: nothing awaits commit, do a stable write */
	default:
		data->args.stable = NFS_FILE_SYNC;
	}

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);
}

static int nfs_do_write(struct nfs_write_data *data,
		const struct rpc_call_ops *call_ops,
		int how)
{
	struct inode *inode = data->args.context->dentry->d_inode;

	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}

static int nfs_do_multiple_writes(struct list_head *head,
		const struct rpc_call_ops *call_ops,
		int how)
{
	struct nfs_write_data *data;
	int ret = 0;

	while (!list_empty(head)) {
		int ret2;

		data = list_entry(head->next, struct nfs_write_data, list);
		list_del_init(&data->list);

		ret2 = nfs_do_write(data, call_ops, how);
		if (ret == 0)
			ret = ret2;
	}
	return ret;
}

/* If a nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	struct page *page = req->wb_page;

	nfs_mark_request_dirty(req);
	nfs_unlock_request(req);
	nfs_end_page_writeback(page);
}

/*
 * Generate multiple small requests to write out a single
 * contiguous dirty area on one page.
 */
static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	size_t wsize = desc->pg_bsize, nbytes;
	unsigned int offset;
	int requests = 0;
	int ret = 0;

	nfs_list_remove_request(req);

	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
	     desc->pg_count > wsize))
		desc->pg_ioflags &= ~FLUSH_COND_STABLE;

	offset = 0;
	nbytes = desc->pg_count;
	do {
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(1);
		if (!data)
			goto out_bad;
		data->pagevec[0] = page;
		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
		list_add(&data->list, res);
		requests++;
		nbytes -= len;
		offset += len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);
	desc->pg_rpc_callops = &nfs_write_partial_ops;
	return ret;

out_bad:
	while (!list_empty(res)) {
		data = list_entry(res->next, struct nfs_write_data, list);
		list_del(&data->list);
		nfs_writedata_release(data);
	}
	nfs_redirty_request(req);
	return -ENOMEM;
}

/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 */
static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
	struct nfs_page		*req;
	struct page		**pages;
	struct nfs_write_data	*data;
	struct list_head *head = &desc->pg_list;
	int ret = 0;

	data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
						      desc->pg_count));
	if (!data) {
		while (!list_empty(head)) {
			req = nfs_list_entry(head->next);
			nfs_list_remove_request(req);
			nfs_redirty_request(req);
		}
		ret = -ENOMEM;
		goto out;
	}
	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
		desc->pg_ioflags &= ~FLUSH_COND_STABLE;

	/* Set up the argument struct */
	nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
	list_add(&data->list, res);
	desc->pg_rpc_callops = &nfs_write_full_ops;
out:
	return ret;
}

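/*
 * Pick the flush strategy for the coalesced requests: if the server's
 * write size is smaller than a page, each page must be split across
 * several WRITE calls (nfs_flush_multi); otherwise a single call can
 * cover the whole request list (nfs_flush_one).
 */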
int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
{
	if (desc->pg_bsize < PAGE_CACHE_SIZE)
		return nfs_flush_multi(desc, head);
	return nfs_flush_one(desc, head);
}

static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);
	int ret;

	ret = nfs_generic_flush(desc, &head);
	if (ret == 0)
		ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
				desc->pg_ioflags);
	return ret;
}

static const struct nfs_pageio_ops nfs_pageio_write_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_writepages,
};

void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
			       struct inode *inode, int ioflags)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
			NFS_SERVER(inode)->wsize, ioflags);
}

void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pageio_write_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);

static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				  struct inode *inode, int ioflags)
{
	if (!pnfs_pageio_init_write(pgio, inode, ioflags))
		nfs_pageio_init_write_mds(pgio, inode, ioflags);
}

/*
 * Handle a write reply that flushed part of a page.
 */
static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data	*data = calldata;

	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
		task->tk_pid,
		data->req->wb_context->dentry->d_inode->i_sb->s_id,
		(long long)
		NFS_FILEID(data->req->wb_context->dentry->d_inode),
		data->req->wb_bytes, (long long)req_offset(data->req));

	nfs_writeback_done(task, data);
}

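/*
 * Per-request completion for a partial-page write. An error marks the
 * page bad; an unstable or DATA_SYNC reply flags the request for a later
 * COMMIT, and a changed write verifier (suggesting a server reboot)
 * forces the write to be rescheduled instead.
 */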
static void nfs_writeback_release_partial(void *calldata)
{
	struct nfs_write_data	*data = calldata;
	struct nfs_page		*req = data->req;
	struct page		*page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0) {
		nfs_set_pageerror(page);
		nfs_context_set_write_error(req->wb_context, status);
		dprintk(", error = %d\n", status);
		goto out;
	}

	if (nfs_write_need_commit(data)) {
		struct inode *inode = page->mapping->host;

		spin_lock(&inode->i_lock);
		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
			/* Do nothing; we need to resend the writes */
		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			dprintk(" defer commit\n");
		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
			set_bit(PG_NEED_RESCHED, &req->wb_flags);
			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
			dprintk(" server reboot detected\n");
		}
		spin_unlock(&inode->i_lock);
	} else
		dprintk(" OK\n");

out:
	if (atomic_dec_and_test(&req->wb_complete))
		nfs_writepage_release(req, data);
	nfs_writedata_release(calldata);
}

void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
}

void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data *data = calldata;

	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}

static const struct rpc_call_ops nfs_write_partial_ops = {
	.rpc_call_prepare = nfs_write_prepare,
	.rpc_call_done = nfs_writeback_done_partial,
	.rpc_release = nfs_writeback_release_partial,
};

/*
 * Handle a write reply that flushes a whole page.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *	  writebacks since the page->count is kept > 1 for as long
 *	  as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data	*data = calldata;

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_full(void *calldata)
{
	struct nfs_write_data	*data = calldata;
	int status = data->task.tk_status;

	/* Update attributes as result of writeback. */
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;

		nfs_list_remove_request(req);

		dprintk("NFS: %5u write (%s/%lld %d@%lld)",
			data->task.tk_pid,
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));

		if (status < 0) {
			nfs_set_pageerror(page);
			nfs_context_set_write_error(req->wb_context, status);
			dprintk(", error = %d\n", status);
			goto remove_request;
		}

		if (nfs_write_need_commit(data)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			nfs_mark_request_commit(req, data->lseg);
			dprintk(" marked for commit\n");
			goto next;
		}
		dprintk(" OK\n");
remove_request:
		nfs_inode_remove_request(req);
next:
		nfs_unlock_request(req);
		nfs_end_page_writeback(page);
	}
	nfs_writedata_release(calldata);
}

static const struct rpc_call_ops nfs_write_full_ops = {
	.rpc_call_prepare = nfs_write_prepare,
	.rpc_call_done = nfs_writeback_done_full,
	.rpc_release = nfs_writeback_release_full,
};

/*
 * This function is called when the WRITE call is complete.
 */
void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
	struct nfs_writeargs	*argp = &data->args;
	struct nfs_writeres	*resp = &data->res;
	int status;

	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients. A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(data->inode)->write_done(task, data);
	if (status != 0)
		return;
	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long	complain;

		/* Note this will print the MDS for a DS write */
		if (time_before(complain, jiffies)) {
			dprintk("NFS: faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif
	/* Is this a short write? */
	if (task->tk_status >= 0 && resp->count < argp->count) {
		static unsigned long	complain;

		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);

		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Was this an NFSv2 write or an NFSv3 stable write? */
			if (resp->verf->committed != NFS_UNSTABLE) {
				/* Resend from where the server left off */
				data->mds_offset += resp->count;
				argp->offset += resp->count;
				argp->pgbase += resp->count;
				argp->count -= resp->count;
			} else {
				/* Resend as a stable write in order to avoid
				 * headaches in the case of a server crash.
				 */
				argp->stable = NFS_FILE_SYNC;
			}
			rpc_restart_call_prepare(task);
			return;
		}
		if (time_before(complain, jiffies)) {
			printk(KERN_WARNING
			       "NFS: Server wrote zero bytes, expected %u.\n",
			       argp->count);
			complain = jiffies + 300 * HZ;
		}
		/* Can't do anything about it except throw an error. */
		task->tk_status = -EIO;
	}
	return;
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
{
	int ret;

	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
		return 1;
	if (!may_wait)
		return 0;
	ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
				NFS_INO_COMMIT,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
	return (ret < 0) ? ret : 1;
}

void nfs_commit_clear_lock(struct nfs_inode *nfsi)
{
	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}
EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);

void nfs_commitdata_release(struct nfs_commit_data *data)
{
	put_nfs_open_context(data->context);
	nfs_commit_free(data);
}
EXPORT_SYMBOL_GPL(nfs_commitdata_release);

int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
			const struct rpc_call_ops *call_ops,
			int how)
{
	struct rpc_task *task;
	int priority = flush_task_priority(how);
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC,
		.priority = priority,
	};

	/* Set up the initial task struct. */
	NFS_PROTO(data->inode)->commit_setup(data, &msg);

	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	if (how & FLUSH_SYNC)
		rpc_wait_for_completion_task(task);
	rpc_put_task(task);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_commit);

/*
 * Set up the argument/result storage required for the RPC call.
 */
void nfs_init_commit(struct nfs_commit_data *data,
		     struct list_head *head,
		     struct pnfs_layout_segment *lseg)
{
	struct nfs_page *first = nfs_list_entry(head->next);
	struct inode *inode = first->wb_context->dentry->d_inode;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode = inode;
	data->cred = first->wb_context->cred;
	data->lseg = lseg; /* reference transferred */
	data->mds_ops = &nfs_commit_ops;

	data->args.fh = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count = 0;
	data->context = get_nfs_open_context(first->wb_context);
	data->res.fattr = &data->fattr;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);
}
EXPORT_SYMBOL_GPL(nfs_init_commit);

void nfs_retry_commit(struct list_head *page_list,
		      struct pnfs_layout_segment *lseg)
{
	struct nfs_page *req;

	while (!list_empty(page_list)) {
		req = nfs_list_entry(page_list->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req, lseg);
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
			     BDI_RECLAIMABLE);
		nfs_unlock_request(req);
	}
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);

/*
 * Commit dirty pages
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_commit_data	*data;

	data = nfs_commitdata_alloc();
	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	nfs_init_commit(data, head, NULL);
	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
out_bad:
	nfs_retry_commit(head, NULL);
	nfs_commit_clear_lock(NFS_I(inode));
	return -ENOMEM;
}

/*
 * COMMIT call returned
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data	*data = calldata;

	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	NFS_PROTO(data->inode)->commit_done(task, data);
}

void nfs_commit_release_pages(struct nfs_commit_data *data)
{
	struct nfs_page	*req;
	int status = data->task.tk_status;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		nfs_clear_page_commit(req->wb_page);

		dprintk("NFS: commit (%s/%lld %d@%lld)",
			req->wb_context->dentry->d_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
		if (status < 0) {
			nfs_context_set_write_error(req->wb_context, status);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
	next:
		nfs_unlock_request(req);
	}
}
EXPORT_SYMBOL_GPL(nfs_commit_release_pages);

static void nfs_commit_release(void *calldata)
{
	struct nfs_commit_data *data = calldata;

	nfs_commit_release_pages(data);
	nfs_commit_clear_lock(NFS_I(data->inode));
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
	.rpc_call_prepare = nfs_commit_prepare,
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

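/*
 * Scan the inode for requests awaiting a COMMIT and send the COMMIT call,
 * waiting for completion when FLUSH_SYNC is set. Returns the number of
 * requests scanned, or a negative error.
 */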
int nfs_commit_inode(struct inode *inode, int how)
{
	LIST_HEAD(head);
	int may_wait = how & FLUSH_SYNC;
	int res;

	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
	if (res <= 0)
		goto out_mark_dirty;
	res = nfs_scan_commit(inode, &head);
	if (res) {
		int error;

		error = pnfs_commit_list(inode, &head, how);
		if (error == PNFS_NOT_ATTEMPTED)
			error = nfs_commit_list(inode, &head, how);
		if (error < 0)
			return error;
		if (!may_wait)
			goto out_mark_dirty;
		error = wait_on_bit(&NFS_I(inode)->flags,
				NFS_INO_COMMIT,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
		if (error < 0)
			return error;
	} else
		nfs_commit_clear_lock(NFS_I(inode));
	return res;
	/* Note: If we exit without ensuring that the commit is complete,
	 *	 we must mark the inode as dirty. Otherwise, future calls to
	 *	 sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
	 *	 that the data is on the disk.
	 */
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return res;
}

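/*
 * Commit heuristics for nfs_write_inode() below: a non-blocking flush
 * defers the commit while no more than half of the inode's pages are
 * awaiting one, and never waits for the COMMIT reply; a WB_SYNC_ALL
 * flush always commits synchronously.
 */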
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int flags = FLUSH_SYNC;
	int ret = 0;

	/* no commits means nothing needs to be done */
	if (!nfsi->ncommit)
		return ret;

	if (wbc->sync_mode == WB_SYNC_NONE) {
		/* Don't commit yet if this is a non-blocking flush and there
		 * are a lot of outstanding writes for this mapping.
		 */
		if (nfsi->ncommit <= (nfsi->npages >> 1))
			goto out_mark_dirty;

		/* don't wait for the COMMIT response */
		flags = 0;
	}

	ret = nfs_commit_inode(inode, flags);
	if (ret >= 0) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			if (ret < wbc->nr_to_write)
				wbc->nr_to_write -= ret;
			else
				wbc->nr_to_write = 0;
		}
		return 0;
	}
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
#else
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	return 0;
}
#endif

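/*
 * Wired up as the ->write_inode handler in the NFS super_operations:
 * commit any unstable pages and, for pNFS, issue a LAYOUTCOMMIT
 * (synchronously unless this is a WB_SYNC_NONE flush).
 */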
int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_commit_unstable_pages(inode, wbc);
	if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
		int status;
		bool sync = true;

		if (wbc->sync_mode == WB_SYNC_NONE)
			sync = false;

		status = pnfs_layoutcommit_inode(inode, sync);
		if (status < 0)
			return status;
	}
	return ret;
}

/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return sync_inode(inode, &wbc);
}

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	for (;;) {
		wait_on_page_writeback(page);
		req = nfs_page_find_request(page);
		if (req == NULL)
			break;
		if (nfs_lock_request_dontget(req)) {
			nfs_clear_request_commit(req);
			nfs_inode_remove_request(req);
			/*
			 * In case nfs_inode_remove_request has marked the
			 * page as being dirty
			 */
			cancel_dirty_page(page, PAGE_CACHE_SIZE);
			nfs_unlock_request(req);
			break;
		}
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret < 0)
			break;
	}
	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	for (;;) {
		wait_on_page_writeback(page);
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
			continue;
		}
		if (!PagePrivate(page))
			break;
		ret = nfs_commit_inode(inode, FLUSH_SYNC);
		if (ret < 0)
			goto out_error;
	}
	return 0;
out_error:
	return ret;
}

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	/*
	 * If PagePrivate is set, then the page is currently associated with
	 * an in-progress read or write request. Don't try to migrate it.
	 *
	 * FIXME: we could do this in principle, but we'll need a way to ensure
	 *        that we can safely release the inode reference while holding
	 *        the page lock.
	 */
	if (PagePrivate(page))
		return -EBUSY;

	nfs_fscache_release_page(page, GFP_KERNEL);

	return migrate_page(mapping, newpage, page, mode);
}
#endif

int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_write_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		return -ENOMEM;

	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
					     sizeof(struct nfs_commit_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_cdata_cachep == NULL)
		return -ENOMEM;

	/* The commit mempool must draw from the commit-data cache, not
	 * the (differently sized) write-data cache. */
	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_cdata_cachep);
	if (nfs_commit_mempool == NULL)
		return -ENOMEM;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_cdata_cachep);
	kmem_cache_destroy(nfs_wdata_cachep);
}