write.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827
  /* handling of writes to regular files and writing back to the server
   *
   * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
   * Written by David Howells (dhowells@redhat.com)
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License
   * as published by the Free Software Foundation; either version
   * 2 of the License, or (at your option) any later version.
   */
  11. #include <linux/slab.h>
  12. #include <linux/fs.h>
  13. #include <linux/pagemap.h>
  14. #include <linux/writeback.h>
  15. #include <linux/pagevec.h>
  16. #include "internal.h"
  /* forward declaration - the definition appears further down this file */
  static int afs_write_back_from_locked_page(struct afs_writeback *wb,
  					     struct page *primary_page);
  /*
   * flag a page as dirty so that it will be picked up for writeback
   * - returns whatever __set_page_dirty_nobuffers() reports for the page
   */
  int afs_set_page_dirty(struct page *page)
  {
  	int dirtied;

  	_enter("");

  	dirtied = __set_page_dirty_nobuffers(page);
  	return dirtied;
  }
  27. /*
  28. * unlink a writeback record because its usage has reached zero
  29. * - must be called with the wb->vnode->writeback_lock held
  30. */
  31. static void afs_unlink_writeback(struct afs_writeback *wb)
  32. {
  33. struct afs_writeback *front;
  34. struct afs_vnode *vnode = wb->vnode;
  35. list_del_init(&wb->link);
  36. if (!list_empty(&vnode->writebacks)) {
  37. /* if an fsync rises to the front of the queue then wake it
  38. * up */
  39. front = list_entry(vnode->writebacks.next,
  40. struct afs_writeback, link);
  41. if (front->state == AFS_WBACK_SYNCING) {
  42. _debug("wake up sync");
  43. front->state = AFS_WBACK_COMPLETE;
  44. wake_up(&front->waitq);
  45. }
  46. }
  47. }
  48. /*
  49. * free a writeback record
  50. */
  51. static void afs_free_writeback(struct afs_writeback *wb)
  52. {
  53. _enter("");
  54. key_put(wb->key);
  55. kfree(wb);
  56. }
  57. /*
  58. * dispose of a reference to a writeback record
  59. */
  60. void afs_put_writeback(struct afs_writeback *wb)
  61. {
  62. struct afs_vnode *vnode = wb->vnode;
  63. _enter("{%d}", wb->usage);
  64. spin_lock(&vnode->writeback_lock);
  65. if (--wb->usage == 0)
  66. afs_unlink_writeback(wb);
  67. else
  68. wb = NULL;
  69. spin_unlock(&vnode->writeback_lock);
  70. if (wb)
  71. afs_free_writeback(wb);
  72. }
  73. /*
  74. * partly or wholly fill a page that's under preparation for writing
  75. */
  76. static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
  77. unsigned start, unsigned len, struct page *page)
  78. {
  79. int ret;
  80. _enter(",,%u,%u", start, len);
  81. ASSERTCMP(start + len, <=, PAGE_SIZE);
  82. ret = afs_vnode_fetch_data(vnode, key, start, len, page);
  83. if (ret < 0) {
  84. if (ret == -ENOENT) {
  85. _debug("got NOENT from server"
  86. " - marking file deleted and stale");
  87. set_bit(AFS_VNODE_DELETED, &vnode->flags);
  88. ret = -ESTALE;
  89. }
  90. }
  91. _leave(" = %d", ret);
  92. return ret;
  93. }
  94. /*
  95. * prepare a page for being written to
  96. */
  97. static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
  98. struct key *key, unsigned offset, unsigned to)
  99. {
  100. unsigned eof, tail, start, stop, len;
  101. loff_t i_size, pos;
  102. void *p;
  103. int ret;
  104. _enter("");
  105. if (offset == 0 && to == PAGE_SIZE)
  106. return 0;
  107. p = kmap_atomic(page, KM_USER0);
  108. i_size = i_size_read(&vnode->vfs_inode);
  109. pos = (loff_t) page->index << PAGE_SHIFT;
  110. if (pos >= i_size) {
  111. /* partial write, page beyond EOF */
  112. _debug("beyond");
  113. if (offset > 0)
  114. memset(p, 0, offset);
  115. if (to < PAGE_SIZE)
  116. memset(p + to, 0, PAGE_SIZE - to);
  117. kunmap_atomic(p, KM_USER0);
  118. return 0;
  119. }
  120. if (i_size - pos >= PAGE_SIZE) {
  121. /* partial write, page entirely before EOF */
  122. _debug("before");
  123. tail = eof = PAGE_SIZE;
  124. } else {
  125. /* partial write, page overlaps EOF */
  126. eof = i_size - pos;
  127. _debug("overlap %u", eof);
  128. tail = max(eof, to);
  129. if (tail < PAGE_SIZE)
  130. memset(p + tail, 0, PAGE_SIZE - tail);
  131. if (offset > eof)
  132. memset(p + eof, 0, PAGE_SIZE - eof);
  133. }
  134. kunmap_atomic(p, KM_USER0);
  135. ret = 0;
  136. if (offset > 0 || eof > to) {
  137. /* need to fill one or two bits that aren't going to be written
  138. * (cover both fillers in one read if there are two) */
  139. start = (offset > 0) ? 0 : to;
  140. stop = (eof > to) ? eof : offset;
  141. len = stop - start;
  142. _debug("wr=%u-%u av=0-%u rd=%u@%u",
  143. offset, to, eof, start, len);
  144. ret = afs_fill_page(vnode, key, start, len, page);
  145. }
  146. _leave(" = %d", ret);
  147. return ret;
  148. }
  149. /*
  150. * prepare to perform part of a write to a page
  151. * - the caller holds the page locked, preventing it from being written out or
  152. * modified by anyone else
  153. */
  154. int afs_prepare_write(struct file *file, struct page *page,
  155. unsigned offset, unsigned to)
  156. {
  157. struct afs_writeback *candidate, *wb;
  158. struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
  159. struct key *key = file->private_data;
  160. pgoff_t index;
  161. int ret;
  162. _enter("{%x:%u},{%lx},%u,%u",
  163. vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
  164. candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
  165. if (!candidate)
  166. return -ENOMEM;
  167. candidate->vnode = vnode;
  168. candidate->first = candidate->last = page->index;
  169. candidate->offset_first = offset;
  170. candidate->to_last = to;
  171. candidate->usage = 1;
  172. candidate->state = AFS_WBACK_PENDING;
  173. init_waitqueue_head(&candidate->waitq);
  174. if (!PageUptodate(page)) {
  175. _debug("not up to date");
  176. ret = afs_prepare_page(vnode, page, key, offset, to);
  177. if (ret < 0) {
  178. kfree(candidate);
  179. _leave(" = %d [prep]", ret);
  180. return ret;
  181. }
  182. SetPageUptodate(page);
  183. }
  184. try_again:
  185. index = page->index;
  186. spin_lock(&vnode->writeback_lock);
  187. /* see if this page is already pending a writeback under a suitable key
  188. * - if so we can just join onto that one */
  189. wb = (struct afs_writeback *) page_private(page);
  190. if (wb) {
  191. if (wb->key == key && wb->state == AFS_WBACK_PENDING)
  192. goto subsume_in_current_wb;
  193. goto flush_conflicting_wb;
  194. }
  195. if (index > 0) {
  196. /* see if we can find an already pending writeback that we can
  197. * append this page to */
  198. list_for_each_entry(wb, &vnode->writebacks, link) {
  199. if (wb->last == index - 1 && wb->key == key &&
  200. wb->state == AFS_WBACK_PENDING)
  201. goto append_to_previous_wb;
  202. }
  203. }
  204. list_add_tail(&candidate->link, &vnode->writebacks);
  205. candidate->key = key_get(key);
  206. spin_unlock(&vnode->writeback_lock);
  207. SetPagePrivate(page);
  208. set_page_private(page, (unsigned long) candidate);
  209. _leave(" = 0 [new]");
  210. return 0;
  211. subsume_in_current_wb:
  212. _debug("subsume");
  213. ASSERTRANGE(wb->first, <=, index, <=, wb->last);
  214. if (index == wb->first && offset < wb->offset_first)
  215. wb->offset_first = offset;
  216. if (index == wb->last && to > wb->to_last)
  217. wb->to_last = to;
  218. spin_unlock(&vnode->writeback_lock);
  219. kfree(candidate);
  220. _leave(" = 0 [sub]");
  221. return 0;
  222. append_to_previous_wb:
  223. _debug("append into %lx-%lx", wb->first, wb->last);
  224. wb->usage++;
  225. wb->last++;
  226. wb->to_last = to;
  227. spin_unlock(&vnode->writeback_lock);
  228. SetPagePrivate(page);
  229. set_page_private(page, (unsigned long) wb);
  230. kfree(candidate);
  231. _leave(" = 0 [app]");
  232. return 0;
  233. /* the page is currently bound to another context, so if it's dirty we
  234. * need to flush it before we can use the new context */
  235. flush_conflicting_wb:
  236. _debug("flush conflict");
  237. if (wb->state == AFS_WBACK_PENDING)
  238. wb->state = AFS_WBACK_CONFLICTING;
  239. spin_unlock(&vnode->writeback_lock);
  240. if (PageDirty(page)) {
  241. ret = afs_write_back_from_locked_page(wb, page);
  242. if (ret < 0) {
  243. afs_put_writeback(candidate);
  244. _leave(" = %d", ret);
  245. return ret;
  246. }
  247. }
  248. /* the page holds a ref on the writeback record */
  249. afs_put_writeback(wb);
  250. set_page_private(page, 0);
  251. ClearPagePrivate(page);
  252. goto try_again;
  253. }
  254. /*
  255. * finalise part of a write to a page
  256. */
  257. int afs_commit_write(struct file *file, struct page *page,
  258. unsigned offset, unsigned to)
  259. {
  260. struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
  261. loff_t i_size, maybe_i_size;
  262. _enter("{%x:%u},{%lx},%u,%u",
  263. vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
  264. maybe_i_size = (loff_t) page->index << PAGE_SHIFT;
  265. maybe_i_size += to;
  266. i_size = i_size_read(&vnode->vfs_inode);
  267. if (maybe_i_size > i_size) {
  268. spin_lock(&vnode->writeback_lock);
  269. i_size = i_size_read(&vnode->vfs_inode);
  270. if (maybe_i_size > i_size)
  271. i_size_write(&vnode->vfs_inode, maybe_i_size);
  272. spin_unlock(&vnode->writeback_lock);
  273. }
  274. set_page_dirty(page);
  275. if (PageDirty(page))
  276. _debug("dirtied");
  277. return 0;
  278. }
  279. /*
  280. * kill all the pages in the given range
  281. */
  282. static void afs_kill_pages(struct afs_vnode *vnode, bool error,
  283. pgoff_t first, pgoff_t last)
  284. {
  285. struct pagevec pv;
  286. unsigned count, loop;
  287. _enter("{%x:%u},%lx-%lx",
  288. vnode->fid.vid, vnode->fid.vnode, first, last);
  289. pagevec_init(&pv, 0);
  290. do {
  291. _debug("kill %lx-%lx", first, last);
  292. count = last - first + 1;
  293. if (count > PAGEVEC_SIZE)
  294. count = PAGEVEC_SIZE;
  295. pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
  296. first, count, pv.pages);
  297. ASSERTCMP(pv.nr, ==, count);
  298. for (loop = 0; loop < count; loop++) {
  299. ClearPageUptodate(pv.pages[loop]);
  300. if (error)
  301. SetPageError(pv.pages[loop]);
  302. end_page_writeback(pv.pages[loop]);
  303. }
  304. __pagevec_release(&pv);
  305. } while (first < last);
  306. _leave("");
  307. }
  308. /*
  309. * synchronously write back the locked page and any subsequent non-locked dirty
  310. * pages also covered by the same writeback record
  311. */
  312. static int afs_write_back_from_locked_page(struct afs_writeback *wb,
  313. struct page *primary_page)
  314. {
  315. struct page *pages[8], *page;
  316. unsigned long count;
  317. unsigned n, offset, to;
  318. pgoff_t start, first, last;
  319. int loop, ret;
  320. _enter(",%lx", primary_page->index);
  321. count = 1;
  322. if (!clear_page_dirty_for_io(primary_page))
  323. BUG();
  324. if (test_set_page_writeback(primary_page))
  325. BUG();
  326. /* find all consecutive lockable dirty pages, stopping when we find a
  327. * page that is not immediately lockable, is not dirty or is missing,
  328. * or we reach the end of the range */
  329. start = primary_page->index;
  330. if (start >= wb->last)
  331. goto no_more;
  332. start++;
  333. do {
  334. _debug("more %lx [%lx]", start, count);
  335. n = wb->last - start + 1;
  336. if (n > ARRAY_SIZE(pages))
  337. n = ARRAY_SIZE(pages);
  338. n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
  339. start, n, pages);
  340. _debug("fgpc %u", n);
  341. if (n == 0)
  342. goto no_more;
  343. if (pages[0]->index != start) {
  344. for (n--; n >= 0; n--)
  345. put_page(pages[n]);
  346. goto no_more;
  347. }
  348. for (loop = 0; loop < n; loop++) {
  349. page = pages[loop];
  350. if (page->index > wb->last)
  351. break;
  352. if (TestSetPageLocked(page))
  353. break;
  354. if (!PageDirty(page) ||
  355. page_private(page) != (unsigned long) wb) {
  356. unlock_page(page);
  357. break;
  358. }
  359. if (!clear_page_dirty_for_io(page))
  360. BUG();
  361. if (test_set_page_writeback(page))
  362. BUG();
  363. unlock_page(page);
  364. put_page(page);
  365. }
  366. count += loop;
  367. if (loop < n) {
  368. for (; loop < n; loop++)
  369. put_page(pages[loop]);
  370. goto no_more;
  371. }
  372. start += loop;
  373. } while (start <= wb->last && count < 65536);
  374. no_more:
  375. /* we now have a contiguous set of dirty pages, each with writeback set
  376. * and the dirty mark cleared; the first page is locked and must remain
  377. * so, all the rest are unlocked */
  378. first = primary_page->index;
  379. last = first + count - 1;
  380. offset = (first == wb->first) ? wb->offset_first : 0;
  381. to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
  382. _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
  383. ret = afs_vnode_store_data(wb, first, last, offset, to);
  384. if (ret < 0) {
  385. switch (ret) {
  386. case -EDQUOT:
  387. case -ENOSPC:
  388. set_bit(AS_ENOSPC,
  389. &wb->vnode->vfs_inode.i_mapping->flags);
  390. break;
  391. case -EROFS:
  392. case -EIO:
  393. case -EREMOTEIO:
  394. case -EFBIG:
  395. case -ENOENT:
  396. case -ENOMEDIUM:
  397. case -ENXIO:
  398. afs_kill_pages(wb->vnode, true, first, last);
  399. set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags);
  400. break;
  401. case -EACCES:
  402. case -EPERM:
  403. case -ENOKEY:
  404. case -EKEYEXPIRED:
  405. case -EKEYREJECTED:
  406. case -EKEYREVOKED:
  407. afs_kill_pages(wb->vnode, false, first, last);
  408. break;
  409. default:
  410. break;
  411. }
  412. } else {
  413. ret = count;
  414. }
  415. _leave(" = %d", ret);
  416. return ret;
  417. }
  418. /*
  419. * write a page back to the server
  420. * - the caller locked the page for us
  421. */
  422. int afs_writepage(struct page *page, struct writeback_control *wbc)
  423. {
  424. struct backing_dev_info *bdi = page->mapping->backing_dev_info;
  425. struct afs_writeback *wb;
  426. int ret;
  427. _enter("{%lx},", page->index);
  428. wb = (struct afs_writeback *) page_private(page);
  429. ASSERT(wb != NULL);
  430. ret = afs_write_back_from_locked_page(wb, page);
  431. unlock_page(page);
  432. if (ret < 0) {
  433. _leave(" = %d", ret);
  434. return 0;
  435. }
  436. wbc->nr_to_write -= ret;
  437. if (wbc->nonblocking && bdi_write_congested(bdi))
  438. wbc->encountered_congestion = 1;
  439. _leave(" = 0");
  440. return 0;
  441. }
  442. /*
  443. * write a region of pages back to the server
  444. */
  445. int afs_writepages_region(struct address_space *mapping,
  446. struct writeback_control *wbc,
  447. pgoff_t index, pgoff_t end, pgoff_t *_next)
  448. {
  449. struct backing_dev_info *bdi = mapping->backing_dev_info;
  450. struct afs_writeback *wb;
  451. struct page *page;
  452. int ret, n;
  453. _enter(",,%lx,%lx,", index, end);
  454. do {
  455. n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
  456. 1, &page);
  457. if (!n)
  458. break;
  459. _debug("wback %lx", page->index);
  460. if (page->index > end) {
  461. *_next = index;
  462. page_cache_release(page);
  463. _leave(" = 0 [%lx]", *_next);
  464. return 0;
  465. }
  466. /* at this point we hold neither mapping->tree_lock nor lock on
  467. * the page itself: the page may be truncated or invalidated
  468. * (changing page->mapping to NULL), or even swizzled back from
  469. * swapper_space to tmpfs file mapping
  470. */
  471. lock_page(page);
  472. if (page->mapping != mapping) {
  473. unlock_page(page);
  474. page_cache_release(page);
  475. continue;
  476. }
  477. if (wbc->sync_mode != WB_SYNC_NONE)
  478. wait_on_page_writeback(page);
  479. if (PageWriteback(page) || !PageDirty(page)) {
  480. unlock_page(page);
  481. continue;
  482. }
  483. wb = (struct afs_writeback *) page_private(page);
  484. ASSERT(wb != NULL);
  485. spin_lock(&wb->vnode->writeback_lock);
  486. wb->state = AFS_WBACK_WRITING;
  487. spin_unlock(&wb->vnode->writeback_lock);
  488. ret = afs_write_back_from_locked_page(wb, page);
  489. unlock_page(page);
  490. page_cache_release(page);
  491. if (ret < 0) {
  492. _leave(" = %d", ret);
  493. return ret;
  494. }
  495. wbc->nr_to_write -= ret;
  496. if (wbc->nonblocking && bdi_write_congested(bdi)) {
  497. wbc->encountered_congestion = 1;
  498. break;
  499. }
  500. cond_resched();
  501. } while (index < end && wbc->nr_to_write > 0);
  502. *_next = index;
  503. _leave(" = 0 [%lx]", *_next);
  504. return 0;
  505. }
  506. /*
  507. * write some of the pending data back to the server
  508. */
  509. int afs_writepages(struct address_space *mapping,
  510. struct writeback_control *wbc)
  511. {
  512. struct backing_dev_info *bdi = mapping->backing_dev_info;
  513. pgoff_t start, end, next;
  514. int ret;
  515. _enter("");
  516. if (wbc->nonblocking && bdi_write_congested(bdi)) {
  517. wbc->encountered_congestion = 1;
  518. _leave(" = 0 [congest]");
  519. return 0;
  520. }
  521. if (wbc->range_cyclic) {
  522. start = mapping->writeback_index;
  523. end = -1;
  524. ret = afs_writepages_region(mapping, wbc, start, end, &next);
  525. if (start > 0 && wbc->nr_to_write > 0 && ret == 0 &&
  526. !(wbc->nonblocking && wbc->encountered_congestion))
  527. ret = afs_writepages_region(mapping, wbc, 0, start,
  528. &next);
  529. mapping->writeback_index = next;
  530. } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
  531. end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
  532. ret = afs_writepages_region(mapping, wbc, 0, end, &next);
  533. if (wbc->nr_to_write > 0)
  534. mapping->writeback_index = next;
  535. } else {
  536. start = wbc->range_start >> PAGE_CACHE_SHIFT;
  537. end = wbc->range_end >> PAGE_CACHE_SHIFT;
  538. ret = afs_writepages_region(mapping, wbc, start, end, &next);
  539. }
  540. _leave(" = %d", ret);
  541. return ret;
  542. }
  543. /*
  544. * write an inode back
  545. */
  546. int afs_write_inode(struct inode *inode, int sync)
  547. {
  548. struct afs_vnode *vnode = AFS_FS_I(inode);
  549. int ret;
  550. _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
  551. ret = 0;
  552. if (sync) {
  553. ret = filemap_fdatawait(inode->i_mapping);
  554. if (ret < 0)
  555. __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
  556. }
  557. _leave(" = %d", ret);
  558. return ret;
  559. }
  560. /*
  561. * completion of write to server
  562. */
  563. void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
  564. {
  565. struct afs_writeback *wb = call->wb;
  566. struct pagevec pv;
  567. unsigned count, loop;
  568. pgoff_t first = call->first, last = call->last;
  569. bool free_wb;
  570. _enter("{%x:%u},{%lx-%lx}",
  571. vnode->fid.vid, vnode->fid.vnode, first, last);
  572. ASSERT(wb != NULL);
  573. pagevec_init(&pv, 0);
  574. do {
  575. _debug("done %lx-%lx", first, last);
  576. count = last - first + 1;
  577. if (count > PAGEVEC_SIZE)
  578. count = PAGEVEC_SIZE;
  579. pv.nr = find_get_pages_contig(call->mapping, first, count,
  580. pv.pages);
  581. ASSERTCMP(pv.nr, ==, count);
  582. spin_lock(&vnode->writeback_lock);
  583. for (loop = 0; loop < count; loop++) {
  584. struct page *page = pv.pages[loop];
  585. end_page_writeback(page);
  586. if (page_private(page) == (unsigned long) wb) {
  587. set_page_private(page, 0);
  588. ClearPagePrivate(page);
  589. wb->usage--;
  590. }
  591. }
  592. free_wb = false;
  593. if (wb->usage == 0) {
  594. afs_unlink_writeback(wb);
  595. free_wb = true;
  596. }
  597. spin_unlock(&vnode->writeback_lock);
  598. first += count;
  599. if (free_wb) {
  600. afs_free_writeback(wb);
  601. wb = NULL;
  602. }
  603. __pagevec_release(&pv);
  604. } while (first <= last);
  605. _leave("");
  606. }
  607. /*
  608. * write to an AFS file
  609. */
  610. ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
  611. unsigned long nr_segs, loff_t pos)
  612. {
  613. struct dentry *dentry = iocb->ki_filp->f_path.dentry;
  614. struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
  615. ssize_t result;
  616. size_t count = iov_length(iov, nr_segs);
  617. int ret;
  618. _enter("{%x.%u},{%zu},%lu,",
  619. vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
  620. if (IS_SWAPFILE(&vnode->vfs_inode)) {
  621. printk(KERN_INFO
  622. "AFS: Attempt to write to active swap file!\n");
  623. return -EBUSY;
  624. }
  625. if (!count)
  626. return 0;
  627. result = generic_file_aio_write(iocb, iov, nr_segs, pos);
  628. if (IS_ERR_VALUE(result)) {
  629. _leave(" = %zd", result);
  630. return result;
  631. }
  632. /* return error values for O_SYNC and IS_SYNC() */
  633. if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
  634. ret = afs_fsync(iocb->ki_filp, dentry, 1);
  635. if (ret < 0)
  636. result = ret;
  637. }
  638. _leave(" = %zd", result);
  639. return result;
  640. }
  641. /*
  642. * flush the vnode to the fileserver
  643. */
  644. int afs_writeback_all(struct afs_vnode *vnode)
  645. {
  646. struct address_space *mapping = vnode->vfs_inode.i_mapping;
  647. struct writeback_control wbc = {
  648. .bdi = mapping->backing_dev_info,
  649. .sync_mode = WB_SYNC_ALL,
  650. .nr_to_write = LONG_MAX,
  651. .for_writepages = 1,
  652. .range_cyclic = 1,
  653. };
  654. int ret;
  655. _enter("");
  656. ret = mapping->a_ops->writepages(mapping, &wbc);
  657. __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
  658. _leave(" = %d", ret);
  659. return ret;
  660. }
  661. /*
  662. * flush any dirty pages for this process, and check for write errors.
  663. * - the return status from this call provides a reliable indication of
  664. * whether any write errors occurred for this process.
  665. */
  666. int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
  667. {
  668. struct afs_writeback *wb, *xwb;
  669. struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
  670. int ret;
  671. _enter("{%x:%u},{n=%s},%d",
  672. vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
  673. datasync);
  674. /* use a writeback record as a marker in the queue - when this reaches
  675. * the front of the queue, all the outstanding writes are either
  676. * completed or rejected */
  677. wb = kzalloc(sizeof(*wb), GFP_KERNEL);
  678. if (!wb)
  679. return -ENOMEM;
  680. wb->vnode = vnode;
  681. wb->first = 0;
  682. wb->last = -1;
  683. wb->offset_first = 0;
  684. wb->to_last = PAGE_SIZE;
  685. wb->usage = 1;
  686. wb->state = AFS_WBACK_SYNCING;
  687. init_waitqueue_head(&wb->waitq);
  688. spin_lock(&vnode->writeback_lock);
  689. list_for_each_entry(xwb, &vnode->writebacks, link) {
  690. if (xwb->state == AFS_WBACK_PENDING)
  691. xwb->state = AFS_WBACK_CONFLICTING;
  692. }
  693. list_add_tail(&wb->link, &vnode->writebacks);
  694. spin_unlock(&vnode->writeback_lock);
  695. /* push all the outstanding writebacks to the server */
  696. ret = afs_writeback_all(vnode);
  697. if (ret < 0) {
  698. afs_put_writeback(wb);
  699. _leave(" = %d [wb]", ret);
  700. return ret;
  701. }
  702. /* wait for the preceding writes to actually complete */
  703. ret = wait_event_interruptible(wb->waitq,
  704. wb->state == AFS_WBACK_COMPLETE ||
  705. vnode->writebacks.next == &wb->link);
  706. afs_put_writeback(wb);
  707. _leave(" = %d", ret);
  708. return ret;
  709. }