write.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827
  1. /* handling of writes to regular files and writing back to the server
  2. *
  3. * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/slab.h>
  12. #include <linux/fs.h>
  13. #include <linux/pagemap.h>
  14. #include <linux/writeback.h>
  15. #include <linux/pagevec.h>
  16. #include "internal.h"
/*
 * forward declaration: afs_prepare_write() calls this to flush a conflicting
 * writeback record before the definition appears further down the file
 */
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
					   struct page *page);
  19. /*
  20. * mark a page as having been made dirty and thus needing writeback
  21. */
  22. int afs_set_page_dirty(struct page *page)
  23. {
  24. _enter("");
  25. return __set_page_dirty_nobuffers(page);
  26. }
  27. /*
  28. * unlink a writeback record because its usage has reached zero
  29. * - must be called with the wb->vnode->writeback_lock held
  30. */
  31. static void afs_unlink_writeback(struct afs_writeback *wb)
  32. {
  33. struct afs_writeback *front;
  34. struct afs_vnode *vnode = wb->vnode;
  35. list_del_init(&wb->link);
  36. if (!list_empty(&vnode->writebacks)) {
  37. /* if an fsync rises to the front of the queue then wake it
  38. * up */
  39. front = list_entry(vnode->writebacks.next,
  40. struct afs_writeback, link);
  41. if (front->state == AFS_WBACK_SYNCING) {
  42. _debug("wake up sync");
  43. front->state = AFS_WBACK_COMPLETE;
  44. wake_up(&front->waitq);
  45. }
  46. }
  47. }
  48. /*
  49. * free a writeback record
  50. */
  51. static void afs_free_writeback(struct afs_writeback *wb)
  52. {
  53. _enter("");
  54. key_put(wb->key);
  55. kfree(wb);
  56. }
  57. /*
  58. * dispose of a reference to a writeback record
  59. */
  60. void afs_put_writeback(struct afs_writeback *wb)
  61. {
  62. struct afs_vnode *vnode = wb->vnode;
  63. _enter("{%d}", wb->usage);
  64. spin_lock(&vnode->writeback_lock);
  65. if (--wb->usage == 0)
  66. afs_unlink_writeback(wb);
  67. else
  68. wb = NULL;
  69. spin_unlock(&vnode->writeback_lock);
  70. if (wb)
  71. afs_free_writeback(wb);
  72. }
  73. /*
  74. * partly or wholly fill a page that's under preparation for writing
  75. */
  76. static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
  77. unsigned start, unsigned len, struct page *page)
  78. {
  79. int ret;
  80. _enter(",,%u,%u", start, len);
  81. ASSERTCMP(start + len, <=, PAGE_SIZE);
  82. ret = afs_vnode_fetch_data(vnode, key, start, len, page);
  83. if (ret < 0) {
  84. if (ret == -ENOENT) {
  85. _debug("got NOENT from server"
  86. " - marking file deleted and stale");
  87. set_bit(AFS_VNODE_DELETED, &vnode->flags);
  88. ret = -ESTALE;
  89. }
  90. }
  91. _leave(" = %d", ret);
  92. return ret;
  93. }
  94. /*
  95. * prepare a page for being written to
  96. */
  97. static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
  98. struct key *key, unsigned offset, unsigned to)
  99. {
  100. unsigned eof, tail, start, stop, len;
  101. loff_t i_size, pos;
  102. void *p;
  103. int ret;
  104. _enter("");
  105. if (offset == 0 && to == PAGE_SIZE)
  106. return 0;
  107. p = kmap_atomic(page, KM_USER0);
  108. i_size = i_size_read(&vnode->vfs_inode);
  109. pos = (loff_t) page->index << PAGE_SHIFT;
  110. if (pos >= i_size) {
  111. /* partial write, page beyond EOF */
  112. _debug("beyond");
  113. if (offset > 0)
  114. memset(p, 0, offset);
  115. if (to < PAGE_SIZE)
  116. memset(p + to, 0, PAGE_SIZE - to);
  117. kunmap_atomic(p, KM_USER0);
  118. return 0;
  119. }
  120. if (i_size - pos >= PAGE_SIZE) {
  121. /* partial write, page entirely before EOF */
  122. _debug("before");
  123. tail = eof = PAGE_SIZE;
  124. } else {
  125. /* partial write, page overlaps EOF */
  126. eof = i_size - pos;
  127. _debug("overlap %u", eof);
  128. tail = max(eof, to);
  129. if (tail < PAGE_SIZE)
  130. memset(p + tail, 0, PAGE_SIZE - tail);
  131. if (offset > eof)
  132. memset(p + eof, 0, PAGE_SIZE - eof);
  133. }
  134. kunmap_atomic(p, KM_USER0);
  135. ret = 0;
  136. if (offset > 0 || eof > to) {
  137. /* need to fill one or two bits that aren't going to be written
  138. * (cover both fillers in one read if there are two) */
  139. start = (offset > 0) ? 0 : to;
  140. stop = (eof > to) ? eof : offset;
  141. len = stop - start;
  142. _debug("wr=%u-%u av=0-%u rd=%u@%u",
  143. offset, to, eof, start, len);
  144. ret = afs_fill_page(vnode, key, start, len, page);
  145. }
  146. _leave(" = %d", ret);
  147. return ret;
  148. }
  149. /*
  150. * prepare to perform part of a write to a page
  151. * - the caller holds the page locked, preventing it from being written out or
  152. * modified by anyone else
  153. */
  154. int afs_prepare_write(struct file *file, struct page *page,
  155. unsigned offset, unsigned to)
  156. {
  157. struct afs_writeback *candidate, *wb;
  158. struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
  159. struct key *key = file->private_data;
  160. pgoff_t index;
  161. int ret;
  162. _enter("{%x:%u},{%lx},%u,%u",
  163. vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
  164. candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
  165. if (!candidate)
  166. return -ENOMEM;
  167. candidate->vnode = vnode;
  168. candidate->first = candidate->last = page->index;
  169. candidate->offset_first = offset;
  170. candidate->to_last = to;
  171. candidate->usage = 1;
  172. candidate->state = AFS_WBACK_PENDING;
  173. init_waitqueue_head(&candidate->waitq);
  174. if (!PageUptodate(page)) {
  175. _debug("not up to date");
  176. ret = afs_prepare_page(vnode, page, key, offset, to);
  177. if (ret < 0) {
  178. kfree(candidate);
  179. _leave(" = %d [prep]", ret);
  180. return ret;
  181. }
  182. }
  183. try_again:
  184. index = page->index;
  185. spin_lock(&vnode->writeback_lock);
  186. /* see if this page is already pending a writeback under a suitable key
  187. * - if so we can just join onto that one */
  188. wb = (struct afs_writeback *) page_private(page);
  189. if (wb) {
  190. if (wb->key == key && wb->state == AFS_WBACK_PENDING)
  191. goto subsume_in_current_wb;
  192. goto flush_conflicting_wb;
  193. }
  194. if (index > 0) {
  195. /* see if we can find an already pending writeback that we can
  196. * append this page to */
  197. list_for_each_entry(wb, &vnode->writebacks, link) {
  198. if (wb->last == index - 1 && wb->key == key &&
  199. wb->state == AFS_WBACK_PENDING)
  200. goto append_to_previous_wb;
  201. }
  202. }
  203. list_add_tail(&candidate->link, &vnode->writebacks);
  204. candidate->key = key_get(key);
  205. spin_unlock(&vnode->writeback_lock);
  206. SetPagePrivate(page);
  207. set_page_private(page, (unsigned long) candidate);
  208. _leave(" = 0 [new]");
  209. return 0;
  210. subsume_in_current_wb:
  211. _debug("subsume");
  212. ASSERTRANGE(wb->first, <=, index, <=, wb->last);
  213. if (index == wb->first && offset < wb->offset_first)
  214. wb->offset_first = offset;
  215. if (index == wb->last && to > wb->to_last)
  216. wb->to_last = to;
  217. spin_unlock(&vnode->writeback_lock);
  218. kfree(candidate);
  219. _leave(" = 0 [sub]");
  220. return 0;
  221. append_to_previous_wb:
  222. _debug("append into %lx-%lx", wb->first, wb->last);
  223. wb->usage++;
  224. wb->last++;
  225. wb->to_last = to;
  226. spin_unlock(&vnode->writeback_lock);
  227. SetPagePrivate(page);
  228. set_page_private(page, (unsigned long) wb);
  229. kfree(candidate);
  230. _leave(" = 0 [app]");
  231. return 0;
  232. /* the page is currently bound to another context, so if it's dirty we
  233. * need to flush it before we can use the new context */
  234. flush_conflicting_wb:
  235. _debug("flush conflict");
  236. if (wb->state == AFS_WBACK_PENDING)
  237. wb->state = AFS_WBACK_CONFLICTING;
  238. spin_unlock(&vnode->writeback_lock);
  239. if (PageDirty(page)) {
  240. ret = afs_write_back_from_locked_page(wb, page);
  241. if (ret < 0) {
  242. afs_put_writeback(candidate);
  243. _leave(" = %d", ret);
  244. return ret;
  245. }
  246. }
  247. /* the page holds a ref on the writeback record */
  248. afs_put_writeback(wb);
  249. set_page_private(page, 0);
  250. ClearPagePrivate(page);
  251. goto try_again;
  252. }
  253. /*
  254. * finalise part of a write to a page
  255. */
  256. int afs_commit_write(struct file *file, struct page *page,
  257. unsigned offset, unsigned to)
  258. {
  259. struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
  260. loff_t i_size, maybe_i_size;
  261. _enter("{%x:%u},{%lx},%u,%u",
  262. vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
  263. maybe_i_size = (loff_t) page->index << PAGE_SHIFT;
  264. maybe_i_size += to;
  265. i_size = i_size_read(&vnode->vfs_inode);
  266. if (maybe_i_size > i_size) {
  267. spin_lock(&vnode->writeback_lock);
  268. i_size = i_size_read(&vnode->vfs_inode);
  269. if (maybe_i_size > i_size)
  270. i_size_write(&vnode->vfs_inode, maybe_i_size);
  271. spin_unlock(&vnode->writeback_lock);
  272. }
  273. SetPageUptodate(page);
  274. set_page_dirty(page);
  275. if (PageDirty(page))
  276. _debug("dirtied");
  277. return 0;
  278. }
  279. /*
  280. * kill all the pages in the given range
  281. */
  282. static void afs_kill_pages(struct afs_vnode *vnode, bool error,
  283. pgoff_t first, pgoff_t last)
  284. {
  285. struct pagevec pv;
  286. unsigned count, loop;
  287. _enter("{%x:%u},%lx-%lx",
  288. vnode->fid.vid, vnode->fid.vnode, first, last);
  289. pagevec_init(&pv, 0);
  290. do {
  291. _debug("kill %lx-%lx", first, last);
  292. count = last - first + 1;
  293. if (count > PAGEVEC_SIZE)
  294. count = PAGEVEC_SIZE;
  295. pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
  296. first, count, pv.pages);
  297. ASSERTCMP(pv.nr, ==, count);
  298. for (loop = 0; loop < count; loop++) {
  299. ClearPageUptodate(pv.pages[loop]);
  300. if (error)
  301. SetPageError(pv.pages[loop]);
  302. end_page_writeback(pv.pages[loop]);
  303. }
  304. __pagevec_release(&pv);
  305. } while (first < last);
  306. _leave("");
  307. }
/*
 * synchronously write back the locked page and any subsequent non-locked dirty
 * pages also covered by the same writeback record
 * - primary_page is switched from dirty to writeback here; BUG()s if
 *   clear_page_dirty_for_io() returns zero or test_set_page_writeback()
 *   returns non-zero for it
 * - following pages are gathered in batches of up to ARRAY_SIZE(pages) until
 *   one cannot be taken, wb->last is passed or the page count reaches 65536
 * - returns the number of pages handed to afs_vnode_store_data() on success
 *   or the negative error it returned
 */
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
					   struct page *primary_page)
{
	struct page *pages[8], *page;
	unsigned long count;
	unsigned n, offset, to;
	pgoff_t start, first, last;
	int loop, ret;

	_enter(",%lx", primary_page->index);

	/* count is the number of pages in the run, starting with the primary */
	count = 1;
	if (!clear_page_dirty_for_io(primary_page))
		BUG();
	if (test_set_page_writeback(primary_page))
		BUG();

	/* find all consecutive lockable dirty pages, stopping when we find a
	 * page that is not immediately lockable, is not dirty or is missing,
	 * or we reach the end of the range */
	start = primary_page->index;
	if (start >= wb->last)
		goto no_more;
	start++;
	do {
		_debug("more %lx [%lx]", start, count);
		n = wb->last - start + 1;
		if (n > ARRAY_SIZE(pages))
			n = ARRAY_SIZE(pages);
		n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
					  start, n, pages);
		_debug("fgpc %u", n);
		if (n == 0)
			goto no_more;
		/* the batch must begin exactly where the run left off; if not,
		 * drop every reference just obtained and stop */
		if (pages[0]->index != start) {
			do {
				put_page(pages[--n]);
			} while (n > 0);
			goto no_more;
		}

		for (loop = 0; loop < n; loop++) {
			page = pages[loop];
			if (page->index > wb->last)
				break;
			/* only take pages that can be locked without waiting */
			if (TestSetPageLocked(page))
				break;
			/* the page must be dirty and attached to this record */
			if (!PageDirty(page) ||
			    page_private(page) != (unsigned long) wb) {
				unlock_page(page);
				break;
			}
			if (!clear_page_dirty_for_io(page))
				BUG();
			if (test_set_page_writeback(page))
				BUG();
			/* the page is now marked for writeback, so both the
			 * lock and the lookup reference are dropped */
			unlock_page(page);
			put_page(page);
		}
		/* loop is the number of pages accepted from this batch */
		count += loop;
		if (loop < n) {
			/* stopped early: release the references on the pages
			 * that were not accepted */
			for (; loop < n; loop++)
				put_page(pages[loop]);
			goto no_more;
		}

		start += loop;
	} while (start <= wb->last && count < 65536);

no_more:
	/* we now have a contiguous set of dirty pages, each with writeback set
	 * and the dirty mark cleared; the first page is locked and must remain
	 * so, all the rest are unlocked */
	first = primary_page->index;
	last = first + count - 1;

	/* the record's partial-page bounds only apply if the run includes the
	 * record's first and/or last page */
	offset = (first == wb->first) ? wb->offset_first : 0;
	to = (last == wb->last) ? wb->to_last : PAGE_SIZE;

	_debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);

	ret = afs_vnode_store_data(wb, first, last, offset, to);
	if (ret < 0) {
		/* NOTE(review): only the two afs_kill_pages() groups below end
		 * writeback on the pages; for the out-of-space and default
		 * cases nothing visible here does so - confirm that is handled
		 * elsewhere */
		switch (ret) {
		case -EDQUOT:
		case -ENOSPC:
			set_bit(AS_ENOSPC,
				&wb->vnode->vfs_inode.i_mapping->flags);
			break;
		case -EROFS:
		case -EIO:
		case -EREMOTEIO:
		case -EFBIG:
		case -ENOENT:
		case -ENOMEDIUM:
		case -ENXIO:
			/* kill the pages, flagging them as being in error */
			afs_kill_pages(wb->vnode, true, first, last);
			set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags);
			break;
		case -EACCES:
		case -EPERM:
		case -ENOKEY:
		case -EKEYEXPIRED:
		case -EKEYREJECTED:
		case -EKEYREVOKED:
			/* kill the pages without setting the error flag */
			afs_kill_pages(wb->vnode, false, first, last);
			break;
		default:
			break;
		}
	} else {
		ret = count;
	}

	_leave(" = %d", ret);
	return ret;
}
  419. /*
  420. * write a page back to the server
  421. * - the caller locked the page for us
  422. */
  423. int afs_writepage(struct page *page, struct writeback_control *wbc)
  424. {
  425. struct backing_dev_info *bdi = page->mapping->backing_dev_info;
  426. struct afs_writeback *wb;
  427. int ret;
  428. _enter("{%lx},", page->index);
  429. wb = (struct afs_writeback *) page_private(page);
  430. ASSERT(wb != NULL);
  431. ret = afs_write_back_from_locked_page(wb, page);
  432. unlock_page(page);
  433. if (ret < 0) {
  434. _leave(" = %d", ret);
  435. return 0;
  436. }
  437. wbc->nr_to_write -= ret;
  438. if (wbc->nonblocking && bdi_write_congested(bdi))
  439. wbc->encountered_congestion = 1;
  440. _leave(" = 0");
  441. return 0;
  442. }
  443. /*
  444. * write a region of pages back to the server
  445. */
  446. int afs_writepages_region(struct address_space *mapping,
  447. struct writeback_control *wbc,
  448. pgoff_t index, pgoff_t end, pgoff_t *_next)
  449. {
  450. struct backing_dev_info *bdi = mapping->backing_dev_info;
  451. struct afs_writeback *wb;
  452. struct page *page;
  453. int ret, n;
  454. _enter(",,%lx,%lx,", index, end);
  455. do {
  456. n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
  457. 1, &page);
  458. if (!n)
  459. break;
  460. _debug("wback %lx", page->index);
  461. if (page->index > end) {
  462. *_next = index;
  463. page_cache_release(page);
  464. _leave(" = 0 [%lx]", *_next);
  465. return 0;
  466. }
  467. /* at this point we hold neither mapping->tree_lock nor lock on
  468. * the page itself: the page may be truncated or invalidated
  469. * (changing page->mapping to NULL), or even swizzled back from
  470. * swapper_space to tmpfs file mapping
  471. */
  472. lock_page(page);
  473. if (page->mapping != mapping) {
  474. unlock_page(page);
  475. page_cache_release(page);
  476. continue;
  477. }
  478. if (wbc->sync_mode != WB_SYNC_NONE)
  479. wait_on_page_writeback(page);
  480. if (PageWriteback(page) || !PageDirty(page)) {
  481. unlock_page(page);
  482. continue;
  483. }
  484. wb = (struct afs_writeback *) page_private(page);
  485. ASSERT(wb != NULL);
  486. spin_lock(&wb->vnode->writeback_lock);
  487. wb->state = AFS_WBACK_WRITING;
  488. spin_unlock(&wb->vnode->writeback_lock);
  489. ret = afs_write_back_from_locked_page(wb, page);
  490. unlock_page(page);
  491. page_cache_release(page);
  492. if (ret < 0) {
  493. _leave(" = %d", ret);
  494. return ret;
  495. }
  496. wbc->nr_to_write -= ret;
  497. if (wbc->nonblocking && bdi_write_congested(bdi)) {
  498. wbc->encountered_congestion = 1;
  499. break;
  500. }
  501. cond_resched();
  502. } while (index < end && wbc->nr_to_write > 0);
  503. *_next = index;
  504. _leave(" = 0 [%lx]", *_next);
  505. return 0;
  506. }
  507. /*
  508. * write some of the pending data back to the server
  509. */
  510. int afs_writepages(struct address_space *mapping,
  511. struct writeback_control *wbc)
  512. {
  513. struct backing_dev_info *bdi = mapping->backing_dev_info;
  514. pgoff_t start, end, next;
  515. int ret;
  516. _enter("");
  517. if (wbc->nonblocking && bdi_write_congested(bdi)) {
  518. wbc->encountered_congestion = 1;
  519. _leave(" = 0 [congest]");
  520. return 0;
  521. }
  522. if (wbc->range_cyclic) {
  523. start = mapping->writeback_index;
  524. end = -1;
  525. ret = afs_writepages_region(mapping, wbc, start, end, &next);
  526. if (start > 0 && wbc->nr_to_write > 0 && ret == 0 &&
  527. !(wbc->nonblocking && wbc->encountered_congestion))
  528. ret = afs_writepages_region(mapping, wbc, 0, start,
  529. &next);
  530. mapping->writeback_index = next;
  531. } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
  532. end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
  533. ret = afs_writepages_region(mapping, wbc, 0, end, &next);
  534. if (wbc->nr_to_write > 0)
  535. mapping->writeback_index = next;
  536. } else {
  537. start = wbc->range_start >> PAGE_CACHE_SHIFT;
  538. end = wbc->range_end >> PAGE_CACHE_SHIFT;
  539. ret = afs_writepages_region(mapping, wbc, start, end, &next);
  540. }
  541. _leave(" = %d", ret);
  542. return ret;
  543. }
  544. /*
  545. * write an inode back
  546. */
  547. int afs_write_inode(struct inode *inode, int sync)
  548. {
  549. struct afs_vnode *vnode = AFS_FS_I(inode);
  550. int ret;
  551. _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
  552. ret = 0;
  553. if (sync) {
  554. ret = filemap_fdatawait(inode->i_mapping);
  555. if (ret < 0)
  556. __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
  557. }
  558. _leave(" = %d", ret);
  559. return ret;
  560. }
  561. /*
  562. * completion of write to server
  563. */
  564. void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
  565. {
  566. struct afs_writeback *wb = call->wb;
  567. struct pagevec pv;
  568. unsigned count, loop;
  569. pgoff_t first = call->first, last = call->last;
  570. bool free_wb;
  571. _enter("{%x:%u},{%lx-%lx}",
  572. vnode->fid.vid, vnode->fid.vnode, first, last);
  573. ASSERT(wb != NULL);
  574. pagevec_init(&pv, 0);
  575. do {
  576. _debug("done %lx-%lx", first, last);
  577. count = last - first + 1;
  578. if (count > PAGEVEC_SIZE)
  579. count = PAGEVEC_SIZE;
  580. pv.nr = find_get_pages_contig(call->mapping, first, count,
  581. pv.pages);
  582. ASSERTCMP(pv.nr, ==, count);
  583. spin_lock(&vnode->writeback_lock);
  584. for (loop = 0; loop < count; loop++) {
  585. struct page *page = pv.pages[loop];
  586. end_page_writeback(page);
  587. if (page_private(page) == (unsigned long) wb) {
  588. set_page_private(page, 0);
  589. ClearPagePrivate(page);
  590. wb->usage--;
  591. }
  592. }
  593. free_wb = false;
  594. if (wb->usage == 0) {
  595. afs_unlink_writeback(wb);
  596. free_wb = true;
  597. }
  598. spin_unlock(&vnode->writeback_lock);
  599. first += count;
  600. if (free_wb) {
  601. afs_free_writeback(wb);
  602. wb = NULL;
  603. }
  604. __pagevec_release(&pv);
  605. } while (first <= last);
  606. _leave("");
  607. }
  608. /*
  609. * write to an AFS file
  610. */
  611. ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
  612. unsigned long nr_segs, loff_t pos)
  613. {
  614. struct dentry *dentry = iocb->ki_filp->f_path.dentry;
  615. struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
  616. ssize_t result;
  617. size_t count = iov_length(iov, nr_segs);
  618. int ret;
  619. _enter("{%x.%u},{%zu},%lu,",
  620. vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
  621. if (IS_SWAPFILE(&vnode->vfs_inode)) {
  622. printk(KERN_INFO
  623. "AFS: Attempt to write to active swap file!\n");
  624. return -EBUSY;
  625. }
  626. if (!count)
  627. return 0;
  628. result = generic_file_aio_write(iocb, iov, nr_segs, pos);
  629. if (IS_ERR_VALUE(result)) {
  630. _leave(" = %zd", result);
  631. return result;
  632. }
  633. /* return error values for O_SYNC and IS_SYNC() */
  634. if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
  635. ret = afs_fsync(iocb->ki_filp, dentry, 1);
  636. if (ret < 0)
  637. result = ret;
  638. }
  639. _leave(" = %zd", result);
  640. return result;
  641. }
  642. /*
  643. * flush the vnode to the fileserver
  644. */
  645. int afs_writeback_all(struct afs_vnode *vnode)
  646. {
  647. struct address_space *mapping = vnode->vfs_inode.i_mapping;
  648. struct writeback_control wbc = {
  649. .bdi = mapping->backing_dev_info,
  650. .sync_mode = WB_SYNC_ALL,
  651. .nr_to_write = LONG_MAX,
  652. .for_writepages = 1,
  653. .range_cyclic = 1,
  654. };
  655. int ret;
  656. _enter("");
  657. ret = mapping->a_ops->writepages(mapping, &wbc);
  658. __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
  659. _leave(" = %d", ret);
  660. return ret;
  661. }
  662. /*
  663. * flush any dirty pages for this process, and check for write errors.
  664. * - the return status from this call provides a reliable indication of
  665. * whether any write errors occurred for this process.
  666. */
  667. int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
  668. {
  669. struct afs_writeback *wb, *xwb;
  670. struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
  671. int ret;
  672. _enter("{%x:%u},{n=%s},%d",
  673. vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
  674. datasync);
  675. /* use a writeback record as a marker in the queue - when this reaches
  676. * the front of the queue, all the outstanding writes are either
  677. * completed or rejected */
  678. wb = kzalloc(sizeof(*wb), GFP_KERNEL);
  679. if (!wb)
  680. return -ENOMEM;
  681. wb->vnode = vnode;
  682. wb->first = 0;
  683. wb->last = -1;
  684. wb->offset_first = 0;
  685. wb->to_last = PAGE_SIZE;
  686. wb->usage = 1;
  687. wb->state = AFS_WBACK_SYNCING;
  688. init_waitqueue_head(&wb->waitq);
  689. spin_lock(&vnode->writeback_lock);
  690. list_for_each_entry(xwb, &vnode->writebacks, link) {
  691. if (xwb->state == AFS_WBACK_PENDING)
  692. xwb->state = AFS_WBACK_CONFLICTING;
  693. }
  694. list_add_tail(&wb->link, &vnode->writebacks);
  695. spin_unlock(&vnode->writeback_lock);
  696. /* push all the outstanding writebacks to the server */
  697. ret = afs_writeback_all(vnode);
  698. if (ret < 0) {
  699. afs_put_writeback(wb);
  700. _leave(" = %d [wb]", ret);
  701. return ret;
  702. }
  703. /* wait for the preceding writes to actually complete */
  704. ret = wait_event_interruptible(wb->waitq,
  705. wb->state == AFS_WBACK_COMPLETE ||
  706. vnode->writebacks.next == &wb->link);
  707. afs_put_writeback(wb);
  708. _leave(" = %d", ret);
  709. return ret;
  710. }