write.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827
  1. /* handling of writes to regular files and writing back to the server
  2. *
  3. * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/backing-dev.h>
  12. #include <linux/slab.h>
  13. #include <linux/fs.h>
  14. #include <linux/pagemap.h>
  15. #include <linux/writeback.h>
  16. #include <linux/pagevec.h>
  17. #include "internal.h"
  18. static int afs_write_back_from_locked_page(struct afs_writeback *wb,
  19. struct page *page);
  20. /*
  21. * mark a page as having been made dirty and thus needing writeback
  22. */
  23. int afs_set_page_dirty(struct page *page)
  24. {
  25. _enter("");
  26. return __set_page_dirty_nobuffers(page);
  27. }
  28. /*
  29. * unlink a writeback record because its usage has reached zero
  30. * - must be called with the wb->vnode->writeback_lock held
  31. */
  32. static void afs_unlink_writeback(struct afs_writeback *wb)
  33. {
  34. struct afs_writeback *front;
  35. struct afs_vnode *vnode = wb->vnode;
  36. list_del_init(&wb->link);
  37. if (!list_empty(&vnode->writebacks)) {
  38. /* if an fsync rises to the front of the queue then wake it
  39. * up */
  40. front = list_entry(vnode->writebacks.next,
  41. struct afs_writeback, link);
  42. if (front->state == AFS_WBACK_SYNCING) {
  43. _debug("wake up sync");
  44. front->state = AFS_WBACK_COMPLETE;
  45. wake_up(&front->waitq);
  46. }
  47. }
  48. }
  49. /*
  50. * free a writeback record
  51. */
  52. static void afs_free_writeback(struct afs_writeback *wb)
  53. {
  54. _enter("");
  55. key_put(wb->key);
  56. kfree(wb);
  57. }
  58. /*
  59. * dispose of a reference to a writeback record
  60. */
  61. void afs_put_writeback(struct afs_writeback *wb)
  62. {
  63. struct afs_vnode *vnode = wb->vnode;
  64. _enter("{%d}", wb->usage);
  65. spin_lock(&vnode->writeback_lock);
  66. if (--wb->usage == 0)
  67. afs_unlink_writeback(wb);
  68. else
  69. wb = NULL;
  70. spin_unlock(&vnode->writeback_lock);
  71. if (wb)
  72. afs_free_writeback(wb);
  73. }
  74. /*
  75. * partly or wholly fill a page that's under preparation for writing
  76. */
  77. static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
  78. unsigned start, unsigned len, struct page *page)
  79. {
  80. int ret;
  81. _enter(",,%u,%u", start, len);
  82. ASSERTCMP(start + len, <=, PAGE_SIZE);
  83. ret = afs_vnode_fetch_data(vnode, key, start, len, page);
  84. if (ret < 0) {
  85. if (ret == -ENOENT) {
  86. _debug("got NOENT from server"
  87. " - marking file deleted and stale");
  88. set_bit(AFS_VNODE_DELETED, &vnode->flags);
  89. ret = -ESTALE;
  90. }
  91. }
  92. _leave(" = %d", ret);
  93. return ret;
  94. }
  95. /*
  96. * prepare a page for being written to
  97. */
  98. static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
  99. struct key *key, unsigned offset, unsigned to)
  100. {
  101. unsigned eof, tail, start, stop, len;
  102. loff_t i_size, pos;
  103. void *p;
  104. int ret;
  105. _enter("");
  106. if (offset == 0 && to == PAGE_SIZE)
  107. return 0;
  108. p = kmap_atomic(page, KM_USER0);
  109. i_size = i_size_read(&vnode->vfs_inode);
  110. pos = (loff_t) page->index << PAGE_SHIFT;
  111. if (pos >= i_size) {
  112. /* partial write, page beyond EOF */
  113. _debug("beyond");
  114. if (offset > 0)
  115. memset(p, 0, offset);
  116. if (to < PAGE_SIZE)
  117. memset(p + to, 0, PAGE_SIZE - to);
  118. kunmap_atomic(p, KM_USER0);
  119. return 0;
  120. }
  121. if (i_size - pos >= PAGE_SIZE) {
  122. /* partial write, page entirely before EOF */
  123. _debug("before");
  124. tail = eof = PAGE_SIZE;
  125. } else {
  126. /* partial write, page overlaps EOF */
  127. eof = i_size - pos;
  128. _debug("overlap %u", eof);
  129. tail = max(eof, to);
  130. if (tail < PAGE_SIZE)
  131. memset(p + tail, 0, PAGE_SIZE - tail);
  132. if (offset > eof)
  133. memset(p + eof, 0, PAGE_SIZE - eof);
  134. }
  135. kunmap_atomic(p, KM_USER0);
  136. ret = 0;
  137. if (offset > 0 || eof > to) {
  138. /* need to fill one or two bits that aren't going to be written
  139. * (cover both fillers in one read if there are two) */
  140. start = (offset > 0) ? 0 : to;
  141. stop = (eof > to) ? eof : offset;
  142. len = stop - start;
  143. _debug("wr=%u-%u av=0-%u rd=%u@%u",
  144. offset, to, eof, start, len);
  145. ret = afs_fill_page(vnode, key, start, len, page);
  146. }
  147. _leave(" = %d", ret);
  148. return ret;
  149. }
  150. /*
  151. * prepare to perform part of a write to a page
  152. * - the caller holds the page locked, preventing it from being written out or
  153. * modified by anyone else
  154. */
  155. int afs_prepare_write(struct file *file, struct page *page,
  156. unsigned offset, unsigned to)
  157. {
  158. struct afs_writeback *candidate, *wb;
  159. struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
  160. struct key *key = file->private_data;
  161. pgoff_t index;
  162. int ret;
  163. _enter("{%x:%u},{%lx},%u,%u",
  164. vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
  165. candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
  166. if (!candidate)
  167. return -ENOMEM;
  168. candidate->vnode = vnode;
  169. candidate->first = candidate->last = page->index;
  170. candidate->offset_first = offset;
  171. candidate->to_last = to;
  172. candidate->usage = 1;
  173. candidate->state = AFS_WBACK_PENDING;
  174. init_waitqueue_head(&candidate->waitq);
  175. if (!PageUptodate(page)) {
  176. _debug("not up to date");
  177. ret = afs_prepare_page(vnode, page, key, offset, to);
  178. if (ret < 0) {
  179. kfree(candidate);
  180. _leave(" = %d [prep]", ret);
  181. return ret;
  182. }
  183. }
  184. try_again:
  185. index = page->index;
  186. spin_lock(&vnode->writeback_lock);
  187. /* see if this page is already pending a writeback under a suitable key
  188. * - if so we can just join onto that one */
  189. wb = (struct afs_writeback *) page_private(page);
  190. if (wb) {
  191. if (wb->key == key && wb->state == AFS_WBACK_PENDING)
  192. goto subsume_in_current_wb;
  193. goto flush_conflicting_wb;
  194. }
  195. if (index > 0) {
  196. /* see if we can find an already pending writeback that we can
  197. * append this page to */
  198. list_for_each_entry(wb, &vnode->writebacks, link) {
  199. if (wb->last == index - 1 && wb->key == key &&
  200. wb->state == AFS_WBACK_PENDING)
  201. goto append_to_previous_wb;
  202. }
  203. }
  204. list_add_tail(&candidate->link, &vnode->writebacks);
  205. candidate->key = key_get(key);
  206. spin_unlock(&vnode->writeback_lock);
  207. SetPagePrivate(page);
  208. set_page_private(page, (unsigned long) candidate);
  209. _leave(" = 0 [new]");
  210. return 0;
  211. subsume_in_current_wb:
  212. _debug("subsume");
  213. ASSERTRANGE(wb->first, <=, index, <=, wb->last);
  214. if (index == wb->first && offset < wb->offset_first)
  215. wb->offset_first = offset;
  216. if (index == wb->last && to > wb->to_last)
  217. wb->to_last = to;
  218. spin_unlock(&vnode->writeback_lock);
  219. kfree(candidate);
  220. _leave(" = 0 [sub]");
  221. return 0;
  222. append_to_previous_wb:
  223. _debug("append into %lx-%lx", wb->first, wb->last);
  224. wb->usage++;
  225. wb->last++;
  226. wb->to_last = to;
  227. spin_unlock(&vnode->writeback_lock);
  228. SetPagePrivate(page);
  229. set_page_private(page, (unsigned long) wb);
  230. kfree(candidate);
  231. _leave(" = 0 [app]");
  232. return 0;
  233. /* the page is currently bound to another context, so if it's dirty we
  234. * need to flush it before we can use the new context */
  235. flush_conflicting_wb:
  236. _debug("flush conflict");
  237. if (wb->state == AFS_WBACK_PENDING)
  238. wb->state = AFS_WBACK_CONFLICTING;
  239. spin_unlock(&vnode->writeback_lock);
  240. if (PageDirty(page)) {
  241. ret = afs_write_back_from_locked_page(wb, page);
  242. if (ret < 0) {
  243. afs_put_writeback(candidate);
  244. _leave(" = %d", ret);
  245. return ret;
  246. }
  247. }
  248. /* the page holds a ref on the writeback record */
  249. afs_put_writeback(wb);
  250. set_page_private(page, 0);
  251. ClearPagePrivate(page);
  252. goto try_again;
  253. }
  254. /*
  255. * finalise part of a write to a page
  256. */
  257. int afs_commit_write(struct file *file, struct page *page,
  258. unsigned offset, unsigned to)
  259. {
  260. struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
  261. loff_t i_size, maybe_i_size;
  262. _enter("{%x:%u},{%lx},%u,%u",
  263. vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
  264. maybe_i_size = (loff_t) page->index << PAGE_SHIFT;
  265. maybe_i_size += to;
  266. i_size = i_size_read(&vnode->vfs_inode);
  267. if (maybe_i_size > i_size) {
  268. spin_lock(&vnode->writeback_lock);
  269. i_size = i_size_read(&vnode->vfs_inode);
  270. if (maybe_i_size > i_size)
  271. i_size_write(&vnode->vfs_inode, maybe_i_size);
  272. spin_unlock(&vnode->writeback_lock);
  273. }
  274. SetPageUptodate(page);
  275. set_page_dirty(page);
  276. if (PageDirty(page))
  277. _debug("dirtied");
  278. return 0;
  279. }
  280. /*
  281. * kill all the pages in the given range
  282. */
  283. static void afs_kill_pages(struct afs_vnode *vnode, bool error,
  284. pgoff_t first, pgoff_t last)
  285. {
  286. struct pagevec pv;
  287. unsigned count, loop;
  288. _enter("{%x:%u},%lx-%lx",
  289. vnode->fid.vid, vnode->fid.vnode, first, last);
  290. pagevec_init(&pv, 0);
  291. do {
  292. _debug("kill %lx-%lx", first, last);
  293. count = last - first + 1;
  294. if (count > PAGEVEC_SIZE)
  295. count = PAGEVEC_SIZE;
  296. pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
  297. first, count, pv.pages);
  298. ASSERTCMP(pv.nr, ==, count);
  299. for (loop = 0; loop < count; loop++) {
  300. ClearPageUptodate(pv.pages[loop]);
  301. if (error)
  302. SetPageError(pv.pages[loop]);
  303. end_page_writeback(pv.pages[loop]);
  304. }
  305. __pagevec_release(&pv);
  306. } while (first < last);
  307. _leave("");
  308. }
  309. /*
  310. * synchronously write back the locked page and any subsequent non-locked dirty
  311. * pages also covered by the same writeback record
  312. */
  313. static int afs_write_back_from_locked_page(struct afs_writeback *wb,
  314. struct page *primary_page)
  315. {
  316. struct page *pages[8], *page;
  317. unsigned long count;
  318. unsigned n, offset, to;
  319. pgoff_t start, first, last;
  320. int loop, ret;
  321. _enter(",%lx", primary_page->index);
  322. count = 1;
  323. if (!clear_page_dirty_for_io(primary_page))
  324. BUG();
  325. if (test_set_page_writeback(primary_page))
  326. BUG();
  327. /* find all consecutive lockable dirty pages, stopping when we find a
  328. * page that is not immediately lockable, is not dirty or is missing,
  329. * or we reach the end of the range */
  330. start = primary_page->index;
  331. if (start >= wb->last)
  332. goto no_more;
  333. start++;
  334. do {
  335. _debug("more %lx [%lx]", start, count);
  336. n = wb->last - start + 1;
  337. if (n > ARRAY_SIZE(pages))
  338. n = ARRAY_SIZE(pages);
  339. n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
  340. start, n, pages);
  341. _debug("fgpc %u", n);
  342. if (n == 0)
  343. goto no_more;
  344. if (pages[0]->index != start) {
  345. do {
  346. put_page(pages[--n]);
  347. } while (n > 0);
  348. goto no_more;
  349. }
  350. for (loop = 0; loop < n; loop++) {
  351. page = pages[loop];
  352. if (page->index > wb->last)
  353. break;
  354. if (TestSetPageLocked(page))
  355. break;
  356. if (!PageDirty(page) ||
  357. page_private(page) != (unsigned long) wb) {
  358. unlock_page(page);
  359. break;
  360. }
  361. if (!clear_page_dirty_for_io(page))
  362. BUG();
  363. if (test_set_page_writeback(page))
  364. BUG();
  365. unlock_page(page);
  366. put_page(page);
  367. }
  368. count += loop;
  369. if (loop < n) {
  370. for (; loop < n; loop++)
  371. put_page(pages[loop]);
  372. goto no_more;
  373. }
  374. start += loop;
  375. } while (start <= wb->last && count < 65536);
  376. no_more:
  377. /* we now have a contiguous set of dirty pages, each with writeback set
  378. * and the dirty mark cleared; the first page is locked and must remain
  379. * so, all the rest are unlocked */
  380. first = primary_page->index;
  381. last = first + count - 1;
  382. offset = (first == wb->first) ? wb->offset_first : 0;
  383. to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
  384. _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
  385. ret = afs_vnode_store_data(wb, first, last, offset, to);
  386. if (ret < 0) {
  387. switch (ret) {
  388. case -EDQUOT:
  389. case -ENOSPC:
  390. set_bit(AS_ENOSPC,
  391. &wb->vnode->vfs_inode.i_mapping->flags);
  392. break;
  393. case -EROFS:
  394. case -EIO:
  395. case -EREMOTEIO:
  396. case -EFBIG:
  397. case -ENOENT:
  398. case -ENOMEDIUM:
  399. case -ENXIO:
  400. afs_kill_pages(wb->vnode, true, first, last);
  401. set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags);
  402. break;
  403. case -EACCES:
  404. case -EPERM:
  405. case -ENOKEY:
  406. case -EKEYEXPIRED:
  407. case -EKEYREJECTED:
  408. case -EKEYREVOKED:
  409. afs_kill_pages(wb->vnode, false, first, last);
  410. break;
  411. default:
  412. break;
  413. }
  414. } else {
  415. ret = count;
  416. }
  417. _leave(" = %d", ret);
  418. return ret;
  419. }
  420. /*
  421. * write a page back to the server
  422. * - the caller locked the page for us
  423. */
  424. int afs_writepage(struct page *page, struct writeback_control *wbc)
  425. {
  426. struct backing_dev_info *bdi = page->mapping->backing_dev_info;
  427. struct afs_writeback *wb;
  428. int ret;
  429. _enter("{%lx},", page->index);
  430. wb = (struct afs_writeback *) page_private(page);
  431. ASSERT(wb != NULL);
  432. ret = afs_write_back_from_locked_page(wb, page);
  433. unlock_page(page);
  434. if (ret < 0) {
  435. _leave(" = %d", ret);
  436. return 0;
  437. }
  438. wbc->nr_to_write -= ret;
  439. if (wbc->nonblocking && bdi_write_congested(bdi))
  440. wbc->encountered_congestion = 1;
  441. _leave(" = 0");
  442. return 0;
  443. }
  444. /*
  445. * write a region of pages back to the server
  446. */
  447. static int afs_writepages_region(struct address_space *mapping,
  448. struct writeback_control *wbc,
  449. pgoff_t index, pgoff_t end, pgoff_t *_next)
  450. {
  451. struct backing_dev_info *bdi = mapping->backing_dev_info;
  452. struct afs_writeback *wb;
  453. struct page *page;
  454. int ret, n;
  455. _enter(",,%lx,%lx,", index, end);
  456. do {
  457. n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
  458. 1, &page);
  459. if (!n)
  460. break;
  461. _debug("wback %lx", page->index);
  462. if (page->index > end) {
  463. *_next = index;
  464. page_cache_release(page);
  465. _leave(" = 0 [%lx]", *_next);
  466. return 0;
  467. }
  468. /* at this point we hold neither mapping->tree_lock nor lock on
  469. * the page itself: the page may be truncated or invalidated
  470. * (changing page->mapping to NULL), or even swizzled back from
  471. * swapper_space to tmpfs file mapping
  472. */
  473. lock_page(page);
  474. if (page->mapping != mapping) {
  475. unlock_page(page);
  476. page_cache_release(page);
  477. continue;
  478. }
  479. if (wbc->sync_mode != WB_SYNC_NONE)
  480. wait_on_page_writeback(page);
  481. if (PageWriteback(page) || !PageDirty(page)) {
  482. unlock_page(page);
  483. continue;
  484. }
  485. wb = (struct afs_writeback *) page_private(page);
  486. ASSERT(wb != NULL);
  487. spin_lock(&wb->vnode->writeback_lock);
  488. wb->state = AFS_WBACK_WRITING;
  489. spin_unlock(&wb->vnode->writeback_lock);
  490. ret = afs_write_back_from_locked_page(wb, page);
  491. unlock_page(page);
  492. page_cache_release(page);
  493. if (ret < 0) {
  494. _leave(" = %d", ret);
  495. return ret;
  496. }
  497. wbc->nr_to_write -= ret;
  498. if (wbc->nonblocking && bdi_write_congested(bdi)) {
  499. wbc->encountered_congestion = 1;
  500. break;
  501. }
  502. cond_resched();
  503. } while (index < end && wbc->nr_to_write > 0);
  504. *_next = index;
  505. _leave(" = 0 [%lx]", *_next);
  506. return 0;
  507. }
  508. /*
  509. * write some of the pending data back to the server
  510. */
  511. int afs_writepages(struct address_space *mapping,
  512. struct writeback_control *wbc)
  513. {
  514. struct backing_dev_info *bdi = mapping->backing_dev_info;
  515. pgoff_t start, end, next;
  516. int ret;
  517. _enter("");
  518. if (wbc->nonblocking && bdi_write_congested(bdi)) {
  519. wbc->encountered_congestion = 1;
  520. _leave(" = 0 [congest]");
  521. return 0;
  522. }
  523. if (wbc->range_cyclic) {
  524. start = mapping->writeback_index;
  525. end = -1;
  526. ret = afs_writepages_region(mapping, wbc, start, end, &next);
  527. if (start > 0 && wbc->nr_to_write > 0 && ret == 0 &&
  528. !(wbc->nonblocking && wbc->encountered_congestion))
  529. ret = afs_writepages_region(mapping, wbc, 0, start,
  530. &next);
  531. mapping->writeback_index = next;
  532. } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
  533. end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
  534. ret = afs_writepages_region(mapping, wbc, 0, end, &next);
  535. if (wbc->nr_to_write > 0)
  536. mapping->writeback_index = next;
  537. } else {
  538. start = wbc->range_start >> PAGE_CACHE_SHIFT;
  539. end = wbc->range_end >> PAGE_CACHE_SHIFT;
  540. ret = afs_writepages_region(mapping, wbc, start, end, &next);
  541. }
  542. _leave(" = %d", ret);
  543. return ret;
  544. }
  545. /*
  546. * write an inode back
  547. */
  548. int afs_write_inode(struct inode *inode, int sync)
  549. {
  550. struct afs_vnode *vnode = AFS_FS_I(inode);
  551. int ret;
  552. _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
  553. ret = 0;
  554. if (sync) {
  555. ret = filemap_fdatawait(inode->i_mapping);
  556. if (ret < 0)
  557. __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
  558. }
  559. _leave(" = %d", ret);
  560. return ret;
  561. }
  562. /*
  563. * completion of write to server
  564. */
  565. void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
  566. {
  567. struct afs_writeback *wb = call->wb;
  568. struct pagevec pv;
  569. unsigned count, loop;
  570. pgoff_t first = call->first, last = call->last;
  571. bool free_wb;
  572. _enter("{%x:%u},{%lx-%lx}",
  573. vnode->fid.vid, vnode->fid.vnode, first, last);
  574. ASSERT(wb != NULL);
  575. pagevec_init(&pv, 0);
  576. do {
  577. _debug("done %lx-%lx", first, last);
  578. count = last - first + 1;
  579. if (count > PAGEVEC_SIZE)
  580. count = PAGEVEC_SIZE;
  581. pv.nr = find_get_pages_contig(call->mapping, first, count,
  582. pv.pages);
  583. ASSERTCMP(pv.nr, ==, count);
  584. spin_lock(&vnode->writeback_lock);
  585. for (loop = 0; loop < count; loop++) {
  586. struct page *page = pv.pages[loop];
  587. end_page_writeback(page);
  588. if (page_private(page) == (unsigned long) wb) {
  589. set_page_private(page, 0);
  590. ClearPagePrivate(page);
  591. wb->usage--;
  592. }
  593. }
  594. free_wb = false;
  595. if (wb->usage == 0) {
  596. afs_unlink_writeback(wb);
  597. free_wb = true;
  598. }
  599. spin_unlock(&vnode->writeback_lock);
  600. first += count;
  601. if (free_wb) {
  602. afs_free_writeback(wb);
  603. wb = NULL;
  604. }
  605. __pagevec_release(&pv);
  606. } while (first <= last);
  607. _leave("");
  608. }
  609. /*
  610. * write to an AFS file
  611. */
  612. ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
  613. unsigned long nr_segs, loff_t pos)
  614. {
  615. struct dentry *dentry = iocb->ki_filp->f_path.dentry;
  616. struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
  617. ssize_t result;
  618. size_t count = iov_length(iov, nr_segs);
  619. int ret;
  620. _enter("{%x.%u},{%zu},%lu,",
  621. vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
  622. if (IS_SWAPFILE(&vnode->vfs_inode)) {
  623. printk(KERN_INFO
  624. "AFS: Attempt to write to active swap file!\n");
  625. return -EBUSY;
  626. }
  627. if (!count)
  628. return 0;
  629. result = generic_file_aio_write(iocb, iov, nr_segs, pos);
  630. if (IS_ERR_VALUE(result)) {
  631. _leave(" = %zd", result);
  632. return result;
  633. }
  634. /* return error values for O_SYNC and IS_SYNC() */
  635. if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
  636. ret = afs_fsync(iocb->ki_filp, dentry, 1);
  637. if (ret < 0)
  638. result = ret;
  639. }
  640. _leave(" = %zd", result);
  641. return result;
  642. }
  643. /*
  644. * flush the vnode to the fileserver
  645. */
  646. int afs_writeback_all(struct afs_vnode *vnode)
  647. {
  648. struct address_space *mapping = vnode->vfs_inode.i_mapping;
  649. struct writeback_control wbc = {
  650. .bdi = mapping->backing_dev_info,
  651. .sync_mode = WB_SYNC_ALL,
  652. .nr_to_write = LONG_MAX,
  653. .for_writepages = 1,
  654. .range_cyclic = 1,
  655. };
  656. int ret;
  657. _enter("");
  658. ret = mapping->a_ops->writepages(mapping, &wbc);
  659. __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
  660. _leave(" = %d", ret);
  661. return ret;
  662. }
  663. /*
  664. * flush any dirty pages for this process, and check for write errors.
  665. * - the return status from this call provides a reliable indication of
  666. * whether any write errors occurred for this process.
  667. */
  668. int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
  669. {
  670. struct afs_writeback *wb, *xwb;
  671. struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
  672. int ret;
  673. _enter("{%x:%u},{n=%s},%d",
  674. vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
  675. datasync);
  676. /* use a writeback record as a marker in the queue - when this reaches
  677. * the front of the queue, all the outstanding writes are either
  678. * completed or rejected */
  679. wb = kzalloc(sizeof(*wb), GFP_KERNEL);
  680. if (!wb)
  681. return -ENOMEM;
  682. wb->vnode = vnode;
  683. wb->first = 0;
  684. wb->last = -1;
  685. wb->offset_first = 0;
  686. wb->to_last = PAGE_SIZE;
  687. wb->usage = 1;
  688. wb->state = AFS_WBACK_SYNCING;
  689. init_waitqueue_head(&wb->waitq);
  690. spin_lock(&vnode->writeback_lock);
  691. list_for_each_entry(xwb, &vnode->writebacks, link) {
  692. if (xwb->state == AFS_WBACK_PENDING)
  693. xwb->state = AFS_WBACK_CONFLICTING;
  694. }
  695. list_add_tail(&wb->link, &vnode->writebacks);
  696. spin_unlock(&vnode->writeback_lock);
  697. /* push all the outstanding writebacks to the server */
  698. ret = afs_writeback_all(vnode);
  699. if (ret < 0) {
  700. afs_put_writeback(wb);
  701. _leave(" = %d [wb]", ret);
  702. return ret;
  703. }
  704. /* wait for the preceding writes to actually complete */
  705. ret = wait_event_interruptible(wb->waitq,
  706. wb->state == AFS_WBACK_COMPLETE ||
  707. vnode->writebacks.next == &wb->link);
  708. afs_put_writeback(wb);
  709. _leave(" = %d", ret);
  710. return ret;
  711. }