rdwr.c

/* Storage object read/write
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
#include "internal.h"

/*
 * detect wake up events generated by the unlocking of pages in which we're
 * interested
 * - we use this to detect read completion of backing pages
 * - the caller holds the waitqueue lock
 */
static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
                                  int sync, void *_key)
{
        struct cachefiles_one_read *monitor =
                container_of(wait, struct cachefiles_one_read, monitor);
        struct cachefiles_object *object;
        struct wait_bit_key *key = _key;
        struct page *page = wait->private;

        ASSERT(key);

        _enter("{%lu},%u,%d,{%p,%u}",
               monitor->netfs_page->index, mode, sync,
               key->flags, key->bit_nr);

        if (key->flags != &page->flags ||
            key->bit_nr != PG_locked)
                return 0;

        _debug("--- monitor %p %lx ---", page, page->flags);

        if (!PageUptodate(page) && !PageError(page)) {
                /* unlocked, not uptodate and not erroneous? */
                _debug("page probably truncated");
        }

        /* remove from the waitqueue */
        list_del(&wait->task_list);

        /* move onto the action list and queue for FS-Cache thread pool */
        ASSERT(monitor->op);

        object = container_of(monitor->op->op.object,
                              struct cachefiles_object, fscache);

        spin_lock(&object->work_lock);
        list_add_tail(&monitor->op_link, &monitor->op->to_do);
        spin_unlock(&object->work_lock);

        fscache_enqueue_retrieval(monitor->op);
        return 0;
}
/*
 * handle a probably truncated page
 * - check to see if the page is still relevant and reissue the read if
 *   possible
 * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we
 *   must wait again and 0 if successful
 */
static int cachefiles_read_reissue(struct cachefiles_object *object,
                                   struct cachefiles_one_read *monitor)
{
        struct address_space *bmapping = object->backer->d_inode->i_mapping;
        struct page *backpage = monitor->back_page, *backpage2;
        int ret;

        _enter("{ino=%lx},{%lx,%lx}",
               object->backer->d_inode->i_ino,
               backpage->index, backpage->flags);

        /* skip if the page was truncated away completely */
        if (backpage->mapping != bmapping) {
                _leave(" = -ENODATA [mapping]");
                return -ENODATA;
        }

        backpage2 = find_get_page(bmapping, backpage->index);
        if (!backpage2) {
                _leave(" = -ENODATA [gone]");
                return -ENODATA;
        }

        if (backpage != backpage2) {
                put_page(backpage2);
                _leave(" = -ENODATA [different]");
                return -ENODATA;
        }

        /* the page is still there and we already have a ref on it, so we don't
         * need a second */
        put_page(backpage2);

        INIT_LIST_HEAD(&monitor->op_link);
        add_page_wait_queue(backpage, &monitor->monitor);

        if (trylock_page(backpage)) {
                ret = -EIO;
                if (PageError(backpage))
                        goto unlock_discard;
                ret = 0;
                if (PageUptodate(backpage))
                        goto unlock_discard;

                _debug("reissue read");
                ret = bmapping->a_ops->readpage(NULL, backpage);
                if (ret < 0)
                        goto unlock_discard;
        }

        /* but the page may have been read before the monitor was installed, so
         * the monitor may miss the event - so we have to ensure that we do get
         * one in such a case */
        if (trylock_page(backpage)) {
                _debug("jumpstart %p {%lx}", backpage, backpage->flags);
                unlock_page(backpage);
        }

        /* it'll reappear on the todo list */
        _leave(" = -EINPROGRESS");
        return -EINPROGRESS;

unlock_discard:
        unlock_page(backpage);
        spin_lock_irq(&object->work_lock);
        list_del(&monitor->op_link);
        spin_unlock_irq(&object->work_lock);
        _leave(" = %d", ret);
        return ret;
}
/*
 * copy data from backing pages to netfs pages to complete a read operation
 * - driven by FS-Cache's thread pool
 */
static void cachefiles_read_copier(struct fscache_operation *_op)
{
        struct cachefiles_one_read *monitor;
        struct cachefiles_object *object;
        struct fscache_retrieval *op;
        struct pagevec pagevec;
        int error, max;

        op = container_of(_op, struct fscache_retrieval, op);
        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);

        _enter("{ino=%lu}", object->backer->d_inode->i_ino);

        pagevec_init(&pagevec, 0);
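        /* cap the number of pages copied in one pass so that this worker
         * doesn't monopolise the thread pool */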
        max = 8;
        spin_lock_irq(&object->work_lock);

        while (!list_empty(&op->to_do)) {
                monitor = list_entry(op->to_do.next,
                                     struct cachefiles_one_read, op_link);
                list_del(&monitor->op_link);

                spin_unlock_irq(&object->work_lock);

                _debug("- copy {%lu}", monitor->back_page->index);

        recheck:
                if (test_bit(FSCACHE_COOKIE_INVALIDATING,
                             &object->fscache.cookie->flags)) {
                        error = -ESTALE;
                } else if (PageUptodate(monitor->back_page)) {
                        copy_highpage(monitor->netfs_page, monitor->back_page);
                        fscache_mark_page_cached(monitor->op,
                                                 monitor->netfs_page);
                        error = 0;
                } else if (!PageError(monitor->back_page)) {
                        /* the page has probably been truncated */
                        error = cachefiles_read_reissue(object, monitor);
                        if (error == -EINPROGRESS)
                                goto next;
                        goto recheck;
                } else {
                        cachefiles_io_error_obj(
                                object,
                                "Readpage failed on backing file %lx",
                                (unsigned long) monitor->back_page->flags);
                        error = -EIO;
                }

                page_cache_release(monitor->back_page);

                fscache_end_io(op, monitor->netfs_page, error);
                page_cache_release(monitor->netfs_page);
                fscache_retrieval_complete(op, 1);
                fscache_put_retrieval(op);
                kfree(monitor);

        next:
                /* let the thread pool have some air occasionally */
                max--;
                if (max < 0 || need_resched()) {
                        if (!list_empty(&op->to_do))
                                fscache_enqueue_retrieval(op);
                        _leave(" [maxed out]");
                        return;
                }

                spin_lock_irq(&object->work_lock);
        }

        spin_unlock_irq(&object->work_lock);
        _leave("");
}
/*
 * read the corresponding page to the given set from the backing file
 * - an uncertain page is simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
                                            struct fscache_retrieval *op,
                                            struct page *netpage,
                                            struct pagevec *pagevec)
{
        struct cachefiles_one_read *monitor;
        struct address_space *bmapping;
        struct page *newpage, *backpage;
        int ret;

        _enter("");

        pagevec_reinit(pagevec);

        _debug("read back %p{%lu,%d}",
               netpage, netpage->index, page_count(netpage));

        monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
        if (!monitor)
                goto nomem;

        monitor->netfs_page = netpage;
        monitor->op = fscache_get_retrieval(op);

        init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);

        /* attempt to get hold of the backing page */
        bmapping = object->backer->d_inode->i_mapping;
        newpage = NULL;

        for (;;) {
                backpage = find_get_page(bmapping, netpage->index);
                if (backpage)
                        goto backing_page_already_present;

                if (!newpage) {
                        newpage = __page_cache_alloc(cachefiles_gfp |
                                                     __GFP_COLD);
                        if (!newpage)
                                goto nomem_monitor;
                }

                ret = add_to_page_cache(newpage, bmapping,
                                        netpage->index, cachefiles_gfp);
                if (ret == 0)
                        goto installed_new_backing_page;
                if (ret != -EEXIST)
                        goto nomem_page;
        }

        /* we've installed a new backing page, so now we need to add it
         * to the LRU list and start it reading */
installed_new_backing_page:
        _debug("- new %p", newpage);

        backpage = newpage;
        newpage = NULL;

        page_cache_get(backpage);
        pagevec_add(pagevec, backpage);
        __pagevec_lru_add_file(pagevec);

read_backing_page:
        ret = bmapping->a_ops->readpage(NULL, backpage);
        if (ret < 0)
                goto read_error;

        /* set the monitor to transfer the data across */
monitor_backing_page:
        _debug("- monitor add");

        /* install the monitor */
        page_cache_get(monitor->netfs_page);
        page_cache_get(backpage);
        monitor->back_page = backpage;
        monitor->monitor.private = backpage;
        add_page_wait_queue(backpage, &monitor->monitor);
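        /* the monitor now hangs off the backing page's waitqueue and will be
         * consumed and freed by the copier, so don't release it on the way
         * out */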
        monitor = NULL;

        /* but the page may have been read before the monitor was installed, so
         * the monitor may miss the event - so we have to ensure that we do get
         * one in such a case */
        if (trylock_page(backpage)) {
                _debug("jumpstart %p {%lx}", backpage, backpage->flags);
                unlock_page(backpage);
        }
        goto success;

        /* if the backing page is already present, it can be in one of
         * three states: read in progress, read failed or read okay */
backing_page_already_present:
        _debug("- present");

        if (newpage) {
                page_cache_release(newpage);
                newpage = NULL;
        }

        if (PageError(backpage))
                goto io_error;

        if (PageUptodate(backpage))
                goto backing_page_already_uptodate;

        if (!trylock_page(backpage))
                goto monitor_backing_page;
        _debug("read %p {%lx}", backpage, backpage->flags);
        goto read_backing_page;

        /* the backing page is already up to date, attach the netfs
         * page to the pagecache and LRU and copy the data across */
backing_page_already_uptodate:
        _debug("- uptodate");

        fscache_mark_page_cached(op, netpage);

        copy_highpage(netpage, backpage);
        fscache_end_io(op, netpage, 0);
        fscache_retrieval_complete(op, 1);

success:
        _debug("success");
        ret = 0;

out:
        if (backpage)
                page_cache_release(backpage);
        if (monitor) {
                fscache_put_retrieval(monitor->op);
                kfree(monitor);
        }
        _leave(" = %d", ret);
        return ret;

read_error:
        _debug("read error %d", ret);
        if (ret == -ENOMEM) {
                fscache_retrieval_complete(op, 1);
                goto out;
        }
io_error:
        cachefiles_io_error_obj(object, "Page read error on backing file");
        fscache_retrieval_complete(op, 1);
        ret = -ENOBUFS;
        goto out;

nomem_page:
        page_cache_release(newpage);
nomem_monitor:
        fscache_put_retrieval(monitor->op);
        kfree(monitor);
nomem:
        fscache_retrieval_complete(op, 1);
        _leave(" = -ENOMEM");
        return -ENOMEM;
}
/*
 * read a page from the cache or allocate a block in which to store it
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if ran out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - if the page is backed by a block in the cache:
 *   - a read will be started which will call the callback on completion
 *   - 0 will be returned
 * - else if the page is unbacked:
 *   - the metadata will be retained
 *   - -ENODATA will be returned
 */
int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
                                  struct page *page,
                                  gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct pagevec pagevec;
        struct inode *inode;
        sector_t block0, block;
        unsigned shift;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("{%p},{%lx},,,", object, page->index);

        if (!object->backer)
                goto enobufs;

        inode = object->backer->d_inode;
        ASSERT(S_ISREG(inode->i_mode));
        ASSERT(inode->i_mapping->a_ops->bmap);
        ASSERT(inode->i_mapping->a_ops->readpages);

        /* calculate the shift required to use bmap */
        if (inode->i_sb->s_blocksize > PAGE_SIZE)
                goto enobufs;

        shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
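        /* make the operation asynchronous so that the copier is run by the
         * FS-Cache thread pool once the backing read completes */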
        op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
        op->op.flags |= FSCACHE_OP_ASYNC;
        op->op.processor = cachefiles_read_copier;

        pagevec_init(&pagevec, 0);

        /* we assume the absence or presence of the first block is a good
         * enough indication for the page as a whole
         * - TODO: don't use bmap() for this as it is _not_ actually good
         *   enough for this as it doesn't indicate errors, but it's all we've
         *   got for the moment
         */
        block0 = page->index;
        block0 <<= shift;

        block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
        _debug("%llx -> %llx",
               (unsigned long long) block0,
               (unsigned long long) block);

        if (block) {
                /* submit the apparently valid page to the backing fs to be
                 * read from disk */
                ret = cachefiles_read_backing_file_one(object, op, page,
                                                       &pagevec);
        } else if (cachefiles_has_space(cache, 0, 1) == 0) {
                /* there's space in the cache we can use */
                fscache_mark_page_cached(op, page);
                fscache_retrieval_complete(op, 1);
                ret = -ENODATA;
        } else {
                goto enobufs;
        }

        _leave(" = %d", ret);
        return ret;

enobufs:
        fscache_retrieval_complete(op, 1);
        _leave(" = -ENOBUFS");
        return -ENOBUFS;
}
/*
 * read the corresponding pages to the given set from the backing file
 * - any uncertain pages are simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file(struct cachefiles_object *object,
                                        struct fscache_retrieval *op,
                                        struct list_head *list)
{
        struct cachefiles_one_read *monitor = NULL;
        struct address_space *bmapping = object->backer->d_inode->i_mapping;
        struct pagevec lru_pvec;
        struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
        int ret = 0;

        _enter("");

        pagevec_init(&lru_pvec, 0);

        list_for_each_entry_safe(netpage, _n, list, lru) {
                list_del(&netpage->lru);

                _debug("read back %p{%lu,%d}",
                       netpage, netpage->index, page_count(netpage));

                if (!monitor) {
                        monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
                        if (!monitor)
                                goto nomem;

                        monitor->op = fscache_get_retrieval(op);
                        init_waitqueue_func_entry(&monitor->monitor,
                                                  cachefiles_read_waiter);
                }

                for (;;) {
                        backpage = find_get_page(bmapping, netpage->index);
                        if (backpage)
                                goto backing_page_already_present;

                        if (!newpage) {
                                newpage = __page_cache_alloc(cachefiles_gfp |
                                                             __GFP_COLD);
                                if (!newpage)
                                        goto nomem;
                        }

                        ret = add_to_page_cache(newpage, bmapping,
                                                netpage->index, cachefiles_gfp);
                        if (ret == 0)
                                goto installed_new_backing_page;
                        if (ret != -EEXIST)
                                goto nomem;
                }

                /* we've installed a new backing page, so now we need to add it
                 * to the LRU list and start it reading */
        installed_new_backing_page:
                _debug("- new %p", newpage);

                backpage = newpage;
                newpage = NULL;

                page_cache_get(backpage);
                if (!pagevec_add(&lru_pvec, backpage))
                        __pagevec_lru_add_file(&lru_pvec);

        reread_backing_page:
                ret = bmapping->a_ops->readpage(NULL, backpage);
                if (ret < 0)
                        goto read_error;

                /* add the netfs page to the pagecache and LRU, and set the
                 * monitor to transfer the data across */
        monitor_backing_page:
                _debug("- monitor add");

                ret = add_to_page_cache(netpage, op->mapping, netpage->index,
                                        cachefiles_gfp);
                if (ret < 0) {
                        if (ret == -EEXIST) {
                                page_cache_release(netpage);
                                fscache_retrieval_complete(op, 1);
                                continue;
                        }
                        goto nomem;
                }

                page_cache_get(netpage);
                if (!pagevec_add(&lru_pvec, netpage))
                        __pagevec_lru_add_file(&lru_pvec);

                /* install a monitor */
                page_cache_get(netpage);
                monitor->netfs_page = netpage;

                page_cache_get(backpage);
                monitor->back_page = backpage;
                monitor->monitor.private = backpage;
                add_page_wait_queue(backpage, &monitor->monitor);
                monitor = NULL;

                /* but the page may have been read before the monitor was
                 * installed, so the monitor may miss the event - so we have to
                 * ensure that we do get one in such a case */
                if (trylock_page(backpage)) {
                        _debug("2unlock %p {%lx}", backpage, backpage->flags);
                        unlock_page(backpage);
                }

                page_cache_release(backpage);
                backpage = NULL;

                page_cache_release(netpage);
                netpage = NULL;
                continue;

                /* if the backing page is already present, it can be in one of
                 * three states: read in progress, read failed or read okay */
        backing_page_already_present:
                _debug("- present %p", backpage);

                if (PageError(backpage))
                        goto io_error;

                if (PageUptodate(backpage))
                        goto backing_page_already_uptodate;

                _debug("- not ready %p{%lx}", backpage, backpage->flags);

                if (!trylock_page(backpage))
                        goto monitor_backing_page;

                if (PageError(backpage)) {
                        _debug("error %lx", backpage->flags);
                        unlock_page(backpage);
                        goto io_error;
                }

                if (PageUptodate(backpage))
                        goto backing_page_already_uptodate_unlock;

                /* we've locked a page that's neither up to date nor erroneous,
                 * so we need to attempt to read it again */
                goto reread_backing_page;

                /* the backing page is already up to date, attach the netfs
                 * page to the pagecache and LRU and copy the data across */
        backing_page_already_uptodate_unlock:
                _debug("uptodate %lx", backpage->flags);
                unlock_page(backpage);

        backing_page_already_uptodate:
                _debug("- uptodate");

                ret = add_to_page_cache(netpage, op->mapping, netpage->index,
                                        cachefiles_gfp);
                if (ret < 0) {
                        if (ret == -EEXIST) {
                                page_cache_release(netpage);
                                fscache_retrieval_complete(op, 1);
                                continue;
                        }
                        goto nomem;
                }

                copy_highpage(netpage, backpage);

                page_cache_release(backpage);
                backpage = NULL;

                fscache_mark_page_cached(op, netpage);

                page_cache_get(netpage);
                if (!pagevec_add(&lru_pvec, netpage))
                        __pagevec_lru_add_file(&lru_pvec);

                /* the netpage is unlocked and marked up to date here */
                fscache_end_io(op, netpage, 0);
                page_cache_release(netpage);
                netpage = NULL;
                fscache_retrieval_complete(op, 1);
                continue;
        }

        netpage = NULL;

        _debug("out");

out:
        /* tidy up */
        pagevec_lru_add_file(&lru_pvec);

        if (newpage)
                page_cache_release(newpage);
        if (netpage)
                page_cache_release(netpage);
        if (backpage)
                page_cache_release(backpage);
        if (monitor) {
                fscache_put_retrieval(op);
                kfree(monitor);
        }

        list_for_each_entry_safe(netpage, _n, list, lru) {
                list_del(&netpage->lru);
                page_cache_release(netpage);
                fscache_retrieval_complete(op, 1);
        }

        _leave(" = %d", ret);
        return ret;

nomem:
        _debug("nomem");
        ret = -ENOMEM;
        goto record_page_complete;

read_error:
        _debug("read error %d", ret);
        if (ret == -ENOMEM)
                goto record_page_complete;
io_error:
        cachefiles_io_error_obj(object, "Page read error on backing file");
        ret = -ENOBUFS;

record_page_complete:
        fscache_retrieval_complete(op, 1);
        goto out;
}
/*
 * read a list of pages from the cache or allocate blocks in which to store
 * them
 */
int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
                                   struct list_head *pages,
                                   unsigned *nr_pages,
                                   gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct list_head backpages;
        struct pagevec pagevec;
        struct inode *inode;
        struct page *page, *_n;
        unsigned shift, nrbackpages;
        int ret, ret2, space;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("{OBJ%x,%d},,%d,,",
               object->fscache.debug_id, atomic_read(&op->op.usage),
               *nr_pages);

        if (!object->backer)
                goto all_enobufs;

        space = 1;
        if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
                space = 0;

        inode = object->backer->d_inode;
        ASSERT(S_ISREG(inode->i_mode));
        ASSERT(inode->i_mapping->a_ops->bmap);
        ASSERT(inode->i_mapping->a_ops->readpages);

        /* calculate the shift required to use bmap */
        if (inode->i_sb->s_blocksize > PAGE_SIZE)
                goto all_enobufs;

        shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;

        pagevec_init(&pagevec, 0);

        op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
        op->op.flags |= FSCACHE_OP_ASYNC;
        op->op.processor = cachefiles_read_copier;

        INIT_LIST_HEAD(&backpages);
        nrbackpages = 0;
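        /* unbacked pages default to -ENODATA if cache space could be reserved
         * for them, and to -ENOBUFS otherwise */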
        ret = space ? -ENODATA : -ENOBUFS;
        list_for_each_entry_safe(page, _n, pages, lru) {
                sector_t block0, block;

                /* we assume the absence or presence of the first block is a
                 * good enough indication for the page as a whole
                 * - TODO: don't use bmap() for this as it is _not_ actually
                 *   good enough for this as it doesn't indicate errors, but
                 *   it's all we've got for the moment
                 */
                block0 = page->index;
                block0 <<= shift;

                block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
                                                      block0);
                _debug("%llx -> %llx",
                       (unsigned long long) block0,
                       (unsigned long long) block);

                if (block) {
                        /* we have data - add it to the list to give to the
                         * backing fs */
                        list_move(&page->lru, &backpages);
                        (*nr_pages)--;
                        nrbackpages++;
                } else if (space && pagevec_add(&pagevec, page) == 0) {
                        fscache_mark_pages_cached(op, &pagevec);
                        fscache_retrieval_complete(op, 1);
                        ret = -ENODATA;
                } else {
                        fscache_retrieval_complete(op, 1);
                }
        }

        if (pagevec_count(&pagevec) > 0)
                fscache_mark_pages_cached(op, &pagevec);

        if (list_empty(pages))
                ret = 0;

        /* submit the apparently valid pages to the backing fs to be read from
         * disk */
        if (nrbackpages > 0) {
                ret2 = cachefiles_read_backing_file(object, op, &backpages);
                if (ret2 == -ENOMEM || ret2 == -EINTR)
                        ret = ret2;
        }

        _leave(" = %d [nr=%u%s]",
               ret, *nr_pages, list_empty(pages) ? " empty" : "");
        return ret;

all_enobufs:
        fscache_retrieval_complete(op, *nr_pages);
        return -ENOBUFS;
}
/*
 * allocate a block in the cache in which to store a page
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if ran out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - otherwise:
 *   - the metadata will be retained
 *   - 0 will be returned
 */
int cachefiles_allocate_page(struct fscache_retrieval *op,
                             struct page *page,
                             gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,{%lx},", object, page->index);

        ret = cachefiles_has_space(cache, 0, 1);
        if (ret == 0)
                fscache_mark_page_cached(op, page);
        else
                ret = -ENOBUFS;

        fscache_retrieval_complete(op, 1);
        _leave(" = %d", ret);
        return ret;
}
/*
 * allocate blocks in the cache in which to store a set of pages
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if ran out of memory
 * - returns -ENOBUFS if some buffers couldn't be made available
 * - returns -ENOBUFS if some pages are beyond EOF
 * - otherwise:
 *   - -ENODATA will be returned
 *   - metadata will be retained for any page marked
 */
int cachefiles_allocate_pages(struct fscache_retrieval *op,
                              struct list_head *pages,
                              unsigned *nr_pages,
                              gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct pagevec pagevec;
        struct page *page;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,,,%d,", object, *nr_pages);

        ret = cachefiles_has_space(cache, 0, *nr_pages);
        if (ret == 0) {
                pagevec_init(&pagevec, 0);

                list_for_each_entry(page, pages, lru) {
                        if (pagevec_add(&pagevec, page) == 0)
                                fscache_mark_pages_cached(op, &pagevec);
                }

                if (pagevec_count(&pagevec) > 0)
                        fscache_mark_pages_cached(op, &pagevec);
                ret = -ENODATA;
        } else {
                ret = -ENOBUFS;
        }

        fscache_retrieval_complete(op, *nr_pages);
        _leave(" = %d", ret);
        return ret;
}
/*
 * request a page be stored in the cache
 * - cache withdrawal is prevented by the caller
 * - this request may be ignored if there's no cache block available, in which
 *   case -ENOBUFS will be returned
 * - if the op is in progress, 0 will be returned
 */
int cachefiles_write_page(struct fscache_storage *op, struct page *page)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        mm_segment_t old_fs;
        struct file *file;
        struct path path;
        loff_t pos, eof;
        size_t len;
        void *data;
        int ret;

        ASSERT(op != NULL);
        ASSERT(page != NULL);

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);

        _enter("%p,%p{%lx},,,", object, page, page->index);

        if (!object->backer) {
                _leave(" = -ENOBUFS");
                return -ENOBUFS;
        }

        ASSERT(S_ISREG(object->backer->d_inode->i_mode));

        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        /* write the page to the backing filesystem and let it store it in its
         * own time */
        path.mnt = cache->mnt;
        path.dentry = object->backer;
        file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
        } else {
                ret = -EIO;
                if (file->f_op->write) {
                        pos = (loff_t) page->index << PAGE_SHIFT;

                        /* we mustn't write more data than we have, so we have
                         * to beware of a partial page at EOF */
                        eof = object->fscache.store_limit_l;
                        len = PAGE_SIZE;
                        if (eof & ~PAGE_MASK) {
                                ASSERTCMP(pos, <, eof);
                                if (eof - pos < PAGE_SIZE) {
                                        _debug("cut short %llx to %llx",
                                               pos, eof);
                                        len = eof - pos;
                                        ASSERTCMP(pos + len, ==, eof);
                                }
                        }
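                        /* map the page and temporarily widen the address
                         * limit so that ->write() accepts a kernel buffer */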
                        data = kmap(page);
                        old_fs = get_fs();
                        set_fs(KERNEL_DS);
                        ret = file->f_op->write(
                                file, (const void __user *) data, len, &pos);
                        set_fs(old_fs);
                        kunmap(page);
                        if (ret != len)
                                ret = -EIO;
                }
                fput(file);
        }

        if (ret < 0) {
                if (ret == -EIO)
                        cachefiles_io_error_obj(
                                object, "Write page to backing file failed");
                ret = -ENOBUFS;
        }

        _leave(" = %d", ret);
        return ret;
}
/*
 * detach a backing block from a page
 * - cache withdrawal is prevented by the caller
 */
void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;

        object = container_of(_object, struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,{%lu}", object, page->index);
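        /* the caller holds the cookie lock across this call and expects us to
         * drop it */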
        spin_unlock(&object->fscache.cookie->lock);
}