pnfs.c 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458
  1. /*
  2. * pNFS functions to call and manage layout drivers.
  3. *
  4. * Copyright (c) 2002 [year of first publication]
  5. * The Regents of the University of Michigan
  6. * All Rights Reserved
  7. *
  8. * Dean Hildebrand <dhildebz@umich.edu>
  9. *
  10. * Permission is granted to use, copy, create derivative works, and
  11. * redistribute this software and such derivative works for any purpose,
  12. * so long as the name of the University of Michigan is not used in
  13. * any advertising or publicity pertaining to the use or distribution
  14. * of this software without specific, written prior authorization. If
  15. * the above copyright notice or any other identification of the
  16. * University of Michigan is included in any copy of any portion of
  17. * this software, then the disclaimer below must also be included.
  18. *
  19. * This software is provided as is, without representation or warranty
  20. * of any kind either express or implied, including without limitation
  21. * the implied warranties of merchantability, fitness for a particular
  22. * purpose, or noninfringement. The Regents of the University of
  23. * Michigan shall not be liable for any damages, including special,
  24. * indirect, incidental, or consequential damages, with respect to any
  25. * claim arising out of or in connection with the use of the software,
  26. * even if it has been or is hereafter advised of the possibility of
  27. * such damages.
  28. */
  29. #include <linux/nfs_fs.h>
  30. #include <linux/nfs_page.h>
  31. #include "internal.h"
  32. #include "pnfs.h"
  33. #include "iostat.h"
  34. #define NFSDBG_FACILITY NFSDBG_PNFS
  /*
   * Locking rules:
   *   pnfs_spinlock guards pnfs_modules_tbl.
   */
  static DEFINE_SPINLOCK(pnfs_spinlock);

  /* List on which every registered pnfs module is kept. */
  static LIST_HEAD(pnfs_modules_tbl);
  45. /* Return the registered pnfs layout driver module matching given id */
  46. static struct pnfs_layoutdriver_type *
  47. find_pnfs_driver_locked(u32 id)
  48. {
  49. struct pnfs_layoutdriver_type *local;
  50. list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
  51. if (local->id == id)
  52. goto out;
  53. local = NULL;
  54. out:
  55. dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
  56. return local;
  57. }
  58. static struct pnfs_layoutdriver_type *
  59. find_pnfs_driver(u32 id)
  60. {
  61. struct pnfs_layoutdriver_type *local;
  62. spin_lock(&pnfs_spinlock);
  63. local = find_pnfs_driver_locked(id);
  64. spin_unlock(&pnfs_spinlock);
  65. return local;
  66. }
  67. void
  68. unset_pnfs_layoutdriver(struct nfs_server *nfss)
  69. {
  70. if (nfss->pnfs_curr_ld)
  71. module_put(nfss->pnfs_curr_ld->owner);
  72. nfss->pnfs_curr_ld = NULL;
  73. }
  74. /*
  75. * Try to set the server's pnfs module to the pnfs layout type specified by id.
  76. * Currently only one pNFS layout driver per filesystem is supported.
  77. *
  78. * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
  79. */
  80. void
  81. set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
  82. {
  83. struct pnfs_layoutdriver_type *ld_type = NULL;
  84. if (id == 0)
  85. goto out_no_driver;
  86. if (!(server->nfs_client->cl_exchange_flags &
  87. (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
  88. printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
  89. id, server->nfs_client->cl_exchange_flags);
  90. goto out_no_driver;
  91. }
  92. ld_type = find_pnfs_driver(id);
  93. if (!ld_type) {
  94. request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
  95. ld_type = find_pnfs_driver(id);
  96. if (!ld_type) {
  97. dprintk("%s: No pNFS module found for %u.\n",
  98. __func__, id);
  99. goto out_no_driver;
  100. }
  101. }
  102. if (!try_module_get(ld_type->owner)) {
  103. dprintk("%s: Could not grab reference on module\n", __func__);
  104. goto out_no_driver;
  105. }
  106. server->pnfs_curr_ld = ld_type;
  107. dprintk("%s: pNFS module for %u set\n", __func__, id);
  108. return;
  109. out_no_driver:
  110. dprintk("%s: Using NFSv4 I/O\n", __func__);
  111. server->pnfs_curr_ld = NULL;
  112. }
  113. int
  114. pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
  115. {
  116. int status = -EINVAL;
  117. struct pnfs_layoutdriver_type *tmp;
  118. if (ld_type->id == 0) {
  119. printk(KERN_ERR "%s id 0 is reserved\n", __func__);
  120. return status;
  121. }
  122. if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
  123. printk(KERN_ERR "%s Layout driver must provide "
  124. "alloc_lseg and free_lseg.\n", __func__);
  125. return status;
  126. }
  127. spin_lock(&pnfs_spinlock);
  128. tmp = find_pnfs_driver_locked(ld_type->id);
  129. if (!tmp) {
  130. list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
  131. status = 0;
  132. dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
  133. ld_type->name);
  134. } else {
  135. printk(KERN_ERR "%s Module with id %d already loaded!\n",
  136. __func__, ld_type->id);
  137. }
  138. spin_unlock(&pnfs_spinlock);
  139. return status;
  140. }
  141. EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
  142. void
  143. pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
  144. {
  145. dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
  146. spin_lock(&pnfs_spinlock);
  147. list_del(&ld_type->pnfs_tblid);
  148. spin_unlock(&pnfs_spinlock);
  149. }
  150. EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  151. /*
  152. * pNFS client layout cache
  153. */
  154. /* Need to hold i_lock if caller does not already hold reference */
  155. void
  156. get_layout_hdr(struct pnfs_layout_hdr *lo)
  157. {
  158. atomic_inc(&lo->plh_refcount);
  159. }
  160. static struct pnfs_layout_hdr *
  161. pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
  162. {
  163. struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
  164. return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) :
  165. kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
  166. }
  167. static void
  168. pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
  169. {
  170. struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
  171. return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
  172. }
  173. static void
  174. destroy_layout_hdr(struct pnfs_layout_hdr *lo)
  175. {
  176. dprintk("%s: freeing layout cache %p\n", __func__, lo);
  177. BUG_ON(!list_empty(&lo->plh_layouts));
  178. NFS_I(lo->plh_inode)->layout = NULL;
  179. pnfs_free_layout_hdr(lo);
  180. }
  181. static void
  182. put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
  183. {
  184. if (atomic_dec_and_test(&lo->plh_refcount))
  185. destroy_layout_hdr(lo);
  186. }
  187. void
  188. put_layout_hdr(struct pnfs_layout_hdr *lo)
  189. {
  190. struct inode *inode = lo->plh_inode;
  191. if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
  192. destroy_layout_hdr(lo);
  193. spin_unlock(&inode->i_lock);
  194. }
  195. }
  196. static void
  197. init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
  198. {
  199. INIT_LIST_HEAD(&lseg->pls_list);
  200. atomic_set(&lseg->pls_refcount, 1);
  201. smp_mb();
  202. set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
  203. lseg->pls_layout = lo;
  204. }
  205. static void free_lseg(struct pnfs_layout_segment *lseg)
  206. {
  207. struct inode *ino = lseg->pls_layout->plh_inode;
  208. NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
  209. /* Matched by get_layout_hdr in pnfs_insert_layout */
  210. put_layout_hdr(NFS_I(ino)->layout);
  211. }
  212. static void
  213. put_lseg_common(struct pnfs_layout_segment *lseg)
  214. {
  215. struct inode *inode = lseg->pls_layout->plh_inode;
  216. WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
  217. list_del_init(&lseg->pls_list);
  218. if (list_empty(&lseg->pls_layout->plh_segs)) {
  219. set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
  220. /* Matched by initial refcount set in alloc_init_layout_hdr */
  221. put_layout_hdr_locked(lseg->pls_layout);
  222. }
  223. rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
  224. }
  225. void
  226. put_lseg(struct pnfs_layout_segment *lseg)
  227. {
  228. struct inode *inode;
  229. if (!lseg)
  230. return;
  231. dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
  232. atomic_read(&lseg->pls_refcount),
  233. test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
  234. inode = lseg->pls_layout->plh_inode;
  235. if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
  236. LIST_HEAD(free_me);
  237. put_lseg_common(lseg);
  238. list_add(&lseg->pls_list, &free_me);
  239. spin_unlock(&inode->i_lock);
  240. pnfs_free_lseg_list(&free_me);
  241. }
  242. }
  243. EXPORT_SYMBOL_GPL(put_lseg);
  244. static inline u64
  245. end_offset(u64 start, u64 len)
  246. {
  247. u64 end;
  248. end = start + len;
  249. return end >= start ? end : NFS4_MAX_UINT64;
  250. }
  251. /* last octet in a range */
  252. static inline u64
  253. last_byte_offset(u64 start, u64 len)
  254. {
  255. u64 end;
  256. BUG_ON(!len);
  257. end = start + len;
  258. return end > start ? end - 1 : NFS4_MAX_UINT64;
  259. }
  260. /*
  261. * is l2 fully contained in l1?
  262. * start1 end1
  263. * [----------------------------------)
  264. * start2 end2
  265. * [----------------)
  266. */
  267. static inline int
  268. lo_seg_contained(struct pnfs_layout_range *l1,
  269. struct pnfs_layout_range *l2)
  270. {
  271. u64 start1 = l1->offset;
  272. u64 end1 = end_offset(start1, l1->length);
  273. u64 start2 = l2->offset;
  274. u64 end2 = end_offset(start2, l2->length);
  275. return (start1 <= start2) && (end1 >= end2);
  276. }
  277. /*
  278. * is l1 and l2 intersecting?
  279. * start1 end1
  280. * [----------------------------------)
  281. * start2 end2
  282. * [----------------)
  283. */
  284. static inline int
  285. lo_seg_intersecting(struct pnfs_layout_range *l1,
  286. struct pnfs_layout_range *l2)
  287. {
  288. u64 start1 = l1->offset;
  289. u64 end1 = end_offset(start1, l1->length);
  290. u64 start2 = l2->offset;
  291. u64 end2 = end_offset(start2, l2->length);
  292. return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
  293. (end2 == NFS4_MAX_UINT64 || end2 > start1);
  294. }
  295. static bool
  296. should_free_lseg(struct pnfs_layout_range *lseg_range,
  297. struct pnfs_layout_range *recall_range)
  298. {
  299. return (recall_range->iomode == IOMODE_ANY ||
  300. lseg_range->iomode == recall_range->iomode) &&
  301. lo_seg_intersecting(lseg_range, recall_range);
  302. }
  303. /* Returns 1 if lseg is removed from list, 0 otherwise */
  304. static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
  305. struct list_head *tmp_list)
  306. {
  307. int rv = 0;
  308. if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
  309. /* Remove the reference keeping the lseg in the
  310. * list. It will now be removed when all
  311. * outstanding io is finished.
  312. */
  313. dprintk("%s: lseg %p ref %d\n", __func__, lseg,
  314. atomic_read(&lseg->pls_refcount));
  315. if (atomic_dec_and_test(&lseg->pls_refcount)) {
  316. put_lseg_common(lseg);
  317. list_add(&lseg->pls_list, tmp_list);
  318. rv = 1;
  319. }
  320. }
  321. return rv;
  322. }
  323. /* Returns count of number of matching invalid lsegs remaining in list
  324. * after call.
  325. */
  326. int
  327. mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
  328. struct list_head *tmp_list,
  329. struct pnfs_layout_range *recall_range)
  330. {
  331. struct pnfs_layout_segment *lseg, *next;
  332. int invalid = 0, removed = 0;
  333. dprintk("%s:Begin lo %p\n", __func__, lo);
  334. if (list_empty(&lo->plh_segs)) {
  335. if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
  336. put_layout_hdr_locked(lo);
  337. return 0;
  338. }
  339. list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
  340. if (!recall_range ||
  341. should_free_lseg(&lseg->pls_range, recall_range)) {
  342. dprintk("%s: freeing lseg %p iomode %d "
  343. "offset %llu length %llu\n", __func__,
  344. lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
  345. lseg->pls_range.length);
  346. invalid++;
  347. removed += mark_lseg_invalid(lseg, tmp_list);
  348. }
  349. dprintk("%s:Return %i\n", __func__, invalid - removed);
  350. return invalid - removed;
  351. }
  352. /* note free_me must contain lsegs from a single layout_hdr */
  353. void
  354. pnfs_free_lseg_list(struct list_head *free_me)
  355. {
  356. struct pnfs_layout_segment *lseg, *tmp;
  357. struct pnfs_layout_hdr *lo;
  358. if (list_empty(free_me))
  359. return;
  360. lo = list_first_entry(free_me, struct pnfs_layout_segment,
  361. pls_list)->pls_layout;
  362. if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
  363. struct nfs_client *clp;
  364. clp = NFS_SERVER(lo->plh_inode)->nfs_client;
  365. spin_lock(&clp->cl_lock);
  366. list_del_init(&lo->plh_layouts);
  367. spin_unlock(&clp->cl_lock);
  368. }
  369. list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
  370. list_del(&lseg->pls_list);
  371. free_lseg(lseg);
  372. }
  373. }
  374. void
  375. pnfs_destroy_layout(struct nfs_inode *nfsi)
  376. {
  377. struct pnfs_layout_hdr *lo;
  378. LIST_HEAD(tmp_list);
  379. spin_lock(&nfsi->vfs_inode.i_lock);
  380. lo = nfsi->layout;
  381. if (lo) {
  382. lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
  383. mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
  384. }
  385. spin_unlock(&nfsi->vfs_inode.i_lock);
  386. pnfs_free_lseg_list(&tmp_list);
  387. }
  388. /*
  389. * Called by the state manger to remove all layouts established under an
  390. * expired lease.
  391. */
  392. void
  393. pnfs_destroy_all_layouts(struct nfs_client *clp)
  394. {
  395. struct nfs_server *server;
  396. struct pnfs_layout_hdr *lo;
  397. LIST_HEAD(tmp_list);
  398. nfs4_deviceid_mark_client_invalid(clp);
  399. nfs4_deviceid_purge_client(clp);
  400. spin_lock(&clp->cl_lock);
  401. rcu_read_lock();
  402. list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
  403. if (!list_empty(&server->layouts))
  404. list_splice_init(&server->layouts, &tmp_list);
  405. }
  406. rcu_read_unlock();
  407. spin_unlock(&clp->cl_lock);
  408. while (!list_empty(&tmp_list)) {
  409. lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
  410. plh_layouts);
  411. dprintk("%s freeing layout for inode %lu\n", __func__,
  412. lo->plh_inode->i_ino);
  413. list_del_init(&lo->plh_layouts);
  414. pnfs_destroy_layout(NFS_I(lo->plh_inode));
  415. }
  416. }
  417. /* update lo->plh_stateid with new if is more recent */
  418. void
  419. pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
  420. bool update_barrier)
  421. {
  422. u32 oldseq, newseq;
  423. oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
  424. newseq = be32_to_cpu(new->stateid.seqid);
  425. if ((int)(newseq - oldseq) > 0) {
  426. memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
  427. if (update_barrier) {
  428. u32 new_barrier = be32_to_cpu(new->stateid.seqid);
  429. if ((int)(new_barrier - lo->plh_barrier))
  430. lo->plh_barrier = new_barrier;
  431. } else {
  432. /* Because of wraparound, we want to keep the barrier
  433. * "close" to the current seqids. It needs to be
  434. * within 2**31 to count as "behind", so if it
  435. * gets too near that limit, give us a litle leeway
  436. * and bring it to within 2**30.
  437. * NOTE - and yes, this is all unsigned arithmetic.
  438. */
  439. if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
  440. lo->plh_barrier = newseq - (1 << 30);
  441. }
  442. }
  443. }
  444. /* lget is set to 1 if called from inside send_layoutget call chain */
  445. static bool
  446. pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
  447. int lget)
  448. {
  449. if ((stateid) &&
  450. (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
  451. return true;
  452. return lo->plh_block_lgets ||
  453. test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
  454. test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
  455. (list_empty(&lo->plh_segs) &&
  456. (atomic_read(&lo->plh_outstanding) > lget));
  457. }
  458. int
  459. pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
  460. struct nfs4_state *open_state)
  461. {
  462. int status = 0;
  463. dprintk("--> %s\n", __func__);
  464. spin_lock(&lo->plh_inode->i_lock);
  465. if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
  466. status = -EAGAIN;
  467. } else if (list_empty(&lo->plh_segs)) {
  468. int seq;
  469. do {
  470. seq = read_seqbegin(&open_state->seqlock);
  471. memcpy(dst->data, open_state->stateid.data,
  472. sizeof(open_state->stateid.data));
  473. } while (read_seqretry(&open_state->seqlock, seq));
  474. } else
  475. memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
  476. spin_unlock(&lo->plh_inode->i_lock);
  477. dprintk("<-- %s\n", __func__);
  478. return status;
  479. }
  480. /*
  481. * Get layout from server.
  482. * for now, assume that whole file layouts are requested.
  483. * arg->offset: 0
  484. * arg->length: all ones
  485. */
  486. static struct pnfs_layout_segment *
  487. send_layoutget(struct pnfs_layout_hdr *lo,
  488. struct nfs_open_context *ctx,
  489. struct pnfs_layout_range *range,
  490. gfp_t gfp_flags)
  491. {
  492. struct inode *ino = lo->plh_inode;
  493. struct nfs_server *server = NFS_SERVER(ino);
  494. struct nfs4_layoutget *lgp;
  495. struct pnfs_layout_segment *lseg = NULL;
  496. struct page **pages = NULL;
  497. int i;
  498. u32 max_resp_sz, max_pages;
  499. dprintk("--> %s\n", __func__);
  500. BUG_ON(ctx == NULL);
  501. lgp = kzalloc(sizeof(*lgp), gfp_flags);
  502. if (lgp == NULL)
  503. return NULL;
  504. /* allocate pages for xdr post processing */
  505. max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
  506. max_pages = max_resp_sz >> PAGE_SHIFT;
  507. pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
  508. if (!pages)
  509. goto out_err_free;
  510. for (i = 0; i < max_pages; i++) {
  511. pages[i] = alloc_page(gfp_flags);
  512. if (!pages[i])
  513. goto out_err_free;
  514. }
  515. lgp->args.minlength = PAGE_CACHE_SIZE;
  516. if (lgp->args.minlength > range->length)
  517. lgp->args.minlength = range->length;
  518. lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
  519. lgp->args.range = *range;
  520. lgp->args.type = server->pnfs_curr_ld->id;
  521. lgp->args.inode = ino;
  522. lgp->args.ctx = get_nfs_open_context(ctx);
  523. lgp->args.layout.pages = pages;
  524. lgp->args.layout.pglen = max_pages * PAGE_SIZE;
  525. lgp->lsegpp = &lseg;
  526. lgp->gfp_flags = gfp_flags;
  527. /* Synchronously retrieve layout information from server and
  528. * store in lseg.
  529. */
  530. nfs4_proc_layoutget(lgp);
  531. if (!lseg) {
  532. /* remember that LAYOUTGET failed and suspend trying */
  533. set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
  534. }
  535. /* free xdr pages */
  536. for (i = 0; i < max_pages; i++)
  537. __free_page(pages[i]);
  538. kfree(pages);
  539. return lseg;
  540. out_err_free:
  541. /* free any allocated xdr pages, lgp as it's not used */
  542. if (pages) {
  543. for (i = 0; i < max_pages; i++) {
  544. if (!pages[i])
  545. break;
  546. __free_page(pages[i]);
  547. }
  548. kfree(pages);
  549. }
  550. kfree(lgp);
  551. return NULL;
  552. }
  553. /* Initiates a LAYOUTRETURN(FILE) */
  554. int
  555. _pnfs_return_layout(struct inode *ino)
  556. {
  557. struct pnfs_layout_hdr *lo = NULL;
  558. struct nfs_inode *nfsi = NFS_I(ino);
  559. LIST_HEAD(tmp_list);
  560. struct nfs4_layoutreturn *lrp;
  561. nfs4_stateid stateid;
  562. int status = 0;
  563. dprintk("--> %s\n", __func__);
  564. spin_lock(&ino->i_lock);
  565. lo = nfsi->layout;
  566. if (!lo) {
  567. spin_unlock(&ino->i_lock);
  568. dprintk("%s: no layout to return\n", __func__);
  569. return status;
  570. }
  571. stateid = nfsi->layout->plh_stateid;
  572. /* Reference matched in nfs4_layoutreturn_release */
  573. get_layout_hdr(lo);
  574. mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
  575. lo->plh_block_lgets++;
  576. spin_unlock(&ino->i_lock);
  577. pnfs_free_lseg_list(&tmp_list);
  578. WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
  579. lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
  580. if (unlikely(lrp == NULL)) {
  581. status = -ENOMEM;
  582. set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
  583. set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
  584. put_layout_hdr(lo);
  585. goto out;
  586. }
  587. lrp->args.stateid = stateid;
  588. lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
  589. lrp->args.inode = ino;
  590. lrp->args.layout = lo;
  591. lrp->clp = NFS_SERVER(ino)->nfs_client;
  592. status = nfs4_proc_layoutreturn(lrp);
  593. out:
  594. dprintk("<-- %s status: %d\n", __func__, status);
  595. return status;
  596. }
  597. bool pnfs_roc(struct inode *ino)
  598. {
  599. struct pnfs_layout_hdr *lo;
  600. struct pnfs_layout_segment *lseg, *tmp;
  601. LIST_HEAD(tmp_list);
  602. bool found = false;
  603. spin_lock(&ino->i_lock);
  604. lo = NFS_I(ino)->layout;
  605. if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
  606. test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
  607. goto out_nolayout;
  608. list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
  609. if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
  610. mark_lseg_invalid(lseg, &tmp_list);
  611. found = true;
  612. }
  613. if (!found)
  614. goto out_nolayout;
  615. lo->plh_block_lgets++;
  616. get_layout_hdr(lo); /* matched in pnfs_roc_release */
  617. spin_unlock(&ino->i_lock);
  618. pnfs_free_lseg_list(&tmp_list);
  619. return true;
  620. out_nolayout:
  621. spin_unlock(&ino->i_lock);
  622. return false;
  623. }
  624. void pnfs_roc_release(struct inode *ino)
  625. {
  626. struct pnfs_layout_hdr *lo;
  627. spin_lock(&ino->i_lock);
  628. lo = NFS_I(ino)->layout;
  629. lo->plh_block_lgets--;
  630. put_layout_hdr_locked(lo);
  631. spin_unlock(&ino->i_lock);
  632. }
  633. void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
  634. {
  635. struct pnfs_layout_hdr *lo;
  636. spin_lock(&ino->i_lock);
  637. lo = NFS_I(ino)->layout;
  638. if ((int)(barrier - lo->plh_barrier) > 0)
  639. lo->plh_barrier = barrier;
  640. spin_unlock(&ino->i_lock);
  641. }
  642. bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
  643. {
  644. struct nfs_inode *nfsi = NFS_I(ino);
  645. struct pnfs_layout_segment *lseg;
  646. bool found = false;
  647. spin_lock(&ino->i_lock);
  648. list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
  649. if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
  650. found = true;
  651. break;
  652. }
  653. if (!found) {
  654. struct pnfs_layout_hdr *lo = nfsi->layout;
  655. u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
  656. /* Since close does not return a layout stateid for use as
  657. * a barrier, we choose the worst-case barrier.
  658. */
  659. *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
  660. }
  661. spin_unlock(&ino->i_lock);
  662. return found;
  663. }
  664. /*
  665. * Compare two layout segments for sorting into layout cache.
  666. * We want to preferentially return RW over RO layouts, so ensure those
  667. * are seen first.
  668. */
  669. static s64
  670. cmp_layout(struct pnfs_layout_range *l1,
  671. struct pnfs_layout_range *l2)
  672. {
  673. s64 d;
  674. /* high offset > low offset */
  675. d = l1->offset - l2->offset;
  676. if (d)
  677. return d;
  678. /* short length > long length */
  679. d = l2->length - l1->length;
  680. if (d)
  681. return d;
  682. /* read > read/write */
  683. return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
  684. }
  685. static void
  686. pnfs_insert_layout(struct pnfs_layout_hdr *lo,
  687. struct pnfs_layout_segment *lseg)
  688. {
  689. struct pnfs_layout_segment *lp;
  690. dprintk("%s:Begin\n", __func__);
  691. assert_spin_locked(&lo->plh_inode->i_lock);
  692. list_for_each_entry(lp, &lo->plh_segs, pls_list) {
  693. if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
  694. continue;
  695. list_add_tail(&lseg->pls_list, &lp->pls_list);
  696. dprintk("%s: inserted lseg %p "
  697. "iomode %d offset %llu length %llu before "
  698. "lp %p iomode %d offset %llu length %llu\n",
  699. __func__, lseg, lseg->pls_range.iomode,
  700. lseg->pls_range.offset, lseg->pls_range.length,
  701. lp, lp->pls_range.iomode, lp->pls_range.offset,
  702. lp->pls_range.length);
  703. goto out;
  704. }
  705. list_add_tail(&lseg->pls_list, &lo->plh_segs);
  706. dprintk("%s: inserted lseg %p "
  707. "iomode %d offset %llu length %llu at tail\n",
  708. __func__, lseg, lseg->pls_range.iomode,
  709. lseg->pls_range.offset, lseg->pls_range.length);
  710. out:
  711. get_layout_hdr(lo);
  712. dprintk("%s:Return\n", __func__);
  713. }
  714. static struct pnfs_layout_hdr *
  715. alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
  716. {
  717. struct pnfs_layout_hdr *lo;
  718. lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
  719. if (!lo)
  720. return NULL;
  721. atomic_set(&lo->plh_refcount, 1);
  722. INIT_LIST_HEAD(&lo->plh_layouts);
  723. INIT_LIST_HEAD(&lo->plh_segs);
  724. INIT_LIST_HEAD(&lo->plh_bulk_recall);
  725. lo->plh_inode = ino;
  726. return lo;
  727. }
  728. static struct pnfs_layout_hdr *
  729. pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
  730. {
  731. struct nfs_inode *nfsi = NFS_I(ino);
  732. struct pnfs_layout_hdr *new = NULL;
  733. dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
  734. assert_spin_locked(&ino->i_lock);
  735. if (nfsi->layout) {
  736. if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
  737. return NULL;
  738. else
  739. return nfsi->layout;
  740. }
  741. spin_unlock(&ino->i_lock);
  742. new = alloc_init_layout_hdr(ino, gfp_flags);
  743. spin_lock(&ino->i_lock);
  744. if (likely(nfsi->layout == NULL)) /* Won the race? */
  745. nfsi->layout = new;
  746. else
  747. pnfs_free_layout_hdr(new);
  748. return nfsi->layout;
  749. }
  750. /*
  751. * iomode matching rules:
  752. * iomode lseg match
  753. * ----- ----- -----
  754. * ANY READ true
  755. * ANY RW true
  756. * RW READ false
  757. * RW RW true
  758. * READ READ true
  759. * READ RW true
  760. */
  761. static int
  762. is_matching_lseg(struct pnfs_layout_range *ls_range,
  763. struct pnfs_layout_range *range)
  764. {
  765. struct pnfs_layout_range range1;
  766. if ((range->iomode == IOMODE_RW &&
  767. ls_range->iomode != IOMODE_RW) ||
  768. !lo_seg_intersecting(ls_range, range))
  769. return 0;
  770. /* range1 covers only the first byte in the range */
  771. range1 = *range;
  772. range1.length = 1;
  773. return lo_seg_contained(ls_range, &range1);
  774. }
  775. /*
  776. * lookup range in layout
  777. */
  778. static struct pnfs_layout_segment *
  779. pnfs_find_lseg(struct pnfs_layout_hdr *lo,
  780. struct pnfs_layout_range *range)
  781. {
  782. struct pnfs_layout_segment *lseg, *ret = NULL;
  783. dprintk("%s:Begin\n", __func__);
  784. assert_spin_locked(&lo->plh_inode->i_lock);
  785. list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
  786. if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
  787. is_matching_lseg(&lseg->pls_range, range)) {
  788. ret = get_lseg(lseg);
  789. break;
  790. }
  791. if (lseg->pls_range.offset > range->offset)
  792. break;
  793. }
  794. dprintk("%s:Return lseg %p ref %d\n",
  795. __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
  796. return ret;
  797. }
  798. /*
  799. * Layout segment is retreived from the server if not cached.
  800. * The appropriate layout segment is referenced and returned to the caller.
  801. */
  802. struct pnfs_layout_segment *
  803. pnfs_update_layout(struct inode *ino,
  804. struct nfs_open_context *ctx,
  805. loff_t pos,
  806. u64 count,
  807. enum pnfs_iomode iomode,
  808. gfp_t gfp_flags)
  809. {
  810. struct pnfs_layout_range arg = {
  811. .iomode = iomode,
  812. .offset = pos,
  813. .length = count,
  814. };
  815. unsigned pg_offset;
  816. struct nfs_inode *nfsi = NFS_I(ino);
  817. struct nfs_server *server = NFS_SERVER(ino);
  818. struct nfs_client *clp = server->nfs_client;
  819. struct pnfs_layout_hdr *lo;
  820. struct pnfs_layout_segment *lseg = NULL;
  821. bool first = false;
  822. if (!pnfs_enabled_sb(NFS_SERVER(ino)))
  823. return NULL;
  824. spin_lock(&ino->i_lock);
  825. lo = pnfs_find_alloc_layout(ino, gfp_flags);
  826. if (lo == NULL) {
  827. dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
  828. goto out_unlock;
  829. }
  830. /* Do we even need to bother with this? */
  831. if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
  832. test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
  833. dprintk("%s matches recall, use MDS\n", __func__);
  834. goto out_unlock;
  835. }
  836. /* if LAYOUTGET already failed once we don't try again */
  837. if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
  838. goto out_unlock;
  839. /* Check to see if the layout for the given range already exists */
  840. lseg = pnfs_find_lseg(lo, &arg);
  841. if (lseg)
  842. goto out_unlock;
  843. if (pnfs_layoutgets_blocked(lo, NULL, 0))
  844. goto out_unlock;
  845. atomic_inc(&lo->plh_outstanding);
  846. get_layout_hdr(lo);
  847. if (list_empty(&lo->plh_segs))
  848. first = true;
  849. spin_unlock(&ino->i_lock);
  850. if (first) {
  851. /* The lo must be on the clp list if there is any
  852. * chance of a CB_LAYOUTRECALL(FILE) coming in.
  853. */
  854. spin_lock(&clp->cl_lock);
  855. BUG_ON(!list_empty(&lo->plh_layouts));
  856. list_add_tail(&lo->plh_layouts, &server->layouts);
  857. spin_unlock(&clp->cl_lock);
  858. }
  859. pg_offset = arg.offset & ~PAGE_CACHE_MASK;
  860. if (pg_offset) {
  861. arg.offset -= pg_offset;
  862. arg.length += pg_offset;
  863. }
  864. if (arg.length != NFS4_MAX_UINT64)
  865. arg.length = PAGE_CACHE_ALIGN(arg.length);
  866. lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
  867. if (!lseg && first) {
  868. spin_lock(&clp->cl_lock);
  869. list_del_init(&lo->plh_layouts);
  870. spin_unlock(&clp->cl_lock);
  871. }
  872. atomic_dec(&lo->plh_outstanding);
  873. put_layout_hdr(lo);
  874. out:
  875. dprintk("%s end, state 0x%lx lseg %p\n", __func__,
  876. nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
  877. return lseg;
  878. out_unlock:
  879. spin_unlock(&ino->i_lock);
  880. goto out;
  881. }
  882. EXPORT_SYMBOL_GPL(pnfs_update_layout);
  883. int
  884. pnfs_layout_process(struct nfs4_layoutget *lgp)
  885. {
  886. struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
  887. struct nfs4_layoutget_res *res = &lgp->res;
  888. struct pnfs_layout_segment *lseg;
  889. struct inode *ino = lo->plh_inode;
  890. struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
  891. int status = 0;
  892. /* Inject layout blob into I/O device driver */
  893. lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
  894. if (!lseg || IS_ERR(lseg)) {
  895. if (!lseg)
  896. status = -ENOMEM;
  897. else
  898. status = PTR_ERR(lseg);
  899. dprintk("%s: Could not allocate layout: error %d\n",
  900. __func__, status);
  901. goto out;
  902. }
  903. spin_lock(&ino->i_lock);
  904. if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
  905. test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
  906. dprintk("%s forget reply due to recall\n", __func__);
  907. goto out_forget_reply;
  908. }
  909. if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
  910. dprintk("%s forget reply due to state\n", __func__);
  911. goto out_forget_reply;
  912. }
  913. init_lseg(lo, lseg);
  914. lseg->pls_range = res->range;
  915. *lgp->lsegpp = get_lseg(lseg);
  916. pnfs_insert_layout(lo, lseg);
  917. if (res->return_on_close) {
  918. set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
  919. set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
  920. }
  921. /* Done processing layoutget. Set the layout stateid */
  922. pnfs_set_layout_stateid(lo, &res->stateid, false);
  923. spin_unlock(&ino->i_lock);
  924. out:
  925. return status;
  926. out_forget_reply:
  927. spin_unlock(&ino->i_lock);
  928. lseg->pls_layout = lo;
  929. NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
  930. goto out;
  931. }
  932. void
  933. pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
  934. {
  935. BUG_ON(pgio->pg_lseg != NULL);
  936. pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
  937. req->wb_context,
  938. req_offset(req),
  939. req->wb_bytes,
  940. IOMODE_READ,
  941. GFP_KERNEL);
  942. /* If no lseg, fall back to read through mds */
  943. if (pgio->pg_lseg == NULL)
  944. nfs_pageio_reset_read_mds(pgio);
  945. }
  946. EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
  947. void
  948. pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
  949. {
  950. BUG_ON(pgio->pg_lseg != NULL);
  951. pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
  952. req->wb_context,
  953. req_offset(req),
  954. req->wb_bytes,
  955. IOMODE_RW,
  956. GFP_NOFS);
  957. /* If no lseg, fall back to write through mds */
  958. if (pgio->pg_lseg == NULL)
  959. nfs_pageio_reset_write_mds(pgio);
  960. }
  961. EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
  962. bool
  963. pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
  964. {
  965. struct nfs_server *server = NFS_SERVER(inode);
  966. struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
  967. if (ld == NULL)
  968. return false;
  969. nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
  970. return true;
  971. }
  972. bool
  973. pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
  974. {
  975. struct nfs_server *server = NFS_SERVER(inode);
  976. struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
  977. if (ld == NULL)
  978. return false;
  979. nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
  980. return true;
  981. }
  982. bool
  983. pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
  984. struct nfs_page *req)
  985. {
  986. if (pgio->pg_lseg == NULL)
  987. return nfs_generic_pg_test(pgio, prev, req);
  988. /*
  989. * Test if a nfs_page is fully contained in the pnfs_layout_range.
  990. * Note that this test makes several assumptions:
  991. * - that the previous nfs_page in the struct nfs_pageio_descriptor
  992. * is known to lie within the range.
  993. * - that the nfs_page being tested is known to be contiguous with the
  994. * previous nfs_page.
  995. * - Layout ranges are page aligned, so we only have to test the
  996. * start offset of the request.
  997. *
  998. * Please also note that 'end_offset' is actually the offset of the
  999. * first byte that lies outside the pnfs_layout_range. FIXME?
  1000. *
  1001. */
  1002. return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
  1003. pgio->pg_lseg->pls_range.length);
  1004. }
  1005. EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
  1006. /*
  1007. * Called by non rpc-based layout drivers
  1008. */
  1009. int
  1010. pnfs_ld_write_done(struct nfs_write_data *data)
  1011. {
  1012. int status;
  1013. if (!data->pnfs_error) {
  1014. pnfs_set_layoutcommit(data);
  1015. data->mds_ops->rpc_call_done(&data->task, data);
  1016. data->mds_ops->rpc_release(data);
  1017. return 0;
  1018. }
  1019. dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
  1020. data->pnfs_error);
  1021. status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
  1022. data->mds_ops, NFS_FILE_SYNC);
  1023. return status ? : -EAGAIN;
  1024. }
  1025. EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
  1026. static void
  1027. pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
  1028. struct nfs_write_data *data)
  1029. {
  1030. list_splice_tail_init(&data->pages, &desc->pg_list);
  1031. if (data->req && list_empty(&data->req->wb_list))
  1032. nfs_list_add_request(data->req, &desc->pg_list);
  1033. nfs_pageio_reset_write_mds(desc);
  1034. desc->pg_recoalesce = 1;
  1035. nfs_writedata_release(data);
  1036. }
  1037. static enum pnfs_try_status
  1038. pnfs_try_to_write_data(struct nfs_write_data *wdata,
  1039. const struct rpc_call_ops *call_ops,
  1040. struct pnfs_layout_segment *lseg,
  1041. int how)
  1042. {
  1043. struct inode *inode = wdata->inode;
  1044. enum pnfs_try_status trypnfs;
  1045. struct nfs_server *nfss = NFS_SERVER(inode);
  1046. wdata->mds_ops = call_ops;
  1047. wdata->lseg = get_lseg(lseg);
  1048. dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
  1049. inode->i_ino, wdata->args.count, wdata->args.offset, how);
  1050. trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
  1051. if (trypnfs == PNFS_NOT_ATTEMPTED) {
  1052. put_lseg(wdata->lseg);
  1053. wdata->lseg = NULL;
  1054. } else
  1055. nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
  1056. dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
  1057. return trypnfs;
  1058. }
  1059. static void
  1060. pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
  1061. {
  1062. struct nfs_write_data *data;
  1063. const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
  1064. struct pnfs_layout_segment *lseg = desc->pg_lseg;
  1065. desc->pg_lseg = NULL;
  1066. while (!list_empty(head)) {
  1067. enum pnfs_try_status trypnfs;
  1068. data = list_entry(head->next, struct nfs_write_data, list);
  1069. list_del_init(&data->list);
  1070. trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
  1071. if (trypnfs == PNFS_NOT_ATTEMPTED)
  1072. pnfs_write_through_mds(desc, data);
  1073. }
  1074. put_lseg(lseg);
  1075. }
  1076. int
  1077. pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
  1078. {
  1079. LIST_HEAD(head);
  1080. int ret;
  1081. ret = nfs_generic_flush(desc, &head);
  1082. if (ret != 0) {
  1083. put_lseg(desc->pg_lseg);
  1084. desc->pg_lseg = NULL;
  1085. return ret;
  1086. }
  1087. pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
  1088. return 0;
  1089. }
  1090. EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
  1091. /*
  1092. * Called by non rpc-based layout drivers
  1093. */
  1094. int
  1095. pnfs_ld_read_done(struct nfs_read_data *data)
  1096. {
  1097. int status;
  1098. if (!data->pnfs_error) {
  1099. __nfs4_read_done_cb(data);
  1100. data->mds_ops->rpc_call_done(&data->task, data);
  1101. data->mds_ops->rpc_release(data);
  1102. return 0;
  1103. }
  1104. dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
  1105. data->pnfs_error);
  1106. status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
  1107. data->mds_ops);
  1108. return status ? : -EAGAIN;
  1109. }
  1110. EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
  1111. static void
  1112. pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
  1113. struct nfs_read_data *data)
  1114. {
  1115. list_splice_tail_init(&data->pages, &desc->pg_list);
  1116. if (data->req && list_empty(&data->req->wb_list))
  1117. nfs_list_add_request(data->req, &desc->pg_list);
  1118. nfs_pageio_reset_read_mds(desc);
  1119. desc->pg_recoalesce = 1;
  1120. nfs_readdata_release(data);
  1121. }
  1122. /*
  1123. * Call the appropriate parallel I/O subsystem read function.
  1124. */
  1125. static enum pnfs_try_status
  1126. pnfs_try_to_read_data(struct nfs_read_data *rdata,
  1127. const struct rpc_call_ops *call_ops,
  1128. struct pnfs_layout_segment *lseg)
  1129. {
  1130. struct inode *inode = rdata->inode;
  1131. struct nfs_server *nfss = NFS_SERVER(inode);
  1132. enum pnfs_try_status trypnfs;
  1133. rdata->mds_ops = call_ops;
  1134. rdata->lseg = get_lseg(lseg);
  1135. dprintk("%s: Reading ino:%lu %u@%llu\n",
  1136. __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
  1137. trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
  1138. if (trypnfs == PNFS_NOT_ATTEMPTED) {
  1139. put_lseg(rdata->lseg);
  1140. rdata->lseg = NULL;
  1141. } else {
  1142. nfs_inc_stats(inode, NFSIOS_PNFS_READ);
  1143. }
  1144. dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
  1145. return trypnfs;
  1146. }
  1147. static void
  1148. pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
  1149. {
  1150. struct nfs_read_data *data;
  1151. const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
  1152. struct pnfs_layout_segment *lseg = desc->pg_lseg;
  1153. desc->pg_lseg = NULL;
  1154. while (!list_empty(head)) {
  1155. enum pnfs_try_status trypnfs;
  1156. data = list_entry(head->next, struct nfs_read_data, list);
  1157. list_del_init(&data->list);
  1158. trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
  1159. if (trypnfs == PNFS_NOT_ATTEMPTED)
  1160. pnfs_read_through_mds(desc, data);
  1161. }
  1162. put_lseg(lseg);
  1163. }
  1164. int
  1165. pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
  1166. {
  1167. LIST_HEAD(head);
  1168. int ret;
  1169. ret = nfs_generic_pagein(desc, &head);
  1170. if (ret != 0) {
  1171. put_lseg(desc->pg_lseg);
  1172. desc->pg_lseg = NULL;
  1173. return ret;
  1174. }
  1175. pnfs_do_multiple_reads(desc, &head);
  1176. return 0;
  1177. }
  1178. EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
  1179. /*
  1180. * Currently there is only one (whole file) write lseg.
  1181. */
  1182. static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
  1183. {
  1184. struct pnfs_layout_segment *lseg, *rv = NULL;
  1185. list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
  1186. if (lseg->pls_range.iomode == IOMODE_RW)
  1187. rv = lseg;
  1188. return rv;
  1189. }
  1190. void
  1191. pnfs_set_layoutcommit(struct nfs_write_data *wdata)
  1192. {
  1193. struct nfs_inode *nfsi = NFS_I(wdata->inode);
  1194. loff_t end_pos = wdata->mds_offset + wdata->res.count;
  1195. bool mark_as_dirty = false;
  1196. spin_lock(&nfsi->vfs_inode.i_lock);
  1197. if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
  1198. /* references matched in nfs4_layoutcommit_release */
  1199. get_lseg(wdata->lseg);
  1200. wdata->lseg->pls_lc_cred =
  1201. get_rpccred(wdata->args.context->state->owner->so_cred);
  1202. mark_as_dirty = true;
  1203. dprintk("%s: Set layoutcommit for inode %lu ",
  1204. __func__, wdata->inode->i_ino);
  1205. }
  1206. if (end_pos > wdata->lseg->pls_end_pos)
  1207. wdata->lseg->pls_end_pos = end_pos;
  1208. spin_unlock(&nfsi->vfs_inode.i_lock);
  1209. /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
  1210. * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
  1211. if (mark_as_dirty)
  1212. mark_inode_dirty_sync(wdata->inode);
  1213. }
  1214. EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
  1215. /*
  1216. * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
  1217. * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
  1218. * data to disk to allow the server to recover the data if it crashes.
  1219. * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
  1220. * is off, and a COMMIT is sent to a data server, or
  1221. * if WRITEs to a data server return NFS_DATA_SYNC.
  1222. */
  1223. int
  1224. pnfs_layoutcommit_inode(struct inode *inode, bool sync)
  1225. {
  1226. struct nfs4_layoutcommit_data *data;
  1227. struct nfs_inode *nfsi = NFS_I(inode);
  1228. struct pnfs_layout_segment *lseg;
  1229. struct rpc_cred *cred;
  1230. loff_t end_pos;
  1231. int status = 0;
  1232. dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
  1233. if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
  1234. return 0;
  1235. /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
  1236. data = kzalloc(sizeof(*data), GFP_NOFS);
  1237. if (!data) {
  1238. mark_inode_dirty_sync(inode);
  1239. status = -ENOMEM;
  1240. goto out;
  1241. }
  1242. spin_lock(&inode->i_lock);
  1243. if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
  1244. spin_unlock(&inode->i_lock);
  1245. kfree(data);
  1246. goto out;
  1247. }
  1248. /*
  1249. * Currently only one (whole file) write lseg which is referenced
  1250. * in pnfs_set_layoutcommit and will be found.
  1251. */
  1252. lseg = pnfs_list_write_lseg(inode);
  1253. end_pos = lseg->pls_end_pos;
  1254. cred = lseg->pls_lc_cred;
  1255. lseg->pls_end_pos = 0;
  1256. lseg->pls_lc_cred = NULL;
  1257. memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
  1258. sizeof(nfsi->layout->plh_stateid.data));
  1259. spin_unlock(&inode->i_lock);
  1260. data->args.inode = inode;
  1261. data->lseg = lseg;
  1262. data->cred = cred;
  1263. nfs_fattr_init(&data->fattr);
  1264. data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
  1265. data->res.fattr = &data->fattr;
  1266. data->args.lastbytewritten = end_pos - 1;
  1267. data->res.server = NFS_SERVER(inode);
  1268. status = nfs4_proc_layoutcommit(data, sync);
  1269. out:
  1270. dprintk("<-- %s status %d\n", __func__, status);
  1271. return status;
  1272. }