/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement. The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

#include <linux/nfs_fs.h>
#include "internal.h"
#include "pnfs.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS

/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);
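
/*
 * A minimal registration sketch (illustrative only, not part of this
 * file): a layout driver module fills in a pnfs_layoutdriver_type and
 * registers it from its module_init hook.  The "example_" identifiers
 * below are hypothetical; alloc_lseg and free_lseg are mandatory, as
 * pnfs_register_layoutdriver() below enforces.
 *
 *	static struct pnfs_layoutdriver_type example_layout_type = {
 *		.id		= LAYOUT_NFSV4_1_FILES,	// or the driver's own type
 *		.name		= "LAYOUT_EXAMPLE",
 *		.owner		= THIS_MODULE,
 *		.alloc_lseg	= example_alloc_lseg,
 *		.free_lseg	= example_free_lseg,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return pnfs_register_layoutdriver(&example_layout_type);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		pnfs_unregister_layoutdriver(&example_layout_type);
 *	}
 */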
/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	spin_unlock(&pnfs_spinlock);
	return local;
}

void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld) {
		nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
		module_put(nfss->pnfs_curr_ld->owner);
	}
	nfss->pnfs_curr_ld = NULL;
}
/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
		       id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	if (!try_module_get(ld_type->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		goto out_no_driver;
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver(server)) {
		printk(KERN_ERR
		       "%s: Error initializing mount point for layout driver %u.\n",
		       __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}
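
/*
 * Note on module autoloading: assuming LAYOUT_NFSV4_1_MODULE_PREFIX has
 * its conventional value "nfs-layouttype4", a request for layout type 1
 * (LAYOUT_NFSV4_1_FILES) becomes request_module("nfs-layouttype4-1"),
 * so a layout driver that wants to be autoloaded is expected to declare
 * a matching alias, e.g.:
 *
 *	MODULE_ALIAS("nfs-layouttype4-1");
 */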
int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "%s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "%s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "%s Module with id %d already loaded!\n",
		       __func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);
	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	atomic_inc(&lo->plh_refcount);
}

static void
destroy_layout_hdr(struct pnfs_layout_hdr *lo)
{
	dprintk("%s: freeing layout cache %p\n", __func__, lo);
	BUG_ON(!list_empty(&lo->plh_layouts));
	NFS_I(lo->plh_inode)->layout = NULL;
	kfree(lo);
}

static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	if (atomic_dec_and_test(&lo->plh_refcount))
		destroy_layout_hdr(lo);
}

void
put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		destroy_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
	}
}
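
/*
 * Reference-counting sketch for the layout header (summarizing the code
 * above, not adding new rules): destruction must happen under i_lock so
 * that nfsi->layout is cleared atomically with the final put.
 * put_layout_hdr_locked() is for callers that already hold i_lock;
 * put_layout_hdr() uses atomic_dec_and_lock(), which only takes i_lock
 * when the refcount is about to hit zero, keeping the common put path
 * lock-free.
 */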
static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->pls_list);
	atomic_set(&lseg->pls_refcount, 1);
	smp_mb();
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
}

static void free_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *ino = lseg->pls_layout->plh_inode;

	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	/* Matched by get_layout_hdr in pnfs_insert_layout */
	put_layout_hdr(NFS_I(ino)->layout);
}

static void
put_lseg_common(struct pnfs_layout_segment *lseg)
{
	struct inode *inode = lseg->pls_layout->plh_inode;

	BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	list_del_init(&lseg->pls_list);
	if (list_empty(&lseg->pls_layout->plh_segs)) {
		set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
		/* Matched by initial refcount set in alloc_init_layout_hdr */
		put_layout_hdr_locked(lseg->pls_layout);
	}
	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}

void
put_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *inode;

	if (!lseg)
		return;

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	inode = lseg->pls_layout->plh_inode;
	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
		LIST_HEAD(free_me);

		put_lseg_common(lseg);
		list_add(&lseg->pls_list, &free_me);
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&free_me);
	}
}

static bool
should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
{
	return (recall_iomode == IOMODE_ANY ||
		lseg_iomode == recall_iomode);
}
/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
			     struct list_head *tmp_list)
{
	int rv = 0;

	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
			atomic_read(&lseg->pls_refcount));
		if (atomic_dec_and_test(&lseg->pls_refcount)) {
			put_lseg_common(lseg);
			list_add(&lseg->pls_list, tmp_list);
			rv = 1;
		}
	}
	return rv;
}

/* Returns count of number of matching invalid lsegs remaining in list
 * after call.
 */
int
mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
			    struct list_head *tmp_list,
			    u32 iomode)
{
	struct pnfs_layout_segment *lseg, *next;
	int invalid = 0, removed = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs)) {
		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
			put_layout_hdr_locked(lo);
		return 0;
	}
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
			dprintk("%s: freeing lseg %p iomode %d "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
				lseg->pls_range.length);
			invalid++;
			removed += mark_lseg_invalid(lseg, tmp_list);
		}
	dprintk("%s:Return %i\n", __func__, invalid - removed);
	return invalid - removed;
}
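
/*
 * Worked example of the return value above: if a recall matches three
 * lsegs, two of which are idle (their refcounts drop to zero and they
 * move to tmp_list) while one still has I/O in flight, then invalid == 3
 * and removed == 2, so the function returns 1: one matching lseg is
 * still on the list waiting for its outstanding I/O to finish.
 */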
/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;
	struct pnfs_layout_hdr *lo;

	if (list_empty(free_me))
		return;

	lo = list_first_entry(free_me, struct pnfs_layout_segment,
			      pls_list)->pls_layout;

	if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
		struct nfs_client *clp;

		clp = NFS_SERVER(lo->plh_inode)->nfs_client;
		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		free_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
		mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
	}
	spin_unlock(&nfsi->vfs_inode.i_lock);
	pnfs_free_lseg_list(&tmp_list);
}
/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&clp->cl_lock);
	list_splice_init(&clp->cl_layouts, &tmp_list);
	spin_unlock(&clp->cl_lock);

	while (!list_empty(&tmp_list)) {
		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
				plh_layouts);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		pnfs_destroy_layout(NFS_I(lo->plh_inode));
	}
}
/* update lo->plh_stateid with new if it is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq;

	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
	newseq = be32_to_cpu(new->stateid.seqid);
	if ((int)(newseq - oldseq) > 0) {
		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
		if (update_barrier) {
			u32 new_barrier = be32_to_cpu(new->stateid.seqid);

			/* Only ever move the barrier forward */
			if ((int)(new_barrier - lo->plh_barrier) > 0)
				lo->plh_barrier = new_barrier;
		} else {
			/* Because of wraparound, we want to keep the barrier
			 * "close" to the current seqids.  It needs to be
			 * within 2**31 to count as "behind", so if it
			 * gets too near that limit, give us a little leeway
			 * and bring it to within 2**30.
			 * NOTE - and yes, this is all unsigned arithmetic.
			 */
			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
				lo->plh_barrier = newseq - (1 << 30);
		}
	}
}
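
/*
 * Worked example of the serial-number arithmetic above (unsigned
 * wraparound, RFC 1982 style): with oldseq == 0xfffffffe and a new
 * seqid of 0x00000001, newseq - oldseq == 3 and (int)3 > 0, so the new
 * stateid is accepted even though it is numerically smaller.  The
 * barrier clamp works the same way: once newseq - plh_barrier exceeds
 * 3 << 29 (three quarters of the 2**31 "behind" window), the barrier is
 * pulled up to newseq - 2**30 so that comparisons against it stay well
 * inside the window.
 */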
/* lget is set to 1 if called from inside send_layoutget call chain */
static bool
pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
			int lget)
{
	if ((stateid) &&
	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
		return true;
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
		(list_empty(&lo->plh_segs) &&
		 (atomic_read(&lo->plh_outstanding) > lget));
}

int
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
			      struct nfs4_state *open_state)
{
	int status = 0;

	dprintk("--> %s\n", __func__);
	spin_lock(&lo->plh_inode->i_lock);
	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
		status = -EAGAIN;
	} else if (list_empty(&lo->plh_segs)) {
		int seq;

		do {
			seq = read_seqbegin(&open_state->seqlock);
			memcpy(dst->data, open_state->stateid.data,
			       sizeof(open_state->stateid.data));
		} while (read_seqretry(&open_state->seqlock, seq));
	} else
		memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
	spin_unlock(&lo->plh_inode->i_lock);
	dprintk("<-- %s\n", __func__);
	return status;
}
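
/*
 * Stateid selection rule implemented above: the first LAYOUTGET on a
 * file (no segments cached yet) must carry the OPEN stateid, read under
 * the open_state seqlock so a racing OPEN upgrade cannot hand us a torn
 * copy; every subsequent LAYOUTGET carries the layout stateid most
 * recently returned by the server.
 */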
/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset: 0
 *    arg->length: all ones
 */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	   struct nfs_open_context *ctx,
	   u32 iomode)
{
	struct inode *ino = lo->plh_inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	struct pnfs_layout_segment *lseg = NULL;

	dprintk("--> %s\n", __func__);

	BUG_ON(ctx == NULL);
	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
	if (lgp == NULL)
		return NULL;
	lgp->args.minlength = NFS4_MAX_UINT64;
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	lgp->args.range.iomode = iomode;
	lgp->args.range.offset = 0;
	lgp->args.range.length = NFS4_MAX_UINT64;
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	lgp->lsegpp = &lseg;

	/* Synchronously retrieve layout information from server and
	 * store in lseg.
	 */
	nfs4_proc_layoutget(lgp);
	if (!lseg) {
		/* remember that LAYOUTGET failed and suspend trying */
		set_bit(lo_fail_bit(iomode), &lo->plh_flags);
	}
	return lseg;
}
bool pnfs_roc(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *tmp;
	LIST_HEAD(tmp_list);
	bool found = false;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
		goto out_nolayout;
	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			mark_lseg_invalid(lseg, &tmp_list);
			found = true;
		}
	if (!found)
		goto out_nolayout;
	lo->plh_block_lgets++;
	get_layout_hdr(lo); /* matched in pnfs_roc_release */
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	return true;

out_nolayout:
	spin_unlock(&ino->i_lock);
	return false;
}

void pnfs_roc_release(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	lo->plh_block_lgets--;
	put_layout_hdr_locked(lo);
	spin_unlock(&ino->i_lock);
}

void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if ((int)(barrier - lo->plh_barrier) > 0)
		lo->plh_barrier = barrier;
	spin_unlock(&ino->i_lock);
}

bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_segment *lseg;
	bool found = false;

	spin_lock(&ino->i_lock);
	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			found = true;
			break;
		}
	if (!found) {
		struct pnfs_layout_hdr *lo = nfsi->layout;
		u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);

		/* Since close does not return a layout stateid for use as
		 * a barrier, we choose the worst-case barrier.
		 */
		*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	}
	spin_unlock(&ino->i_lock);
	return found;
}
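
/*
 * Worst-case barrier example for pnfs_roc_drain(): if the cached layout
 * stateid has seqid 7 and two LAYOUTGETs are still outstanding, each
 * reply can bump the seqid by one, so the barrier is set to 7 + 2 = 9.
 * Per pnfs_layoutgets_blocked() above, replies carrying a seqid at or
 * below that barrier are then ignored as predating the close.
 */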
/*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
cmp_layout(u32 iomode1, u32 iomode2)
{
	/* read > read/write */
	return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
}
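
/*
 * cmp_layout() by example (derived from the expression above):
 * cmp_layout(IOMODE_RW, IOMODE_READ) == 1, cmp_layout(IOMODE_READ,
 * IOMODE_RW) == -1, and equal iomodes compare as 0.  Since
 * pnfs_insert_layout() below skips past every lp with
 * cmp_layout(lp, lseg) > 0 and inserts before the first lp where it is
 * not, RW segments collect at the head of plh_segs and READ segments at
 * the tail.
 */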
static void
pnfs_insert_layout(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_segment *lp;
	int found = 0;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->plh_inode->i_lock);
	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
		if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
			continue;
		list_add_tail(&lseg->pls_list, &lp->pls_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length,
			lp, lp->pls_range.iomode, lp->pls_range.offset,
			lp->pls_range.length);
		found = 1;
		break;
	}
	if (!found) {
		list_add_tail(&lseg->pls_list, &lo->plh_segs);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu at tail\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length);
	}
	get_layout_hdr(lo);

	dprintk("%s:Return\n", __func__);
}
static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
	if (!lo)
		return NULL;
	atomic_set(&lo->plh_refcount, 1);
	INIT_LIST_HEAD(&lo->plh_layouts);
	INIT_LIST_HEAD(&lo->plh_segs);
	INIT_LIST_HEAD(&lo->plh_bulk_recall);
	lo->plh_inode = ino;
	return lo;
}

static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	assert_spin_locked(&ino->i_lock);
	if (nfsi->layout) {
		if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
			return NULL;
		else
			return nfsi->layout;
	}
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL))	/* Won the race? */
		nfsi->layout = new;
	else
		kfree(new);
	return nfsi->layout;
}

/*
 * iomode matching rules:
 * iomode	lseg	match
 * -----	-----	-----
 * ANY		READ	true
 * ANY		RW	true
 * RW		READ	false
 * RW		RW	true
 * READ		READ	true
 * READ		RW	true
 */
static int
is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
{
	return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
}
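
/*
 * In other words, the only request an lseg cannot satisfy is an RW
 * request against a read-only segment: a reader can always be served
 * from an RW layout, but a writer needs RW.  For example, a READ
 * request that finds a cached RW lseg reuses it rather than issuing a
 * new LAYOUTGET.
 */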
/*
 * lookup range in layout
 */
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->plh_inode->i_lock);
	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
		    is_matching_lseg(lseg, iomode)) {
			ret = get_lseg(lseg);
			break;
		}
		if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
			break;
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
	return ret;
}
/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   enum pnfs_iomode iomode)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg = NULL;
	bool first = false;

	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
		return NULL;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino);
	if (lo == NULL) {
		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
		goto out_unlock;
	}

	/* Do we even need to bother with this? */
	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s matches recall, use MDS\n", __func__);
		goto out_unlock;
	}

	/* Check to see if the layout for the given range already exists */
	lseg = pnfs_find_lseg(lo, iomode);
	if (lseg)
		goto out_unlock;

	/* if LAYOUTGET already failed once we don't try again */
	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
		goto out_unlock;

	if (pnfs_layoutgets_blocked(lo, NULL, 0))
		goto out_unlock;
	atomic_inc(&lo->plh_outstanding);

	get_layout_hdr(lo);
	if (list_empty(&lo->plh_segs))
		first = true;
	spin_unlock(&ino->i_lock);
	if (first) {
		/* The lo must be on the clp list if there is any
		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
		 */
		spin_lock(&clp->cl_lock);
		BUG_ON(!list_empty(&lo->plh_layouts));
		list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
		spin_unlock(&clp->cl_lock);
	}

	lseg = send_layoutget(lo, ctx, iomode);
	if (!lseg && first) {
		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	atomic_dec(&lo->plh_outstanding);
	put_layout_hdr(lo);
out:
	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
		nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out;
}
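
/*
 * Typical caller-side usage of the function above (an illustrative
 * sketch, not a quote of any particular call site): ask for a segment
 * before issuing I/O and fall back to ordinary NFSv4 I/O through the
 * MDS whenever NULL comes back.
 *
 *	lseg = pnfs_update_layout(inode, ctx, IOMODE_READ);
 *	if (!lseg)
 *		return;		// use the regular MDS I/O path
 *	...			// issue I/O through the layout driver
 *	put_lseg(lseg);		// drop the reference taken above
 */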
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->plh_inode;
	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
	int status = 0;

	/* Verify we got what we asked for.
	 * Note that because the xdr parsing only accepts a single
	 * element array, this can fail even if the server is behaving
	 * correctly.
	 */
	if (lgp->args.range.iomode > res->range.iomode ||
	    res->range.offset != 0 ||
	    res->range.length != NFS4_MAX_UINT64) {
		status = -EINVAL;
		goto out;
	}
	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
	if (!lseg || IS_ERR(lseg)) {
		if (!lseg)
			status = -ENOMEM;
		else
			status = PTR_ERR(lseg);
		dprintk("%s: Could not allocate layout: error %d\n",
		       __func__, status);
		goto out;
	}

	spin_lock(&ino->i_lock);
	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s forget reply due to recall\n", __func__);
		goto out_forget_reply;
	}

	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
		dprintk("%s forget reply due to state\n", __func__);
		goto out_forget_reply;
	}
	init_lseg(lo, lseg);
	lseg->pls_range = res->range;
	*lgp->lsegpp = get_lseg(lseg);
	pnfs_insert_layout(lo, lseg);

	if (res->return_on_close) {
		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
	}

	/* Done processing layoutget. Set the layout stateid */
	pnfs_set_layout_stateid(lo, &res->stateid, false);
	spin_unlock(&ino->i_lock);
out:
	return status;

out_forget_reply:
	spin_unlock(&ino->i_lock);
	lseg->pls_layout = lo;
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	goto out;
}
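
/*
 * "Forgetting" a reply, as done above, follows the forgetful-client
 * model: rather than returning a layout that raced with a recall, the
 * client frees the just-allocated segment and never uses it, which is
 * safe because no I/O has been issued against that layout yet.
 */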
static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
			     struct nfs_page *prev,
			     struct nfs_page *req)
{
	if (pgio->pg_count == prev->wb_bytes) {
		/* This is the first coalesce call for a series of nfs_pages */
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   prev->wb_context,
						   IOMODE_READ);
	}
	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}

void
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
	struct pnfs_layoutdriver_type *ld;

	ld = NFS_SERVER(inode)->pnfs_curr_ld;
	pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
}
/*
 * Device ID cache. Currently supports one layout type per struct nfs_client.
 * Add layout type to the lookup key to expand to support multiple types.
 */
int
pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
			 void (*free_callback)(struct pnfs_deviceid_node *))
{
	struct pnfs_deviceid_cache *c;

	c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
	if (!c)
		return -ENOMEM;
	spin_lock(&clp->cl_lock);
	if (clp->cl_devid_cache != NULL) {
		atomic_inc(&clp->cl_devid_cache->dc_ref);
		dprintk("%s [kref [%d]]\n", __func__,
			atomic_read(&clp->cl_devid_cache->dc_ref));
		kfree(c);
	} else {
		/* kzalloc initializes hlists */
		spin_lock_init(&c->dc_lock);
		atomic_set(&c->dc_ref, 1);
		c->dc_free_callback = free_callback;
		clp->cl_devid_cache = c;
		dprintk("%s [new]\n", __func__);
	}
	spin_unlock(&clp->cl_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
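
/*
 * Expected lifecycle (a sketch of how a layout driver is meant to use
 * this cache, summarizing the exported entry points below): call
 * pnfs_alloc_init_deviceid_cache() at mount time with a free callback;
 * call pnfs_find_get_deviceid() and pnfs_add_deviceid() as
 * GETDEVICEINFO replies arrive; call pnfs_put_deviceid() from the
 * driver's free_lseg path when the last lseg using a device goes away;
 * and call pnfs_put_deviceid_cache() at umount to drop the reference on
 * the cache itself.
 */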
/*
 * Called from pnfs_layoutdriver_type->free_lseg
 * last layout segment reference frees deviceid
 */
void
pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
		  struct pnfs_deviceid_node *devid)
{
	struct nfs4_deviceid *id = &devid->de_id;
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long h = nfs4_deviceid_hash(id);

	dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
	if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
		return;

	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			hlist_del_rcu(&d->de_node);
			spin_unlock(&c->dc_lock);
			synchronize_rcu();
			c->dc_free_callback(devid);
			return;
		}
	spin_unlock(&c->dc_lock);
	/* Why wasn't it found in the list? */
	BUG();
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
/* Find and reference a deviceid */
struct pnfs_deviceid_node *
pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
{
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long hash = nfs4_deviceid_hash(id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	rcu_read_lock();
	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			if (!atomic_inc_not_zero(&d->de_ref)) {
				goto fail;
			} else {
				rcu_read_unlock();
				return d;
			}
		}
	}
fail:
	rcu_read_unlock();
	return NULL;
}
EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
/*
 * Add a deviceid to the cache.
 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
 */
struct pnfs_deviceid_node *
pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
{
	struct pnfs_deviceid_node *d;
	long hash = nfs4_deviceid_hash(&new->de_id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	spin_lock(&c->dc_lock);
	d = pnfs_find_get_deviceid(c, &new->de_id);
	if (d) {
		spin_unlock(&c->dc_lock);
		dprintk("%s [discard]\n", __func__);
		c->dc_free_callback(new);
		return d;
	}
	INIT_HLIST_NODE(&new->de_node);
	atomic_set(&new->de_ref, 1);
	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
	spin_unlock(&c->dc_lock);
	dprintk("%s [new]\n", __func__);
	return new;
}
EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
void
pnfs_put_deviceid_cache(struct nfs_client *clp)
{
	struct pnfs_deviceid_cache *local = clp->cl_devid_cache;

	dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
	if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
		int i;
		/* Verify cache is empty */
		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
			BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
		clp->cl_devid_cache = NULL;
		spin_unlock(&clp->cl_lock);
		kfree(local);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);