pnfs.c
  1. /*
  2. * pNFS functions to call and manage layout drivers.
  3. *
  4. * Copyright (c) 2002 [year of first publication]
  5. * The Regents of the University of Michigan
  6. * All Rights Reserved
  7. *
  8. * Dean Hildebrand <dhildebz@umich.edu>
  9. *
  10. * Permission is granted to use, copy, create derivative works, and
  11. * redistribute this software and such derivative works for any purpose,
  12. * so long as the name of the University of Michigan is not used in
  13. * any advertising or publicity pertaining to the use or distribution
  14. * of this software without specific, written prior authorization. If
  15. * the above copyright notice or any other identification of the
  16. * University of Michigan is included in any copy of any portion of
  17. * this software, then the disclaimer below must also be included.
  18. *
  19. * This software is provided as is, without representation or warranty
  20. * of any kind either express or implied, including without limitation
  21. * the implied warranties of merchantability, fitness for a particular
  22. * purpose, or noninfringement. The Regents of the University of
  23. * Michigan shall not be liable for any damages, including special,
  24. * indirect, incidental, or consequential damages, with respect to any
  25. * claim arising out of or in connection with the use of the software,
  26. * even if it has been or is hereafter advised of the possibility of
  27. * such damages.
  28. */
  29. #include <linux/nfs_fs.h>
  30. #include "internal.h"
  31. #include "pnfs.h"
  32. #include "iostat.h"
  33. #define NFSDBG_FACILITY NFSDBG_PNFS
  34. /* Locking:
  35. *
  36. * pnfs_spinlock:
  37. * protects pnfs_modules_tbl.
  38. */
  39. static DEFINE_SPINLOCK(pnfs_spinlock);
  40. /*
  41. * pnfs_modules_tbl holds all pnfs modules
  42. */
  43. static LIST_HEAD(pnfs_modules_tbl);
  44. /* Return the registered pnfs layout driver module matching given id */
  45. static struct pnfs_layoutdriver_type *
  46. find_pnfs_driver_locked(u32 id)
  47. {
  48. struct pnfs_layoutdriver_type *local;
  49. list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
  50. if (local->id == id)
  51. goto out;
  52. local = NULL;
  53. out:
  54. dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
  55. return local;
  56. }
  57. static struct pnfs_layoutdriver_type *
  58. find_pnfs_driver(u32 id)
  59. {
  60. struct pnfs_layoutdriver_type *local;
  61. spin_lock(&pnfs_spinlock);
  62. local = find_pnfs_driver_locked(id);
  63. spin_unlock(&pnfs_spinlock);
  64. return local;
  65. }
  66. void
  67. unset_pnfs_layoutdriver(struct nfs_server *nfss)
  68. {
  69. if (nfss->pnfs_curr_ld) {
  70. nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
  71. module_put(nfss->pnfs_curr_ld->owner);
  72. }
  73. nfss->pnfs_curr_ld = NULL;
  74. }
  75. /*
  76. * Try to set the server's pnfs module to the pnfs layout type specified by id.
  77. * Currently only one pNFS layout driver per filesystem is supported.
  78. *
  79. * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
  80. */
void
set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	/* id 0 is the reserved "no layout type" value: use plain MDS I/O */
	if (id == 0)
		goto out_no_driver;
	/* Server must have advertised pNFS capability via EXCHANGE_ID */
	if (!(server->nfs_client->cl_exchange_flags &
	      (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
		       id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		/* Not registered yet: try to autoload the module for this
		 * layout type, then look again. */
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	/* Pin the driver module for the lifetime of this mount;
	 * released in unset_pnfs_layoutdriver(). */
	if (!try_module_get(ld_type->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		goto out_no_driver;
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver(server)) {
		printk(KERN_ERR
		       "%s: Error initializing mount point for layout driver %u.\n",
		       __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	/* Any failure above degrades gracefully to non-pNFS I/O */
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}
  121. int
  122. pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
  123. {
  124. int status = -EINVAL;
  125. struct pnfs_layoutdriver_type *tmp;
  126. if (ld_type->id == 0) {
  127. printk(KERN_ERR "%s id 0 is reserved\n", __func__);
  128. return status;
  129. }
  130. if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
  131. printk(KERN_ERR "%s Layout driver must provide "
  132. "alloc_lseg and free_lseg.\n", __func__);
  133. return status;
  134. }
  135. spin_lock(&pnfs_spinlock);
  136. tmp = find_pnfs_driver_locked(ld_type->id);
  137. if (!tmp) {
  138. list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
  139. status = 0;
  140. dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
  141. ld_type->name);
  142. } else {
  143. printk(KERN_ERR "%s Module with id %d already loaded!\n",
  144. __func__, ld_type->id);
  145. }
  146. spin_unlock(&pnfs_spinlock);
  147. return status;
  148. }
  149. EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	/* Unlink the driver from the global table under the table lock. */
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  159. /*
  160. * pNFS client layout cache
  161. */
  162. /* Need to hold i_lock if caller does not already hold reference */
void
get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	/* Take a reference on the layout header; per the locking note
	 * above, the caller must hold i_lock or already own a reference. */
	atomic_inc(&lo->plh_refcount);
}
  168. static void
  169. destroy_layout_hdr(struct pnfs_layout_hdr *lo)
  170. {
  171. dprintk("%s: freeing layout cache %p\n", __func__, lo);
  172. BUG_ON(!list_empty(&lo->plh_layouts));
  173. NFS_I(lo->plh_inode)->layout = NULL;
  174. kfree(lo);
  175. }
static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	/* Caller holds inode->i_lock; free the header on last reference. */
	if (atomic_dec_and_test(&lo->plh_refcount))
		destroy_layout_hdr(lo);
}
void
put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	/* Only take i_lock when this drop is the final reference, so that
	 * destroy_layout_hdr() runs with the lock held. */
	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		destroy_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
	}
}
static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->pls_list);
	atomic_set(&lseg->pls_refcount, 1);
	/* Order the refcount store before setting VALID so that anyone
	 * observing the VALID bit also sees an initialized refcount. */
	smp_mb();
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
}
  200. static void free_lseg(struct pnfs_layout_segment *lseg)
  201. {
  202. struct inode *ino = lseg->pls_layout->plh_inode;
  203. NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
  204. /* Matched by get_layout_hdr in pnfs_insert_layout */
  205. put_layout_hdr(NFS_I(ino)->layout);
  206. }
static void
put_lseg_common(struct pnfs_layout_segment *lseg)
{
	struct inode *inode = lseg->pls_layout->plh_inode;

	/* Caller holds inode->i_lock and has dropped the last segment
	 * reference; the VALID bit must have been cleared beforehand. */
	BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	list_del_init(&lseg->pls_list);
	if (list_empty(&lseg->pls_layout->plh_segs)) {
		set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
		/* Matched by initial refcount set in alloc_init_layout_hdr */
		put_layout_hdr_locked(lseg->pls_layout);
	}
	/* Wake waiters draining return-on-close segments */
	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}
void
put_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *inode;

	/* NULL-safe, like kfree() */
	if (!lseg)
		return;

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	inode = lseg->pls_layout->plh_inode;
	/* On the final reference: unlink under i_lock, then free the
	 * segment outside the lock via pnfs_free_lseg_list(). */
	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
		LIST_HEAD(free_me);

		put_lseg_common(lseg);
		list_add(&lseg->pls_list, &free_me);
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&free_me);
	}
}
  238. static bool
  239. should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
  240. {
  241. return (recall_iomode == IOMODE_ANY ||
  242. lseg_iomode == recall_iomode);
  243. }
  244. /* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
			     struct list_head *tmp_list)
{
	int rv = 0;

	/* Caller holds inode->i_lock. */
	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
			atomic_read(&lseg->pls_refcount));
		if (atomic_dec_and_test(&lseg->pls_refcount)) {
			/* No I/O in flight: move it to tmp_list for freeing */
			put_lseg_common(lseg);
			list_add(&lseg->pls_list, tmp_list);
			rv = 1;
		}
	}
	return rv;
}
  264. /* Returns count of number of matching invalid lsegs remaining in list
  265. * after call.
  266. */
int
mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
			    struct list_head *tmp_list,
			    u32 iomode)
{
	struct pnfs_layout_segment *lseg, *next;
	int invalid = 0, removed = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	/* Already empty: mark destroyed and drop the initial header ref
	 * (only once, guarded by the DESTROYED bit). */
	if (list_empty(&lo->plh_segs)) {
		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
			put_layout_hdr_locked(lo);
		return 0;
	}
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
			dprintk("%s: freeing lseg %p iomode %d "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
				lseg->pls_range.length);
			invalid++;
			removed += mark_lseg_invalid(lseg, tmp_list);
		}
	dprintk("%s:Return %i\n", __func__, invalid - removed);
	/* invalid - removed == matching segments still pinned by I/O */
	return invalid - removed;
}
  292. /* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;
	struct pnfs_layout_hdr *lo;

	if (list_empty(free_me))
		return;

	/* Per the caller contract above, all lsegs on free_me belong to
	 * the same layout_hdr, so the first entry identifies it. */
	lo = list_first_entry(free_me, struct pnfs_layout_segment,
			      pls_list)->pls_layout;

	/* If the header is being destroyed, unhook it from the
	 * per-client layouts list under cl_lock. */
	if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
		struct nfs_client *clp;

		clp = NFS_SERVER(lo->plh_inode)->nfs_client;
		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		free_lseg(lseg);
	}
}
void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
		mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
	}
	spin_unlock(&nfsi->vfs_inode.i_lock);
	/* The actual freeing must happen outside i_lock */
	pnfs_free_lseg_list(&tmp_list);
}
  328. /*
  329. * Called by the state manager to remove all layouts established under an
  330. * expired lease.
  331. */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	/* Detach the whole per-client list in one shot under cl_lock */
	spin_lock(&clp->cl_lock);
	list_splice_init(&clp->cl_layouts, &tmp_list);
	spin_unlock(&clp->cl_lock);

	/* pnfs_destroy_layout() ends up unlinking each header from
	 * tmp_list (via pnfs_free_lseg_list), so this loop terminates. */
	while (!list_empty(&tmp_list)) {
		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
				plh_layouts);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		pnfs_destroy_layout(NFS_I(lo->plh_inode));
	}
}
  348. /* update lo->plh_stateid with new if is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq;

	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
	newseq = be32_to_cpu(new->stateid.seqid);
	/* Serial-number comparison: only accept a strictly newer seqid */
	if ((int)(newseq - oldseq) > 0) {
		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
		if (update_barrier) {
			u32 new_barrier = be32_to_cpu(new->stateid.seqid);

			/* NOTE(review): this nonzero test updates the barrier
			 * whenever it differs, not only when it moves
			 * forward — confirm that is intended. */
			if ((int)(new_barrier - lo->plh_barrier))
				lo->plh_barrier = new_barrier;
		} else {
			/* Because of wraparound, we want to keep the barrier
			 * "close" to the current seqids.  It needs to be
			 * within 2**31 to count as "behind", so if it
			 * gets too near that limit, give us a little leeway
			 * and bring it to within 2**30.
			 * NOTE - and yes, this is all unsigned arithmetic.
			 */
			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
				lo->plh_barrier = newseq - (1 << 30);
		}
	}
}
  375. /* lget is set to 1 if called from inside send_layoutget call chain */
static bool
pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
			int lget)
{
	/* A stateid whose seqid is at or behind the barrier is stale */
	if ((stateid) &&
	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
		return true;
	/* Otherwise blocked if: lgets are administratively blocked, the
	 * layout is being destroyed or bulk-recalled, or this would be an
	 * initial LAYOUTGET while others are already outstanding (lget
	 * excludes our own in-flight request). */
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
		(list_empty(&lo->plh_segs) &&
		 (atomic_read(&lo->plh_outstanding) > lget));
}
int
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
			      struct nfs4_state *open_state)
{
	int status = 0;

	dprintk("--> %s\n", __func__);
	spin_lock(&lo->plh_inode->i_lock);
	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
		status = -EAGAIN;
	} else if (list_empty(&lo->plh_segs)) {
		int seq;

		/* First LAYOUTGET for this file: use the open stateid.
		 * The seqlock guards against a concurrent stateid update. */
		do {
			seq = read_seqbegin(&open_state->seqlock);
			memcpy(dst->data, open_state->stateid.data,
			       sizeof(open_state->stateid.data));
		} while (read_seqretry(&open_state->seqlock, seq));
	} else
		/* Otherwise continue with the current layout stateid */
		memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
	spin_unlock(&lo->plh_inode->i_lock);
	dprintk("<-- %s\n", __func__);
	return status;
}
  411. /*
  412. * Get layout from server.
  413. * for now, assume that whole file layouts are requested.
  414. * arg->offset: 0
  415. * arg->length: all ones
  416. */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	       struct nfs_open_context *ctx,
	       u32 iomode)
{
	struct inode *ino = lo->plh_inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	struct pnfs_layout_segment *lseg = NULL;

	dprintk("--> %s\n", __func__);

	BUG_ON(ctx == NULL);
	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
	if (lgp == NULL)
		return NULL;
	/* Whole-file request: offset 0, length all ones (see comment above) */
	lgp->args.minlength = NFS4_MAX_UINT64;
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	lgp->args.range.iomode = iomode;
	lgp->args.range.offset = 0;
	lgp->args.range.length = NFS4_MAX_UINT64;
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	lgp->lsegpp = &lseg;

	/* Synchronously retrieve layout information from server and
	 * store in lseg.
	 */
	nfs4_proc_layoutget(lgp);
	if (!lseg) {
		/* remember that LAYOUTGET failed and suspend trying */
		set_bit(lo_fail_bit(iomode), &lo->plh_flags);
	}
	return lseg;
}
/* Return-on-close: invalidate all ROC segments at file close.
 * Returns true if any ROC segment was found (caller then sends
 * the layoutreturn-on-close machinery into action). */
bool pnfs_roc(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *tmp;
	LIST_HEAD(tmp_list);
	bool found = false;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	/* Nothing to do unless ROC is set and no bulk recall is pending */
	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
		goto out_nolayout;
	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			mark_lseg_invalid(lseg, &tmp_list);
			found = true;
		}
	if (!found)
		goto out_nolayout;
	/* Block new LAYOUTGETs while close/layoutreturn is in progress */
	lo->plh_block_lgets++;
	get_layout_hdr(lo); /* matched in pnfs_roc_release */
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	return true;

out_nolayout:
	spin_unlock(&ino->i_lock);
	return false;
}
void pnfs_roc_release(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	/* Undo the lget block and the header reference taken in pnfs_roc() */
	lo->plh_block_lgets--;
	put_layout_hdr_locked(lo);
	spin_unlock(&ino->i_lock);
}
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	/* Serial-number comparison: only move the barrier forward */
	if ((int)(barrier - lo->plh_barrier) > 0)
		lo->plh_barrier = barrier;
	spin_unlock(&ino->i_lock);
}
/* Returns true while ROC segments still exist (I/O not yet drained);
 * once drained, computes a worst-case seqid barrier for the caller. */
bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_segment *lseg;
	bool found = false;

	spin_lock(&ino->i_lock);
	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			found = true;
			break;
		}
	if (!found) {
		struct pnfs_layout_hdr *lo = nfsi->layout;
		u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);

		/* Since close does not return a layout stateid for use as
		 * a barrier, we choose the worst-case barrier.
		 */
		*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	}
	spin_unlock(&ino->i_lock);
	return found;
}
  517. /*
  518. * Compare two layout segments for sorting into layout cache.
  519. * We want to preferentially return RW over RO layouts, so ensure those
  520. * are seen first.
  521. */
  522. static s64
  523. cmp_layout(u32 iomode1, u32 iomode2)
  524. {
  525. /* read > read/write */
  526. return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
  527. }
static void
pnfs_insert_layout(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_segment *lp;
	int found = 0;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->plh_inode->i_lock);
	/* Keep plh_segs sorted so RW segments precede READ ones */
	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
		if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
			continue;
		list_add_tail(&lseg->pls_list, &lp->pls_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length,
			lp, lp->pls_range.iomode, lp->pls_range.offset,
			lp->pls_range.length);
		found = 1;
		break;
	}
	if (!found) {
		list_add_tail(&lseg->pls_list, &lo->plh_segs);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu at tail\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length);
	}
	/* Header reference is dropped in free_lseg() */
	get_layout_hdr(lo);

	dprintk("%s:Return\n", __func__);
}
  560. static struct pnfs_layout_hdr *
  561. alloc_init_layout_hdr(struct inode *ino)
  562. {
  563. struct pnfs_layout_hdr *lo;
  564. lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
  565. if (!lo)
  566. return NULL;
  567. atomic_set(&lo->plh_refcount, 1);
  568. INIT_LIST_HEAD(&lo->plh_layouts);
  569. INIT_LIST_HEAD(&lo->plh_segs);
  570. INIT_LIST_HEAD(&lo->plh_bulk_recall);
  571. lo->plh_inode = ino;
  572. return lo;
  573. }
static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	assert_spin_locked(&ino->i_lock);
	if (nfsi->layout) {
		/* Refuse a header that is already being torn down */
		if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
			return NULL;
		else
			return nfsi->layout;
	}
	/* Drop i_lock for the GFP_KERNEL allocation, then recheck for a
	 * racing allocator once we retake it. */
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL))	/* Won the race? */
		nfsi->layout = new;
	else
		kfree(new);	/* lost the race: discard our copy */
	return nfsi->layout;
}
  596. /*
  597. * iomode matching rules:
  598. * iomode lseg match
  599. * ----- ----- -----
  600. * ANY READ true
  601. * ANY RW true
  602. * RW READ false
  603. * RW RW true
  604. * READ READ true
  605. * READ RW true
  606. */
  607. static int
  608. is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
  609. {
  610. return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
  611. }
  612. /*
  613. * lookup range in layout
  614. */
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->plh_inode->i_lock);
	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
		    is_matching_lseg(lseg, iomode)) {
			/* Returns a referenced segment */
			ret = get_lseg(lseg);
			break;
		}
		/* List is sorted RW-first; once past possible matches, stop */
		if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
			break;
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
	return ret;
}
  634. /*
  635. * Layout segment is retrieved from the server if not cached.
  636. * The appropriate layout segment is referenced and returned to the caller.
  637. */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   enum pnfs_iomode iomode)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg = NULL;
	bool first = false;

	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
		return NULL;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino);
	if (lo == NULL) {
		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
		goto out_unlock;
	}

	/* Do we even need to bother with this? */
	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s matches recall, use MDS\n", __func__);
		goto out_unlock;
	}

	/* Check to see if the layout for the given range already exists */
	lseg = pnfs_find_lseg(lo, iomode);
	if (lseg)
		goto out_unlock;

	/* if LAYOUTGET already failed once we don't try again */
	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
		goto out_unlock;

	if (pnfs_layoutgets_blocked(lo, NULL, 0))
		goto out_unlock;
	/* Count our LAYOUTGET as outstanding; dropped below */
	atomic_inc(&lo->plh_outstanding);

	/* Pin the header across the unlocked RPC; dropped below */
	get_layout_hdr(lo);
	if (list_empty(&lo->plh_segs))
		first = true;
	spin_unlock(&ino->i_lock);
	if (first) {
		/* The lo must be on the clp list if there is any
		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
		 */
		spin_lock(&clp->cl_lock);
		BUG_ON(!list_empty(&lo->plh_layouts));
		list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
		spin_unlock(&clp->cl_lock);
	}

	lseg = send_layoutget(lo, ctx, iomode);
	if (!lseg && first) {
		/* Initial LAYOUTGET failed: take lo back off the clp list */
		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	atomic_dec(&lo->plh_outstanding);
	put_layout_hdr(lo);
out:
	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
		nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out;
}
/* Process a LAYOUTGET reply: validate the returned range, have the
 * layout driver build an lseg from the layout blob, and insert it into
 * the layout cache unless a recall raced with the reply. */
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->plh_inode;
	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
	int status = 0;

	/* Verify we got what we asked for.
	 * Note that because the xdr parsing only accepts a single
	 * element array, this can fail even if the server is behaving
	 * correctly.
	 */
	if (lgp->args.range.iomode > res->range.iomode ||
	    res->range.offset != 0 ||
	    res->range.length != NFS4_MAX_UINT64) {
		status = -EINVAL;
		goto out;
	}
	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
	if (!lseg || IS_ERR(lseg)) {
		if (!lseg)
			status = -ENOMEM;
		else
			status = PTR_ERR(lseg);
		dprintk("%s: Could not allocate layout: error %d\n",
			__func__, status);
		goto out;
	}

	spin_lock(&ino->i_lock);
	/* A recall arrived while the RPC was in flight: forget the reply */
	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s forget reply due to recall\n", __func__);
		goto out_forget_reply;
	}

	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
		dprintk("%s forget reply due to state\n", __func__);
		goto out_forget_reply;
	}
	init_lseg(lo, lseg);
	lseg->pls_range = res->range;
	*lgp->lsegpp = get_lseg(lseg);
	pnfs_insert_layout(lo, lseg);

	if (res->return_on_close) {
		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
	}

	/* Done processing layoutget. Set the layout stateid */
	pnfs_set_layout_stateid(lo, &res->stateid, false);
	spin_unlock(&ino->i_lock);
out:
	return status;

out_forget_reply:
	spin_unlock(&ino->i_lock);
	/* lseg was never inserted: free it back to the driver directly */
	lseg->pls_layout = lo;
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	goto out;
}
  761. static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
  762. struct nfs_page *prev,
  763. struct nfs_page *req)
  764. {
  765. if (pgio->pg_count == prev->wb_bytes) {
  766. /* This is first coelesce call for a series of nfs_pages */
  767. pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
  768. prev->wb_context,
  769. IOMODE_READ);
  770. }
  771. return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
  772. }
  773. void
  774. pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
  775. {
  776. struct pnfs_layoutdriver_type *ld;
  777. ld = NFS_SERVER(inode)->pnfs_curr_ld;
  778. pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
  779. }
  780. /*
  781. * Call the appropriate parallel I/O subsystem read function.
  782. */
enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
		      const struct rpc_call_ops *call_ops)
{
	struct inode *inode = rdata->inode;
	struct nfs_server *nfss = NFS_SERVER(inode);
	enum pnfs_try_status trypnfs;

	/* Stash the MDS call ops so the driver can fall back to them */
	rdata->mds_ops = call_ops;

	dprintk("%s: Reading ino:%lu %u@%llu\n",
		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);

	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
	if (trypnfs == PNFS_NOT_ATTEMPTED) {
		/* Driver declined: drop the lseg so the MDS path is taken */
		put_lseg(rdata->lseg);
		rdata->lseg = NULL;
	} else {
		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
	}
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}
  803. /*
  804. * Device ID cache. Currently supports one layout type per struct nfs_client.
  805. * Add layout type to the lookup key to expand to support multiple types.
  806. */
int
pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
			       void (*free_callback)(struct pnfs_deviceid_node *))
{
	struct pnfs_deviceid_cache *c;

	/* Allocate before taking cl_lock; discarded if we lose the race */
	c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
	if (!c)
		return -ENOMEM;
	spin_lock(&clp->cl_lock);
	if (clp->cl_devid_cache != NULL) {
		/* Cache already exists: just take another reference */
		atomic_inc(&clp->cl_devid_cache->dc_ref);
		dprintk("%s [kref [%d]]\n", __func__,
			atomic_read(&clp->cl_devid_cache->dc_ref));
		kfree(c);
	} else {
		/* kzalloc initializes hlists */
		spin_lock_init(&c->dc_lock);
		atomic_set(&c->dc_ref, 1);
		c->dc_free_callback = free_callback;
		clp->cl_devid_cache = c;
		dprintk("%s [new]\n", __func__);
	}
	spin_unlock(&clp->cl_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
  833. /*
  834. * Called from pnfs_layoutdriver_type->free_lseg
  835. * last layout segment reference frees deviceid
  836. */
void
pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
		  struct pnfs_deviceid_node *devid)
{
	struct nfs4_deviceid *id = &devid->de_id;
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long h = nfs4_deviceid_hash(id);

	dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
	/* Take dc_lock only when dropping the last reference */
	if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
		return;

	/* Last reference: unhash, wait out RCU readers, then free via
	 * the layout driver's callback. */
	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			hlist_del_rcu(&d->de_node);
			spin_unlock(&c->dc_lock);
			synchronize_rcu();
			c->dc_free_callback(devid);
			return;
		}
	spin_unlock(&c->dc_lock);
	/* Why wasn't it found in the list? */
	BUG();
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
  861. /* Find and reference a deviceid */
  862. struct pnfs_deviceid_node *
  863. pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
  864. {
  865. struct pnfs_deviceid_node *d;
  866. struct hlist_node *n;
  867. long hash = nfs4_deviceid_hash(id);
  868. dprintk("--> %s hash %ld\n", __func__, hash);
  869. rcu_read_lock();
  870. hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
  871. if (!memcmp(&d->de_id, id, sizeof(*id))) {
  872. if (!atomic_inc_not_zero(&d->de_ref)) {
  873. goto fail;
  874. } else {
  875. rcu_read_unlock();
  876. return d;
  877. }
  878. }
  879. }
  880. fail:
  881. rcu_read_unlock();
  882. return NULL;
  883. }
  884. EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
  885. /*
  886. * Add a deviceid to the cache.
  887. * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
  888. */
struct pnfs_deviceid_node *
pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
{
	struct pnfs_deviceid_node *d;
	long hash = nfs4_deviceid_hash(&new->de_id);

	dprintk("--> %s hash %ld\n", __func__, hash);
	spin_lock(&c->dc_lock);
	/* Racing GETDEVICEINFO replies: keep the existing node, which is
	 * returned already referenced, and discard the new one. */
	d = pnfs_find_get_deviceid(c, &new->de_id);
	if (d) {
		spin_unlock(&c->dc_lock);
		dprintk("%s [discard]\n", __func__);
		c->dc_free_callback(new);
		return d;
	}
	INIT_HLIST_NODE(&new->de_node);
	atomic_set(&new->de_ref, 1);
	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
	spin_unlock(&c->dc_lock);
	dprintk("%s [new]\n", __func__);
	return new;
}
EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
void
pnfs_put_deviceid_cache(struct nfs_client *clp)
{
	struct pnfs_deviceid_cache *local = clp->cl_devid_cache;

	dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
	/* Take cl_lock only when dropping the last reference */
	if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
		int i;

		/* Verify cache is empty */
		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
			BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
		clp->cl_devid_cache = NULL;
		spin_unlock(&clp->cl_lock);
		kfree(local);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);