objlayout.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788
  1. /*
  2. * pNFS Objects layout driver high level definitions
  3. *
  4. * Copyright (C) 2007 Panasas Inc. [year of first publication]
  5. * All rights reserved.
  6. *
  7. * Benny Halevy <bhalevy@panasas.com>
  8. * Boaz Harrosh <bharrosh@panasas.com>
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License version 2
  12. * See the file COPYING included with this distribution for more details.
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. *
  18. * 1. Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * 2. Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in the
  22. * documentation and/or other materials provided with the distribution.
  23. * 3. Neither the name of the Panasas company nor the names of its
  24. * contributors may be used to endorse or promote products derived
  25. * from this software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  28. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  29. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30. * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  31. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  34. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  35. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  36. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  37. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. */
  39. #include <linux/kmod.h>
  40. #include <linux/moduleparam.h>
  41. #include <linux/ratelimit.h>
  42. #include <scsi/osd_initiator.h>
  43. #include "objlayout.h"
  44. #define NFSDBG_FACILITY NFSDBG_PNFS_LD
  45. /*
  46. * Create a objlayout layout structure for the given inode and return it.
  47. */
  48. struct pnfs_layout_hdr *
  49. objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
  50. {
  51. struct objlayout *objlay;
  52. objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
  53. if (objlay) {
  54. spin_lock_init(&objlay->lock);
  55. INIT_LIST_HEAD(&objlay->err_list);
  56. }
  57. dprintk("%s: Return %p\n", __func__, objlay);
  58. return &objlay->pnfs_layout;
  59. }
/*
 * Free an objlayout layout structure
 */
void
objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct objlayout *objlay = OBJLAYOUT(lo);

	dprintk("%s: objlay %p\n", __func__, objlay);

	/* Any queued I/O error results should already have been reported
	 * and freed (see objlayout_encode_layoutreturn) by now. */
	WARN_ON(!list_empty(&objlay->err_list));
	kfree(objlay);
}
/*
 * Unmarshall layout and store it in pnfslay.
 *
 * Returns the new layout segment on success, ERR_PTR(-errno) on failure.
 */
struct pnfs_layout_segment *
objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
		     struct nfs4_layoutget_res *lgr,
		     gfp_t gfp_flags)
{
	int status = -ENOMEM;
	struct xdr_stream stream;
	struct xdr_buf buf = {
		.pages = lgr->layoutp->pages,
		.page_len = lgr->layoutp->len,
		.buflen = lgr->layoutp->len,
		.len = lgr->layoutp->len,
	};
	struct page *scratch;
	struct pnfs_layout_segment *lseg;

	dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);

	/* Scratch page lets the xdr decoder linearize items that straddle
	 * a page boundary in the layout body. */
	scratch = alloc_page(gfp_flags);
	if (!scratch)
		goto err_nofree;

	xdr_init_decode(&stream, &buf, NULL);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* The io-engine performs the actual decode and lseg allocation. */
	status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
	if (unlikely(status)) {
		dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
			status);
		goto err;
	}

	__free_page(scratch);
	dprintk("%s: Return %p\n", __func__, lseg);
	return lseg;

err:
	__free_page(scratch);
err_nofree:
	dprintk("%s: Err Return=>%d\n", __func__, status);
	return ERR_PTR(status);
}
  110. /*
  111. * Free a layout segement
  112. */
  113. void
  114. objlayout_free_lseg(struct pnfs_layout_segment *lseg)
  115. {
  116. dprintk("%s: freeing layout segment %p\n", __func__, lseg);
  117. if (unlikely(!lseg))
  118. return;
  119. objio_free_lseg(lseg);
  120. }
  121. /*
  122. * I/O Operations
  123. */
  124. static inline u64
  125. end_offset(u64 start, u64 len)
  126. {
  127. u64 end;
  128. end = start + len;
  129. return end >= start ? end : NFS4_MAX_UINT64;
  130. }
  131. /* last octet in a range */
  132. static inline u64
  133. last_byte_offset(u64 start, u64 len)
  134. {
  135. u64 end;
  136. BUG_ON(!len);
  137. end = start + len;
  138. return end > start ? end - 1 : NFS4_MAX_UINT64;
  139. }
/* Sanity-check an I/O request against its layout segment and normalize
 * the page vector: when pgbase points past the first page, advance
 * *p_pages by whole pages and reduce *p_pgbase to an in-page offset.
 */
static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
				  struct page ***p_pages, unsigned *p_pgbase,
				  u64 offset, unsigned long count)
{
	u64 lseg_end_offset;

	/* Request must start inside the segment's byte range... */
	BUG_ON(offset < lseg->pls_range.offset);
	lseg_end_offset = end_offset(lseg->pls_range.offset,
				     lseg->pls_range.length);
	BUG_ON(offset >= lseg_end_offset);
	/* ...and should not extend past its end (warn only). */
	WARN_ON(offset + count > lseg_end_offset);

	if (*p_pgbase > PAGE_SIZE) {
		dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
		*p_pages += *p_pgbase >> PAGE_SHIFT;
		*p_pgbase &= ~PAGE_MASK;
	}
}
  156. /*
  157. * I/O done common code
  158. */
  159. static void
  160. objlayout_iodone(struct objlayout_io_res *oir)
  161. {
  162. if (likely(oir->status >= 0)) {
  163. objio_free_result(oir);
  164. } else {
  165. struct objlayout *objlay = oir->objlay;
  166. spin_lock(&objlay->lock);
  167. objlay->delta_space_valid = OBJ_DSU_INVALID;
  168. list_add(&objlay->err_list, &oir->err_list);
  169. spin_unlock(&objlay->lock);
  170. }
  171. }
/*
 * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
 *
 * The @index component IO failed (error returned from target). Register
 * the error for later reporting at layout-return.
 */
void
objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
			struct pnfs_osd_objid *pooid, int osd_error,
			u64 offset, u64 length, bool is_write)
{
	struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];

	BUG_ON(index >= oir->num_comps);
	if (osd_error) {
		ioerr->oer_component = *pooid;
		ioerr->oer_comp_offset = offset;
		ioerr->oer_comp_length = length;
		ioerr->oer_iswrite = is_write;
		ioerr->oer_errno = osd_error;

		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
			__func__, index, ioerr->oer_errno,
			ioerr->oer_iswrite,
			_DEVID_LO(&ioerr->oer_component.oid_device_id),
			_DEVID_HI(&ioerr->oer_component.oid_device_id),
			ioerr->oer_component.oid_partition_id,
			ioerr->oer_component.oid_object_id,
			ioerr->oer_comp_offset,
			ioerr->oer_comp_length);
	} else {
		/* User need not call if no error is reported */
		/* errno == 0 marks this component slot as error-free for
		 * the layout-return encoders. */
		ioerr->oer_errno = 0;
	}
}
  206. /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
  207. * This is because the osd completion is called with ints-off from
  208. * the block layer
  209. */
  210. static void _rpc_read_complete(struct work_struct *work)
  211. {
  212. struct rpc_task *task;
  213. struct nfs_read_data *rdata;
  214. dprintk("%s enter\n", __func__);
  215. task = container_of(work, struct rpc_task, u.tk_work);
  216. rdata = container_of(task, struct nfs_read_data, task);
  217. pnfs_ld_read_done(rdata);
  218. }
/*
 * Read completion from the io-engine.  Records @status on the rpc task,
 * hands the result to objlayout_iodone() (which may queue it for error
 * reporting), then completes the read either inline (@sync) or via the
 * workqueue (osd completions can run with interrupts off).
 */
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
	struct nfs_read_data *rdata = oir->rpcdata;

	oir->status = rdata->task.tk_status = status;
	if (status >= 0)
		rdata->res.count = status;	/* bytes actually read */
	else
		rdata->header->pnfs_error = status;
	objlayout_iodone(oir);
	/* must not use oir after this point */

	dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
		status, rdata->res.eof, sync);

	if (sync)
		pnfs_ld_read_done(rdata);
	else {
		/* Defer completion to process context. */
		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
		schedule_work(&rdata->task.u.tk_work);
	}
}
/*
 * Perform sync or async reads.
 *
 * Clamps the request to i_size: a read starting at or past EOF completes
 * immediately with count 0 and eof set; a read crossing EOF is shortened.
 */
enum pnfs_try_status
objlayout_read_pagelist(struct nfs_read_data *rdata)
{
	struct nfs_pgio_header *hdr = rdata->header;
	struct inode *inode = hdr->inode;
	loff_t offset = rdata->args.offset;
	size_t count = rdata->args.count;
	int err;
	loff_t eof;

	eof = i_size_read(inode);
	if (unlikely(offset + count > eof)) {
		if (offset >= eof) {
			/* Whole request is past EOF: nothing to read. */
			err = 0;
			rdata->res.count = 0;
			rdata->res.eof = 1;
			/*FIXME: do we need to call pnfs_ld_read_done() */
			goto out;
		}
		count = eof - offset;
	}

	rdata->res.eof = (offset + count) >= eof;
	_fix_verify_io_params(hdr->lseg, &rdata->args.pages,
			      &rdata->args.pgbase,
			      rdata->args.offset, rdata->args.count);

	dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
		__func__, inode->i_ino, offset, count, rdata->res.eof);

	err = objio_read_pagelist(rdata);
 out:
	if (unlikely(err)) {
		hdr->pnfs_error = err;
		dprintk("%s: Returned Error %d\n", __func__, err);
		return PNFS_NOT_ATTEMPTED;
	}
	return PNFS_ATTEMPTED;
}
  277. /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
  278. * This is because the osd completion is called with ints-off from
  279. * the block layer
  280. */
  281. static void _rpc_write_complete(struct work_struct *work)
  282. {
  283. struct rpc_task *task;
  284. struct nfs_write_data *wdata;
  285. dprintk("%s enter\n", __func__);
  286. task = container_of(work, struct rpc_task, u.tk_work);
  287. wdata = container_of(task, struct nfs_write_data, task);
  288. pnfs_ld_write_done(wdata);
  289. }
/*
 * Write completion from the io-engine.  Records @status and the commit
 * level on the rpc task, hands the result to objlayout_iodone(), then
 * completes the write inline (@sync) or via the workqueue (osd
 * completions can run with interrupts off).
 */
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
	struct nfs_write_data *wdata = oir->rpcdata;

	oir->status = wdata->task.tk_status = status;
	if (status >= 0) {
		wdata->res.count = status;	/* bytes actually written */
		wdata->verf.committed = oir->committed;
	} else {
		wdata->header->pnfs_error = status;
	}
	objlayout_iodone(oir);
	/* must not use oir after this point */

	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
		status, wdata->verf.committed, sync);

	if (sync)
		pnfs_ld_write_done(wdata);
	else {
		/* Defer completion to process context. */
		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
		schedule_work(&wdata->task.u.tk_work);
	}
}
  312. /*
  313. * Perform sync or async writes.
  314. */
  315. enum pnfs_try_status
  316. objlayout_write_pagelist(struct nfs_write_data *wdata,
  317. int how)
  318. {
  319. struct nfs_pgio_header *hdr = wdata->header;
  320. int err;
  321. _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
  322. &wdata->args.pgbase,
  323. wdata->args.offset, wdata->args.count);
  324. err = objio_write_pagelist(wdata, how);
  325. if (unlikely(err)) {
  326. hdr->pnfs_error = err;
  327. dprintk("%s: Returned Error %d\n", __func__, err);
  328. return PNFS_NOT_ATTEMPTED;
  329. }
  330. return PNFS_ATTEMPTED;
  331. }
/*
 * Encode the objects LAYOUTCOMMIT payload: snapshot-and-reset the
 * delta-space accounting under the layout lock, then XDR-encode the
 * layoutupdate structure, back-patching the opaque body length.
 */
void
objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutcommit_args *args)
{
	struct objlayout *objlay = OBJLAYOUT(pnfslay);
	struct pnfs_osd_layoutupdate lou;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);

	spin_lock(&objlay->lock);
	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
	lou.dsu_delta = objlay->delta_space_used;
	/* Reset accounting; subsequent I/O accumulates a new delta. */
	objlay->delta_space_used = 0;
	objlay->delta_space_valid = OBJ_DSU_INIT;
	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
	spin_unlock(&objlay->lock);

	/* Reserve 4 bytes for the body length, encode, then patch the
	 * length in: number of 32-bit words written after it, times 4. */
	start = xdr_reserve_space(xdr, 4);

	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));

	*start = cpu_to_be32((xdr->p - start - 1) * 4);

	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
		lou.dsu_delta, lou.olu_ioerr_flag);
}
  354. static int
  355. err_prio(u32 oer_errno)
  356. {
  357. switch (oer_errno) {
  358. case 0:
  359. return 0;
  360. case PNFS_OSD_ERR_RESOURCE:
  361. return OSD_ERR_PRI_RESOURCE;
  362. case PNFS_OSD_ERR_BAD_CRED:
  363. return OSD_ERR_PRI_BAD_CRED;
  364. case PNFS_OSD_ERR_NO_ACCESS:
  365. return OSD_ERR_PRI_NO_ACCESS;
  366. case PNFS_OSD_ERR_UNREACHABLE:
  367. return OSD_ERR_PRI_UNREACHABLE;
  368. case PNFS_OSD_ERR_NOT_FOUND:
  369. return OSD_ERR_PRI_NOT_FOUND;
  370. case PNFS_OSD_ERR_NO_SPACE:
  371. return OSD_ERR_PRI_NO_SPACE;
  372. default:
  373. WARN_ON(1);
  374. /* fallthrough */
  375. case PNFS_OSD_ERR_EIO:
  376. return OSD_ERR_PRI_EIO;
  377. }
  378. }
/*
 * Fold @src_err into @dest_err, producing one descriptor covering the
 * union of both byte ranges.  Component ids that differ between the two
 * reports are blanked to 0; a write error dominates a read error,
 * otherwise the higher err_prio() errno wins.
 */
static void
merge_ioerr(struct pnfs_osd_ioerr *dest_err,
	    const struct pnfs_osd_ioerr *src_err)
{
	u64 dest_end, src_end;

	if (!dest_err->oer_errno) {
		/* First error folded in: take it wholesale. */
		*dest_err = *src_err;
		/* accumulated device must be blank */
		memset(&dest_err->oer_component.oid_device_id, 0,
		       sizeof(dest_err->oer_component.oid_device_id));

		return;
	}

	if (dest_err->oer_component.oid_partition_id !=
	    src_err->oer_component.oid_partition_id)
		dest_err->oer_component.oid_partition_id = 0;

	if (dest_err->oer_component.oid_object_id !=
	    src_err->oer_component.oid_object_id)
		dest_err->oer_component.oid_object_id = 0;

	/* Extend [offset, offset+length) to cover both reports. */
	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
		dest_err->oer_comp_offset = src_err->oer_comp_offset;

	dest_end = end_offset(dest_err->oer_comp_offset,
			      dest_err->oer_comp_length);
	src_end = end_offset(src_err->oer_comp_offset,
			     src_err->oer_comp_length);
	if (dest_end < src_end)
		dest_end = src_end;

	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;

	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
		dest_err->oer_errno = src_err->oer_errno;
	} else if (src_err->oer_iswrite) {
		/* Write errors take precedence over read errors. */
		dest_err->oer_iswrite = true;
		dest_err->oer_errno = src_err->oer_errno;
	}
}
/*
 * Drain the layout's err_list: log every per-component error, merge
 * them all into a single accumulated descriptor and encode it at @p
 * (the last reserved xdr slot).  Consumed results are unlinked and
 * freed.  Caller holds the layout lock.
 */
static void
encode_accumulated_error(struct objlayout *objlay, __be32 *p)
{
	struct objlayout_io_res *oir, *tmp;
	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};

	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
		unsigned i;

		for (i = 0; i < oir->num_comps; i++) {
			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

			if (!ioerr->oer_errno)
				continue;

			printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
				"is_write=%d dev(%llx:%llx) par=0x%llx "
				"obj=0x%llx offset=0x%llx length=0x%llx\n",
				__func__, i, ioerr->oer_errno,
				ioerr->oer_iswrite,
				_DEVID_LO(&ioerr->oer_component.oid_device_id),
				_DEVID_HI(&ioerr->oer_component.oid_device_id),
				ioerr->oer_component.oid_partition_id,
				ioerr->oer_component.oid_object_id,
				ioerr->oer_comp_offset,
				ioerr->oer_comp_length);

			merge_ioerr(&accumulated_err, ioerr);
		}
		list_del(&oir->err_list);
		objio_free_result(oir);
	}

	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
}
/*
 * Encode the objects LAYOUTRETURN payload: report every queued I/O
 * error to the server.  If xdr space runs out mid-stream, the last
 * successfully reserved slot is overwritten with the union of all
 * remaining errors.  Encoded results are unlinked and freed; the body
 * length reserved up front is back-patched at the end.
 */
void
objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutreturn_args *args)
{
	struct objlayout *objlay = OBJLAYOUT(pnfslay);
	struct objlayout_io_res *oir, *tmp;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);
	start = xdr_reserve_space(xdr, 4);
	BUG_ON(!start);

	spin_lock(&objlay->lock);

	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
		__be32 *last_xdr = NULL, *p;
		unsigned i;
		int res = 0;

		for (i = 0; i < oir->num_comps; i++) {
			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

			if (!ioerr->oer_errno)
				continue;

			dprintk("%s: err[%d]: errno=%d is_write=%d "
				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
				"offset=0x%llx length=0x%llx\n",
				__func__, i, ioerr->oer_errno,
				ioerr->oer_iswrite,
				_DEVID_LO(&ioerr->oer_component.oid_device_id),
				_DEVID_HI(&ioerr->oer_component.oid_device_id),
				ioerr->oer_component.oid_partition_id,
				ioerr->oer_component.oid_object_id,
				ioerr->oer_comp_offset,
				ioerr->oer_comp_length);

			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
			if (unlikely(!p)) {
				/* Out of xdr space; fall back to the
				 * accumulated-error path below. */
				res = -E2BIG;
				break; /* accumulated_error */
			}

			last_xdr = p;
			pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
		}

		/* TODO: use xdr_write_pages */
		if (unlikely(res)) {
			/* no space for even one error descriptor */
			BUG_ON(!last_xdr);

			/* we've encountered a situation with lots and lots of
			 * errors and no space to encode them all. Use the last
			 * available slot to report the union of all the
			 * remaining errors.
			 */
			encode_accumulated_error(objlay, last_xdr);
			goto loop_done;
		}
		list_del(&oir->err_list);
		objio_free_result(oir);
	}
loop_done:
	spin_unlock(&objlay->lock);

	/* Patch in the opaque body length (32-bit words after it, * 4). */
	*start = cpu_to_be32((xdr->p - start - 1) * 4);
	dprintk("%s: Return\n", __func__);
}
/*
 * Get Device Info API for io engines
 */
struct objlayout_deviceinfo {
	struct page *page;		/* backing page the deviceaddr was decoded from */
	struct pnfs_osd_deviceaddr da;	/* This must be last */
};
  509. /* Initialize and call nfs_getdeviceinfo, then decode and return a
  510. * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
  511. * should be called.
  512. */
  513. int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
  514. struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
  515. gfp_t gfp_flags)
  516. {
  517. struct objlayout_deviceinfo *odi;
  518. struct pnfs_device pd;
  519. struct page *page, **pages;
  520. u32 *p;
  521. int err;
  522. page = alloc_page(gfp_flags);
  523. if (!page)
  524. return -ENOMEM;
  525. pages = &page;
  526. pd.pages = pages;
  527. memcpy(&pd.dev_id, d_id, sizeof(*d_id));
  528. pd.layout_type = LAYOUT_OSD2_OBJECTS;
  529. pd.pages = &page;
  530. pd.pgbase = 0;
  531. pd.pglen = PAGE_SIZE;
  532. pd.mincount = 0;
  533. err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
  534. dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
  535. if (err)
  536. goto err_out;
  537. p = page_address(page);
  538. odi = kzalloc(sizeof(*odi), gfp_flags);
  539. if (!odi) {
  540. err = -ENOMEM;
  541. goto err_out;
  542. }
  543. pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
  544. odi->page = page;
  545. *deviceaddr = &odi->da;
  546. return 0;
  547. err_out:
  548. __free_page(page);
  549. return err;
  550. }
  551. void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
  552. {
  553. struct objlayout_deviceinfo *odi = container_of(deviceaddr,
  554. struct objlayout_deviceinfo,
  555. da);
  556. __free_page(odi->page);
  557. kfree(odi);
  558. }
/* Buffer-size limits for the osd_login upcall arguments. */
enum {
	OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
	OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
	OSD_LOGIN_UPCALL_PATHLEN = 256
};

/* Path of the osd_login usermode helper.  Writable (0600) module
 * parameter; cleared automatically if the helper is missing (see
 * __objlayout_upcall) and can be re-set via sysfs. */
static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";

module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
		    0600);
MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");

/* NUL-terminated argument strings handed to the osd_login helper. */
struct __auto_login {
	char uri[OBJLAYOUT_MAX_URI_LEN];
	char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
	char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
};
  573. static int __objlayout_upcall(struct __auto_login *login)
  574. {
  575. static char *envp[] = { "HOME=/",
  576. "TERM=linux",
  577. "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
  578. NULL
  579. };
  580. char *argv[8];
  581. int ret;
  582. if (unlikely(!osd_login_prog[0])) {
  583. dprintk("%s: osd_login_prog is disabled\n", __func__);
  584. return -EACCES;
  585. }
  586. dprintk("%s uri: %s\n", __func__, login->uri);
  587. dprintk("%s osdname %s\n", __func__, login->osdname);
  588. dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
  589. argv[0] = (char *)osd_login_prog;
  590. argv[1] = "-u";
  591. argv[2] = login->uri;
  592. argv[3] = "-o";
  593. argv[4] = login->osdname;
  594. argv[5] = "-s";
  595. argv[6] = login->systemid_hex;
  596. argv[7] = NULL;
  597. ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
  598. /*
  599. * Disable the upcall mechanism if we're getting an ENOENT or
  600. * EACCES error. The admin can re-enable it on the fly by using
  601. * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
  602. * the problem has been fixed.
  603. */
  604. if (ret == -ENOENT || ret == -EACCES) {
  605. printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
  606. "objlayoutdriver.osd_login_prog kernel parameter!\n",
  607. osd_login_prog);
  608. osd_login_prog[0] = '\0';
  609. }
  610. dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
  611. return ret;
  612. }
/* Assume dest is all zeros */
/* Copy an nfs4_string into @dest, truncating (with a rate-limited
 * warning) to max_len - 1 bytes.  Because the caller pre-zeroed @dest,
 * copying s.len bytes always leaves it NUL-terminated. */
static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
					   char *dest, int max_len,
					   const char *var_name)
{
	if (!s.len)
		return;

	if (s.len >= max_len) {
		pr_warn_ratelimited(
			"objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
			var_name, s.len, max_len);
		s.len = max_len - 1; /* space for null terminator */
	}

	memcpy(dest, s.data, s.len);
}
/* Assume sysid is all zeros */
/* Hex-encode the osd systemid into @sysid (two chars per byte).  The
 * buffer holds OSD_SYSTEMID_LEN * 2 + 1 bytes and is pre-zeroed, so the
 * result is always NUL-terminated; over-long input is truncated with a
 * rate-limited warning. */
static void _sysid_2_hex(struct nfs4_string s,
			 char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
{
	int i;
	char *cur;

	if (!s.len)
		return;

	if (s.len != OSD_SYSTEMID_LEN) {
		pr_warn_ratelimited(
		    "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
		    s.len);
		if (s.len > OSD_SYSTEMID_LEN)
			s.len = OSD_SYSTEMID_LEN;
	}

	cur = sysid;
	for (i = 0; i < s.len; i++)
		cur = hex_byte_pack(cur, s.data[i]);
}
/*
 * Attempt an automatic login to the osd target described by
 * @deviceaddr via the osd_login usermode helper.  Returns 0 on
 * success, -ENODEV if there is no target address or the helper
 * reported failure, or another -errno from the upcall.
 */
int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
{
	int rc;
	struct __auto_login login;

	/* No network address => nothing to log into. */
	if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
		return -ENODEV;

	/* Zero first: the copy helpers rely on it for NUL termination. */
	memset(&login, 0, sizeof(login));
	__copy_nfsS_and_zero_terminate(
		deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
		login.uri, sizeof(login.uri), "URI");

	__copy_nfsS_and_zero_terminate(
		deviceaddr->oda_osdname,
		login.osdname, sizeof(login.osdname), "OSDNAME");

	_sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);

	rc = __objlayout_upcall(&login);
	if (rc > 0) /* script returns positive values */
		rc = -ENODEV;

	return rc;
}