objlayout.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777
  1. /*
  2. * pNFS Objects layout driver high level definitions
  3. *
  4. * Copyright (C) 2007 Panasas Inc. [year of first publication]
  5. * All rights reserved.
  6. *
  7. * Benny Halevy <bhalevy@panasas.com>
  8. * Boaz Harrosh <bharrosh@panasas.com>
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License version 2
  12. * See the file COPYING included with this distribution for more details.
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. *
  18. * 1. Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * 2. Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in the
  22. * documentation and/or other materials provided with the distribution.
  23. * 3. Neither the name of the Panasas company nor the names of its
  24. * contributors may be used to endorse or promote products derived
  25. * from this software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  28. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  29. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30. * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  31. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  34. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  35. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  36. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  37. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. */
  39. #include <linux/kmod.h>
  40. #include <linux/moduleparam.h>
  41. #include <linux/ratelimit.h>
  42. #include <scsi/osd_initiator.h>
  43. #include "objlayout.h"
  44. #define NFSDBG_FACILITY NFSDBG_PNFS_LD
  45. /*
  46. * Create a objlayout layout structure for the given inode and return it.
  47. */
  48. struct pnfs_layout_hdr *
  49. objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
  50. {
  51. struct objlayout *objlay;
  52. objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
  53. if (objlay) {
  54. spin_lock_init(&objlay->lock);
  55. INIT_LIST_HEAD(&objlay->err_list);
  56. }
  57. dprintk("%s: Return %p\n", __func__, objlay);
  58. return &objlay->pnfs_layout;
  59. }
  60. /*
  61. * Free an objlayout layout structure
  62. */
  63. void
  64. objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
  65. {
  66. struct objlayout *objlay = OBJLAYOUT(lo);
  67. dprintk("%s: objlay %p\n", __func__, objlay);
  68. WARN_ON(!list_empty(&objlay->err_list));
  69. kfree(objlay);
  70. }
  71. /*
  72. * Unmarshall layout and store it in pnfslay.
  73. */
  74. struct pnfs_layout_segment *
  75. objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
  76. struct nfs4_layoutget_res *lgr,
  77. gfp_t gfp_flags)
  78. {
  79. int status = -ENOMEM;
  80. struct xdr_stream stream;
  81. struct xdr_buf buf = {
  82. .pages = lgr->layoutp->pages,
  83. .page_len = lgr->layoutp->len,
  84. .buflen = lgr->layoutp->len,
  85. .len = lgr->layoutp->len,
  86. };
  87. struct page *scratch;
  88. struct pnfs_layout_segment *lseg;
  89. dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
  90. scratch = alloc_page(gfp_flags);
  91. if (!scratch)
  92. goto err_nofree;
  93. xdr_init_decode(&stream, &buf, NULL);
  94. xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
  95. status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
  96. if (unlikely(status)) {
  97. dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
  98. status);
  99. goto err;
  100. }
  101. __free_page(scratch);
  102. dprintk("%s: Return %p\n", __func__, lseg);
  103. return lseg;
  104. err:
  105. __free_page(scratch);
  106. err_nofree:
  107. dprintk("%s: Err Return=>%d\n", __func__, status);
  108. return ERR_PTR(status);
  109. }
  110. /*
  111. * Free a layout segement
  112. */
  113. void
  114. objlayout_free_lseg(struct pnfs_layout_segment *lseg)
  115. {
  116. dprintk("%s: freeing layout segment %p\n", __func__, lseg);
  117. if (unlikely(!lseg))
  118. return;
  119. objio_free_lseg(lseg);
  120. }
  121. /*
  122. * I/O Operations
  123. */
  124. static inline u64
  125. end_offset(u64 start, u64 len)
  126. {
  127. u64 end;
  128. end = start + len;
  129. return end >= start ? end : NFS4_MAX_UINT64;
  130. }
  131. static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
  132. struct page ***p_pages, unsigned *p_pgbase,
  133. u64 offset, unsigned long count)
  134. {
  135. u64 lseg_end_offset;
  136. BUG_ON(offset < lseg->pls_range.offset);
  137. lseg_end_offset = end_offset(lseg->pls_range.offset,
  138. lseg->pls_range.length);
  139. BUG_ON(offset >= lseg_end_offset);
  140. WARN_ON(offset + count > lseg_end_offset);
  141. if (*p_pgbase > PAGE_SIZE) {
  142. dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
  143. *p_pages += *p_pgbase >> PAGE_SHIFT;
  144. *p_pgbase &= ~PAGE_MASK;
  145. }
  146. }
  147. /*
  148. * I/O done common code
  149. */
  150. static void
  151. objlayout_iodone(struct objlayout_io_res *oir)
  152. {
  153. if (likely(oir->status >= 0)) {
  154. objio_free_result(oir);
  155. } else {
  156. struct objlayout *objlay = oir->objlay;
  157. spin_lock(&objlay->lock);
  158. objlay->delta_space_valid = OBJ_DSU_INVALID;
  159. list_add(&objlay->err_list, &oir->err_list);
  160. spin_unlock(&objlay->lock);
  161. }
  162. }
  163. /*
  164. * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
  165. *
  166. * The @index component IO failed (error returned from target). Register
  167. * the error for later reporting at layout-return.
  168. */
  169. void
  170. objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
  171. struct pnfs_osd_objid *pooid, int osd_error,
  172. u64 offset, u64 length, bool is_write)
  173. {
  174. struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
  175. BUG_ON(index >= oir->num_comps);
  176. if (osd_error) {
  177. ioerr->oer_component = *pooid;
  178. ioerr->oer_comp_offset = offset;
  179. ioerr->oer_comp_length = length;
  180. ioerr->oer_iswrite = is_write;
  181. ioerr->oer_errno = osd_error;
  182. dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
  183. "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
  184. __func__, index, ioerr->oer_errno,
  185. ioerr->oer_iswrite,
  186. _DEVID_LO(&ioerr->oer_component.oid_device_id),
  187. _DEVID_HI(&ioerr->oer_component.oid_device_id),
  188. ioerr->oer_component.oid_partition_id,
  189. ioerr->oer_component.oid_object_id,
  190. ioerr->oer_comp_offset,
  191. ioerr->oer_comp_length);
  192. } else {
  193. /* User need not call if no error is reported */
  194. ioerr->oer_errno = 0;
  195. }
  196. }
  197. /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
  198. * This is because the osd completion is called with ints-off from
  199. * the block layer
  200. */
  201. static void _rpc_read_complete(struct work_struct *work)
  202. {
  203. struct rpc_task *task;
  204. struct nfs_read_data *rdata;
  205. dprintk("%s enter\n", __func__);
  206. task = container_of(work, struct rpc_task, u.tk_work);
  207. rdata = container_of(task, struct nfs_read_data, task);
  208. pnfs_ld_read_done(rdata);
  209. }
  210. void
  211. objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
  212. {
  213. struct nfs_read_data *rdata = oir->rpcdata;
  214. oir->status = rdata->task.tk_status = status;
  215. if (status >= 0)
  216. rdata->res.count = status;
  217. else
  218. rdata->header->pnfs_error = status;
  219. objlayout_iodone(oir);
  220. /* must not use oir after this point */
  221. dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
  222. status, rdata->res.eof, sync);
  223. if (sync)
  224. pnfs_ld_read_done(rdata);
  225. else {
  226. INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
  227. schedule_work(&rdata->task.u.tk_work);
  228. }
  229. }
/*
 * Perform sync or async reads.
 *
 * Clamps the request against the current inode size, sets res.eof, and
 * hands the pagelist to the objio engine.  Returns PNFS_ATTEMPTED when
 * the read was issued (or trivially satisfied at EOF), or
 * PNFS_NOT_ATTEMPTED with hdr->pnfs_error set so the caller falls back.
 */
enum pnfs_try_status
objlayout_read_pagelist(struct nfs_read_data *rdata)
{
        struct nfs_pgio_header *hdr = rdata->header;
        struct inode *inode = hdr->inode;
        loff_t offset = rdata->args.offset;
        size_t count = rdata->args.count;
        int err;
        loff_t eof;

        eof = i_size_read(inode);
        if (unlikely(offset + count > eof)) {
                if (offset >= eof) {
                        /* whole request beyond EOF: zero bytes, eof set,
                         * no I/O issued (err == 0 => PNFS_ATTEMPTED) */
                        err = 0;
                        rdata->res.count = 0;
                        rdata->res.eof = 1;
                        /*FIXME: do we need to call pnfs_ld_read_done() */
                        goto out;
                }
                /* trim the request so it ends exactly at EOF */
                count = eof - offset;
        }

        rdata->res.eof = (offset + count) >= eof;
        /* NOTE: verification uses the original (untrimmed) args range */
        _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
                              &rdata->args.pgbase,
                              rdata->args.offset, rdata->args.count);

        dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
                __func__, inode->i_ino, offset, count, rdata->res.eof);

        err = objio_read_pagelist(rdata);
 out:
        if (unlikely(err)) {
                hdr->pnfs_error = err;
                dprintk("%s: Returned Error %d\n", __func__, err);
                return PNFS_NOT_ATTEMPTED;
        }
        return PNFS_ATTEMPTED;
}
  268. /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
  269. * This is because the osd completion is called with ints-off from
  270. * the block layer
  271. */
  272. static void _rpc_write_complete(struct work_struct *work)
  273. {
  274. struct rpc_task *task;
  275. struct nfs_write_data *wdata;
  276. dprintk("%s enter\n", __func__);
  277. task = container_of(work, struct rpc_task, u.tk_work);
  278. wdata = container_of(task, struct nfs_write_data, task);
  279. pnfs_ld_write_done(wdata);
  280. }
  281. void
  282. objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
  283. {
  284. struct nfs_write_data *wdata = oir->rpcdata;
  285. oir->status = wdata->task.tk_status = status;
  286. if (status >= 0) {
  287. wdata->res.count = status;
  288. wdata->verf.committed = oir->committed;
  289. } else {
  290. wdata->header->pnfs_error = status;
  291. }
  292. objlayout_iodone(oir);
  293. /* must not use oir after this point */
  294. dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
  295. status, wdata->verf.committed, sync);
  296. if (sync)
  297. pnfs_ld_write_done(wdata);
  298. else {
  299. INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
  300. schedule_work(&wdata->task.u.tk_work);
  301. }
  302. }
  303. /*
  304. * Perform sync or async writes.
  305. */
  306. enum pnfs_try_status
  307. objlayout_write_pagelist(struct nfs_write_data *wdata,
  308. int how)
  309. {
  310. struct nfs_pgio_header *hdr = wdata->header;
  311. int err;
  312. _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
  313. &wdata->args.pgbase,
  314. wdata->args.offset, wdata->args.count);
  315. err = objio_write_pagelist(wdata, how);
  316. if (unlikely(err)) {
  317. hdr->pnfs_error = err;
  318. dprintk("%s: Returned Error %d\n", __func__, err);
  319. return PNFS_NOT_ATTEMPTED;
  320. }
  321. return PNFS_ATTEMPTED;
  322. }
  323. void
  324. objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
  325. struct xdr_stream *xdr,
  326. const struct nfs4_layoutcommit_args *args)
  327. {
  328. struct objlayout *objlay = OBJLAYOUT(pnfslay);
  329. struct pnfs_osd_layoutupdate lou;
  330. __be32 *start;
  331. dprintk("%s: Begin\n", __func__);
  332. spin_lock(&objlay->lock);
  333. lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
  334. lou.dsu_delta = objlay->delta_space_used;
  335. objlay->delta_space_used = 0;
  336. objlay->delta_space_valid = OBJ_DSU_INIT;
  337. lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
  338. spin_unlock(&objlay->lock);
  339. start = xdr_reserve_space(xdr, 4);
  340. BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
  341. *start = cpu_to_be32((xdr->p - start - 1) * 4);
  342. dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
  343. lou.dsu_delta, lou.olu_ioerr_flag);
  344. }
  345. static int
  346. err_prio(u32 oer_errno)
  347. {
  348. switch (oer_errno) {
  349. case 0:
  350. return 0;
  351. case PNFS_OSD_ERR_RESOURCE:
  352. return OSD_ERR_PRI_RESOURCE;
  353. case PNFS_OSD_ERR_BAD_CRED:
  354. return OSD_ERR_PRI_BAD_CRED;
  355. case PNFS_OSD_ERR_NO_ACCESS:
  356. return OSD_ERR_PRI_NO_ACCESS;
  357. case PNFS_OSD_ERR_UNREACHABLE:
  358. return OSD_ERR_PRI_UNREACHABLE;
  359. case PNFS_OSD_ERR_NOT_FOUND:
  360. return OSD_ERR_PRI_NOT_FOUND;
  361. case PNFS_OSD_ERR_NO_SPACE:
  362. return OSD_ERR_PRI_NO_SPACE;
  363. default:
  364. WARN_ON(1);
  365. /* fallthrough */
  366. case PNFS_OSD_ERR_EIO:
  367. return OSD_ERR_PRI_EIO;
  368. }
  369. }
  370. static void
  371. merge_ioerr(struct pnfs_osd_ioerr *dest_err,
  372. const struct pnfs_osd_ioerr *src_err)
  373. {
  374. u64 dest_end, src_end;
  375. if (!dest_err->oer_errno) {
  376. *dest_err = *src_err;
  377. /* accumulated device must be blank */
  378. memset(&dest_err->oer_component.oid_device_id, 0,
  379. sizeof(dest_err->oer_component.oid_device_id));
  380. return;
  381. }
  382. if (dest_err->oer_component.oid_partition_id !=
  383. src_err->oer_component.oid_partition_id)
  384. dest_err->oer_component.oid_partition_id = 0;
  385. if (dest_err->oer_component.oid_object_id !=
  386. src_err->oer_component.oid_object_id)
  387. dest_err->oer_component.oid_object_id = 0;
  388. if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
  389. dest_err->oer_comp_offset = src_err->oer_comp_offset;
  390. dest_end = end_offset(dest_err->oer_comp_offset,
  391. dest_err->oer_comp_length);
  392. src_end = end_offset(src_err->oer_comp_offset,
  393. src_err->oer_comp_length);
  394. if (dest_end < src_end)
  395. dest_end = src_end;
  396. dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
  397. if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
  398. (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
  399. dest_err->oer_errno = src_err->oer_errno;
  400. } else if (src_err->oer_iswrite) {
  401. dest_err->oer_iswrite = true;
  402. dest_err->oer_errno = src_err->oer_errno;
  403. }
  404. }
  405. static void
  406. encode_accumulated_error(struct objlayout *objlay, __be32 *p)
  407. {
  408. struct objlayout_io_res *oir, *tmp;
  409. struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
  410. list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
  411. unsigned i;
  412. for (i = 0; i < oir->num_comps; i++) {
  413. struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
  414. if (!ioerr->oer_errno)
  415. continue;
  416. printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
  417. "is_write=%d dev(%llx:%llx) par=0x%llx "
  418. "obj=0x%llx offset=0x%llx length=0x%llx\n",
  419. __func__, i, ioerr->oer_errno,
  420. ioerr->oer_iswrite,
  421. _DEVID_LO(&ioerr->oer_component.oid_device_id),
  422. _DEVID_HI(&ioerr->oer_component.oid_device_id),
  423. ioerr->oer_component.oid_partition_id,
  424. ioerr->oer_component.oid_object_id,
  425. ioerr->oer_comp_offset,
  426. ioerr->oer_comp_length);
  427. merge_ioerr(&accumulated_err, ioerr);
  428. }
  429. list_del(&oir->err_list);
  430. objio_free_result(oir);
  431. }
  432. pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
  433. }
/*
 * Encode the lrf_body of a LAYOUTRETURN: the per-component osd ioerr
 * descriptors accumulated on objlay->err_list.
 *
 * Encodes as many descriptors as the xdr stream has room for; when space
 * runs out, the union of all remaining errors is merged into the last
 * successfully reserved slot via encode_accumulated_error().
 */
void
objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
        struct xdr_stream *xdr,
        const struct nfs4_layoutreturn_args *args)
{
        struct objlayout *objlay = OBJLAYOUT(pnfslay);
        struct objlayout_io_res *oir, *tmp;
        __be32 *start;

        dprintk("%s: Begin\n", __func__);
        /* reserve the 4-byte opaque length word, patched at the end */
        start = xdr_reserve_space(xdr, 4);
        BUG_ON(!start);

        spin_lock(&objlay->lock);
        list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
                __be32 *last_xdr = NULL, *p;
                unsigned i;
                int res = 0;

                for (i = 0; i < oir->num_comps; i++) {
                        struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

                        /* only components that actually failed are encoded */
                        if (!ioerr->oer_errno)
                                continue;

                        dprintk("%s: err[%d]: errno=%d is_write=%d "
                                "dev(%llx:%llx) par=0x%llx obj=0x%llx "
                                "offset=0x%llx length=0x%llx\n",
                                __func__, i, ioerr->oer_errno,
                                ioerr->oer_iswrite,
                                _DEVID_LO(&ioerr->oer_component.oid_device_id),
                                _DEVID_HI(&ioerr->oer_component.oid_device_id),
                                ioerr->oer_component.oid_partition_id,
                                ioerr->oer_component.oid_object_id,
                                ioerr->oer_comp_offset,
                                ioerr->oer_comp_length);

                        p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
                        if (unlikely(!p)) {
                                /* stream full — fall back to accumulation */
                                res = -E2BIG;
                                break; /* accumulated_error */
                        }

                        last_xdr = p;
                        pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
                }

                /* TODO: use xdr_write_pages */
                if (unlikely(res)) {
                        /* no space for even one error descriptor */
                        BUG_ON(!last_xdr);

                        /* we've encountered a situation with lots and lots of
                         * errors and no space to encode them all. Use the last
                         * available slot to report the union of all the
                         * remaining errors.
                         */
                        encode_accumulated_error(objlay, last_xdr);
                        goto loop_done;
                }
                list_del(&oir->err_list);
                objio_free_result(oir);
        }
loop_done:
        spin_unlock(&objlay->lock);

        /* patch the opaque length: words encoded after the length slot */
        *start = cpu_to_be32((xdr->p - start - 1) * 4);
        dprintk("%s: Return\n", __func__);
}
/*
 * Get Device Info API for io engines
 */
struct objlayout_deviceinfo {
        /* backing page holding the raw XDR device-address data; kept
         * alive until objlayout_put_deviceinfo() — presumably the decoded
         * strings in 'da' point into it (TODO confirm) */
        struct page *page;
        struct pnfs_osd_deviceaddr da; /* This must be last */
};
  500. /* Initialize and call nfs_getdeviceinfo, then decode and return a
  501. * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
  502. * should be called.
  503. */
  504. int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
  505. struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
  506. gfp_t gfp_flags)
  507. {
  508. struct objlayout_deviceinfo *odi;
  509. struct pnfs_device pd;
  510. struct page *page, **pages;
  511. u32 *p;
  512. int err;
  513. page = alloc_page(gfp_flags);
  514. if (!page)
  515. return -ENOMEM;
  516. pages = &page;
  517. pd.pages = pages;
  518. memcpy(&pd.dev_id, d_id, sizeof(*d_id));
  519. pd.layout_type = LAYOUT_OSD2_OBJECTS;
  520. pd.pages = &page;
  521. pd.pgbase = 0;
  522. pd.pglen = PAGE_SIZE;
  523. pd.mincount = 0;
  524. err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
  525. dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
  526. if (err)
  527. goto err_out;
  528. p = page_address(page);
  529. odi = kzalloc(sizeof(*odi), gfp_flags);
  530. if (!odi) {
  531. err = -ENOMEM;
  532. goto err_out;
  533. }
  534. pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
  535. odi->page = page;
  536. *deviceaddr = &odi->da;
  537. return 0;
  538. err_out:
  539. __free_page(page);
  540. return err;
  541. }
  542. void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
  543. {
  544. struct objlayout_deviceinfo *odi = container_of(deviceaddr,
  545. struct objlayout_deviceinfo,
  546. da);
  547. __free_page(odi->page);
  548. kfree(odi);
  549. }
/* Buffer-size limits for the osd_login upcall arguments. */
enum {
        OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
        /* two hex digits per systemid byte, plus NUL terminator */
        OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
        OSD_LOGIN_UPCALL_PATHLEN = 256
};
/* Path of the userspace osd_login helper, overridable as the
 * objlayoutdriver.osd_login_prog module parameter (mode 0600).
 * Cleared — disabling the upcall — if the program cannot be executed
 * (see __objlayout_upcall()). */
static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";

module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
                    0600);
MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
/* Argument block for the osd_login upcall.  All fields are filled into
 * zero-initialized storage by __copy_nfsS_and_zero_terminate() /
 * _sysid_2_hex(), so each ends up NUL-terminated.
 * NOTE(review): the leading "__" is a reserved-identifier pattern in
 * standard C; kernel code tolerates it, but a rename would be cleaner. */
struct __auto_login {
        char uri[OBJLAYOUT_MAX_URI_LEN];                /* "-u" argument */
        char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];        /* "-o" argument */
        char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN]; /* "-s" argument */
};
  564. static int __objlayout_upcall(struct __auto_login *login)
  565. {
  566. static char *envp[] = { "HOME=/",
  567. "TERM=linux",
  568. "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
  569. NULL
  570. };
  571. char *argv[8];
  572. int ret;
  573. if (unlikely(!osd_login_prog[0])) {
  574. dprintk("%s: osd_login_prog is disabled\n", __func__);
  575. return -EACCES;
  576. }
  577. dprintk("%s uri: %s\n", __func__, login->uri);
  578. dprintk("%s osdname %s\n", __func__, login->osdname);
  579. dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
  580. argv[0] = (char *)osd_login_prog;
  581. argv[1] = "-u";
  582. argv[2] = login->uri;
  583. argv[3] = "-o";
  584. argv[4] = login->osdname;
  585. argv[5] = "-s";
  586. argv[6] = login->systemid_hex;
  587. argv[7] = NULL;
  588. ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
  589. /*
  590. * Disable the upcall mechanism if we're getting an ENOENT or
  591. * EACCES error. The admin can re-enable it on the fly by using
  592. * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
  593. * the problem has been fixed.
  594. */
  595. if (ret == -ENOENT || ret == -EACCES) {
  596. printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
  597. "objlayoutdriver.osd_login_prog kernel parameter!\n",
  598. osd_login_prog);
  599. osd_login_prog[0] = '\0';
  600. }
  601. dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
  602. return ret;
  603. }
  604. /* Assume dest is all zeros */
  605. static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
  606. char *dest, int max_len,
  607. const char *var_name)
  608. {
  609. if (!s.len)
  610. return;
  611. if (s.len >= max_len) {
  612. pr_warn_ratelimited(
  613. "objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
  614. var_name, s.len, max_len);
  615. s.len = max_len - 1; /* space for null terminator */
  616. }
  617. memcpy(dest, s.data, s.len);
  618. }
  619. /* Assume sysid is all zeros */
  620. static void _sysid_2_hex(struct nfs4_string s,
  621. char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
  622. {
  623. int i;
  624. char *cur;
  625. if (!s.len)
  626. return;
  627. if (s.len != OSD_SYSTEMID_LEN) {
  628. pr_warn_ratelimited(
  629. "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
  630. s.len);
  631. if (s.len > OSD_SYSTEMID_LEN)
  632. s.len = OSD_SYSTEMID_LEN;
  633. }
  634. cur = sysid;
  635. for (i = 0; i < s.len; i++)
  636. cur = hex_byte_pack(cur, s.data[i]);
  637. }
  638. int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
  639. {
  640. int rc;
  641. struct __auto_login login;
  642. if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
  643. return -ENODEV;
  644. memset(&login, 0, sizeof(login));
  645. __copy_nfsS_and_zero_terminate(
  646. deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
  647. login.uri, sizeof(login.uri), "URI");
  648. __copy_nfsS_and_zero_terminate(
  649. deviceaddr->oda_osdname,
  650. login.osdname, sizeof(login.osdname), "OSDNAME");
  651. _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
  652. rc = __objlayout_upcall(&login);
  653. if (rc > 0) /* script returns positive values */
  654. rc = -ENODEV;
  655. return rc;
  656. }