objlayout.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. /*
  2. * pNFS Objects layout driver high level definitions
  3. *
  4. * Copyright (C) 2007 Panasas Inc. [year of first publication]
  5. * All rights reserved.
  6. *
  7. * Benny Halevy <bhalevy@panasas.com>
  8. * Boaz Harrosh <bharrosh@panasas.com>
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License version 2
  12. * See the file COPYING included with this distribution for more details.
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. *
  18. * 1. Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * 2. Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in the
  22. * documentation and/or other materials provided with the distribution.
  23. * 3. Neither the name of the Panasas company nor the names of its
  24. * contributors may be used to endorse or promote products derived
  25. * from this software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  28. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  29. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30. * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  31. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  34. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  35. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  36. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  37. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. */
  39. #include <scsi/osd_initiator.h>
  40. #include "objlayout.h"
  41. #define NFSDBG_FACILITY NFSDBG_PNFS_LD
  42. /*
  43. * Create a objlayout layout structure for the given inode and return it.
  44. */
  45. struct pnfs_layout_hdr *
  46. objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
  47. {
  48. struct objlayout *objlay;
  49. objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
  50. dprintk("%s: Return %p\n", __func__, objlay);
  51. return &objlay->pnfs_layout;
  52. }
  /*
   * Release a layout header.
   *
   * @lo: generic header; OBJLAYOUT() maps it back to the objlayout that
   *      contains it, and that objlayout is what gets passed to kfree().
   */
  void
  objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
  {
      struct objlayout *olo;

      olo = OBJLAYOUT(lo);
      dprintk("%s: objlay %p\n", __func__, olo);
      kfree(olo);
  }
  63. /*
  64. * Unmarshall layout and store it in pnfslay.
  65. */
  66. struct pnfs_layout_segment *
  67. objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
  68. struct nfs4_layoutget_res *lgr,
  69. gfp_t gfp_flags)
  70. {
  71. int status = -ENOMEM;
  72. struct xdr_stream stream;
  73. struct xdr_buf buf = {
  74. .pages = lgr->layoutp->pages,
  75. .page_len = lgr->layoutp->len,
  76. .buflen = lgr->layoutp->len,
  77. .len = lgr->layoutp->len,
  78. };
  79. struct page *scratch;
  80. struct pnfs_layout_segment *lseg;
  81. dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
  82. scratch = alloc_page(gfp_flags);
  83. if (!scratch)
  84. goto err_nofree;
  85. xdr_init_decode(&stream, &buf, NULL);
  86. xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
  87. status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
  88. if (unlikely(status)) {
  89. dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
  90. status);
  91. goto err;
  92. }
  93. __free_page(scratch);
  94. dprintk("%s: Return %p\n", __func__, lseg);
  95. return lseg;
  96. err:
  97. __free_page(scratch);
  98. err_nofree:
  99. dprintk("%s: Err Return=>%d\n", __func__, status);
  100. return ERR_PTR(status);
  101. }
  /*
   * Free a layout segment.
   *
   * @lseg: segment to release through objio_free_lseg(); a NULL pointer is
   *        logged and otherwise ignored.
   */
  void
  objlayout_free_lseg(struct pnfs_layout_segment *lseg)
  {
      dprintk("%s: freeing layout segment %p\n", __func__, lseg);

      /* nothing to hand to the io engine */
      if (unlikely(!lseg)) {
          return;
      }

      objio_free_lseg(lseg);
  }
  113. /*
  114. * I/O Operations
  115. */
  116. static inline u64
  117. end_offset(u64 start, u64 len)
  118. {
  119. u64 end;
  120. end = start + len;
  121. return end >= start ? end : NFS4_MAX_UINT64;
  122. }
  123. /* last octet in a range */
  124. static inline u64
  125. last_byte_offset(u64 start, u64 len)
  126. {
  127. u64 end;
  128. BUG_ON(!len);
  129. end = start + len;
  130. return end > start ? end - 1 : NFS4_MAX_UINT64;
  131. }
  132. static struct objlayout_io_state *
  133. objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
  134. struct page **pages,
  135. unsigned pgbase,
  136. loff_t offset,
  137. size_t count,
  138. struct pnfs_layout_segment *lseg,
  139. void *rpcdata,
  140. gfp_t gfp_flags)
  141. {
  142. struct objlayout_io_state *state;
  143. u64 lseg_end_offset;
  144. dprintk("%s: allocating io_state\n", __func__);
  145. if (objio_alloc_io_state(lseg, &state, gfp_flags))
  146. return NULL;
  147. BUG_ON(offset < lseg->pls_range.offset);
  148. lseg_end_offset = end_offset(lseg->pls_range.offset,
  149. lseg->pls_range.length);
  150. BUG_ON(offset >= lseg_end_offset);
  151. if (offset + count > lseg_end_offset) {
  152. count = lseg->pls_range.length -
  153. (offset - lseg->pls_range.offset);
  154. dprintk("%s: truncated count %Zd\n", __func__, count);
  155. }
  156. if (pgbase > PAGE_SIZE) {
  157. pages += pgbase >> PAGE_SHIFT;
  158. pgbase &= ~PAGE_MASK;
  159. }
  160. state->lseg = lseg;
  161. state->rpcdata = rpcdata;
  162. state->pages = pages;
  163. state->pgbase = pgbase;
  164. state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
  165. state->offset = offset;
  166. state->count = count;
  167. state->sync = 0;
  168. return state;
  169. }
  /*
   * Release an io_state through objio_free_io_state().
   *
   * @state: state to free; a NULL pointer is tolerated and ignored.
   */
  static void
  objlayout_free_io_state(struct objlayout_io_state *state)
  {
      dprintk("%s: freeing io_state\n", __func__);

      /* nothing was allocated */
      if (unlikely(!state)) {
          return;
      }

      objio_free_io_state(state);
  }
  /*
   * Completion code shared by the read and write paths: logs the state
   * pointer and frees the io_state.
   *
   * @state: must not be used by the caller after this returns.
   */
  static void
  objlayout_iodone(struct objlayout_io_state *state)
  {
      dprintk("%s: state %p status\n", __func__, state);

      objlayout_free_io_state(state);
  }
  187. /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
  188. * This is because the osd completion is called with ints-off from
  189. * the block layer
  190. */
  191. static void _rpc_read_complete(struct work_struct *work)
  192. {
  193. struct rpc_task *task;
  194. struct nfs_read_data *rdata;
  195. dprintk("%s enter\n", __func__);
  196. task = container_of(work, struct rpc_task, u.tk_work);
  197. rdata = container_of(task, struct nfs_read_data, task);
  198. pnfs_ld_read_done(rdata);
  199. }
  200. void
  201. objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
  202. {
  203. int eof = state->eof;
  204. struct nfs_read_data *rdata;
  205. state->status = status;
  206. dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
  207. rdata = state->rpcdata;
  208. rdata->task.tk_status = status;
  209. if (status >= 0) {
  210. rdata->res.count = status;
  211. rdata->res.eof = eof;
  212. }
  213. objlayout_iodone(state);
  214. /* must not use state after this point */
  215. if (sync)
  216. pnfs_ld_read_done(rdata);
  217. else {
  218. INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
  219. schedule_work(&rdata->task.u.tk_work);
  220. }
  221. }
  222. /*
  223. * Perform sync or async reads.
  224. */
  225. enum pnfs_try_status
  226. objlayout_read_pagelist(struct nfs_read_data *rdata)
  227. {
  228. loff_t offset = rdata->args.offset;
  229. size_t count = rdata->args.count;
  230. struct objlayout_io_state *state;
  231. ssize_t status = 0;
  232. loff_t eof;
  233. dprintk("%s: Begin inode %p offset %llu count %d\n",
  234. __func__, rdata->inode, offset, (int)count);
  235. eof = i_size_read(rdata->inode);
  236. if (unlikely(offset + count > eof)) {
  237. if (offset >= eof) {
  238. status = 0;
  239. rdata->res.count = 0;
  240. rdata->res.eof = 1;
  241. goto out;
  242. }
  243. count = eof - offset;
  244. }
  245. state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
  246. rdata->args.pages, rdata->args.pgbase,
  247. offset, count,
  248. rdata->lseg, rdata,
  249. GFP_KERNEL);
  250. if (unlikely(!state)) {
  251. status = -ENOMEM;
  252. goto out;
  253. }
  254. state->eof = state->offset + state->count >= eof;
  255. status = objio_read_pagelist(state);
  256. out:
  257. dprintk("%s: Return status %Zd\n", __func__, status);
  258. rdata->pnfs_error = status;
  259. return PNFS_ATTEMPTED;
  260. }
  261. /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
  262. * This is because the osd completion is called with ints-off from
  263. * the block layer
  264. */
  265. static void _rpc_write_complete(struct work_struct *work)
  266. {
  267. struct rpc_task *task;
  268. struct nfs_write_data *wdata;
  269. dprintk("%s enter\n", __func__);
  270. task = container_of(work, struct rpc_task, u.tk_work);
  271. wdata = container_of(task, struct nfs_write_data, task);
  272. pnfs_ld_write_done(wdata);
  273. }
  274. void
  275. objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
  276. bool sync)
  277. {
  278. struct nfs_write_data *wdata;
  279. dprintk("%s: Begin\n", __func__);
  280. wdata = state->rpcdata;
  281. state->status = status;
  282. wdata->task.tk_status = status;
  283. if (status >= 0) {
  284. wdata->res.count = status;
  285. wdata->verf.committed = state->committed;
  286. dprintk("%s: Return status %d committed %d\n",
  287. __func__, wdata->task.tk_status,
  288. wdata->verf.committed);
  289. } else
  290. dprintk("%s: Return status %d\n",
  291. __func__, wdata->task.tk_status);
  292. objlayout_iodone(state);
  293. /* must not use state after this point */
  294. if (sync)
  295. pnfs_ld_write_done(wdata);
  296. else {
  297. INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
  298. schedule_work(&wdata->task.u.tk_work);
  299. }
  300. }
  301. /*
  302. * Perform sync or async writes.
  303. */
  304. enum pnfs_try_status
  305. objlayout_write_pagelist(struct nfs_write_data *wdata,
  306. int how)
  307. {
  308. struct objlayout_io_state *state;
  309. ssize_t status;
  310. dprintk("%s: Begin inode %p offset %llu count %u\n",
  311. __func__, wdata->inode, wdata->args.offset, wdata->args.count);
  312. state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
  313. wdata->args.pages,
  314. wdata->args.pgbase,
  315. wdata->args.offset,
  316. wdata->args.count,
  317. wdata->lseg, wdata,
  318. GFP_NOFS);
  319. if (unlikely(!state)) {
  320. status = -ENOMEM;
  321. goto out;
  322. }
  323. state->sync = how & FLUSH_SYNC;
  324. status = objio_write_pagelist(state, how & FLUSH_STABLE);
  325. out:
  326. dprintk("%s: Return status %Zd\n", __func__, status);
  327. wdata->pnfs_error = status;
  328. return PNFS_ATTEMPTED;
  329. }
  330. /*
  331. * Get Device Info API for io engines
  332. */
  333. struct objlayout_deviceinfo {
  334. struct page *page;
  335. struct pnfs_osd_deviceaddr da; /* This must be last */
  336. };
  337. /* Initialize and call nfs_getdeviceinfo, then decode and return a
  338. * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
  339. * should be called.
  340. */
  341. int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
  342. struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
  343. gfp_t gfp_flags)
  344. {
  345. struct objlayout_deviceinfo *odi;
  346. struct pnfs_device pd;
  347. struct super_block *sb;
  348. struct page *page, **pages;
  349. u32 *p;
  350. int err;
  351. page = alloc_page(gfp_flags);
  352. if (!page)
  353. return -ENOMEM;
  354. pages = &page;
  355. pd.pages = pages;
  356. memcpy(&pd.dev_id, d_id, sizeof(*d_id));
  357. pd.layout_type = LAYOUT_OSD2_OBJECTS;
  358. pd.pages = &page;
  359. pd.pgbase = 0;
  360. pd.pglen = PAGE_SIZE;
  361. pd.mincount = 0;
  362. sb = pnfslay->plh_inode->i_sb;
  363. err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
  364. dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
  365. if (err)
  366. goto err_out;
  367. p = page_address(page);
  368. odi = kzalloc(sizeof(*odi), gfp_flags);
  369. if (!odi) {
  370. err = -ENOMEM;
  371. goto err_out;
  372. }
  373. pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
  374. odi->page = page;
  375. *deviceaddr = &odi->da;
  376. return 0;
  377. err_out:
  378. __free_page(page);
  379. return err;
  380. }
  381. void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
  382. {
  383. struct objlayout_deviceinfo *odi = container_of(deviceaddr,
  384. struct objlayout_deviceinfo,
  385. da);
  386. __free_page(odi->page);
  387. kfree(odi);
  388. }