123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452 |
- /*
- * pNFS Objects layout driver high level definitions
- *
- * Copyright (C) 2007 Panasas Inc. [year of first publication]
- * All rights reserved.
- *
- * Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * See the file COPYING included with this distribution for more details.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the Panasas company nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include <scsi/osd_initiator.h>
- #include "objlayout.h"
- #define NFSDBG_FACILITY NFSDBG_PNFS_LD
- /*
- * Create a objlayout layout structure for the given inode and return it.
- */
- struct pnfs_layout_hdr *
- objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
- {
- struct objlayout *objlay;
- objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
- dprintk("%s: Return %p\n", __func__, objlay);
- return &objlay->pnfs_layout;
- }
- /*
- * Free an objlayout layout structure
- */
- void
- objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
- {
- struct objlayout *objlay = OBJLAYOUT(lo);
- dprintk("%s: objlay %p\n", __func__, objlay);
- kfree(objlay);
- }
- /*
- * Unmarshall layout and store it in pnfslay.
- */
- struct pnfs_layout_segment *
- objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
- struct nfs4_layoutget_res *lgr,
- gfp_t gfp_flags)
- {
- int status = -ENOMEM;
- struct xdr_stream stream;
- struct xdr_buf buf = {
- .pages = lgr->layoutp->pages,
- .page_len = lgr->layoutp->len,
- .buflen = lgr->layoutp->len,
- .len = lgr->layoutp->len,
- };
- struct page *scratch;
- struct pnfs_layout_segment *lseg;
- dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
- scratch = alloc_page(gfp_flags);
- if (!scratch)
- goto err_nofree;
- xdr_init_decode(&stream, &buf, NULL);
- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
- status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
- if (unlikely(status)) {
- dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
- status);
- goto err;
- }
- __free_page(scratch);
- dprintk("%s: Return %p\n", __func__, lseg);
- return lseg;
- err:
- __free_page(scratch);
- err_nofree:
- dprintk("%s: Err Return=>%d\n", __func__, status);
- return ERR_PTR(status);
- }
- /*
- * Free a layout segement
- */
- void
- objlayout_free_lseg(struct pnfs_layout_segment *lseg)
- {
- dprintk("%s: freeing layout segment %p\n", __func__, lseg);
- if (unlikely(!lseg))
- return;
- objio_free_lseg(lseg);
- }
- /*
- * I/O Operations
- */
- static inline u64
- end_offset(u64 start, u64 len)
- {
- u64 end;
- end = start + len;
- return end >= start ? end : NFS4_MAX_UINT64;
- }
- /* last octet in a range */
- static inline u64
- last_byte_offset(u64 start, u64 len)
- {
- u64 end;
- BUG_ON(!len);
- end = start + len;
- return end > start ? end - 1 : NFS4_MAX_UINT64;
- }
- static struct objlayout_io_state *
- objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
- struct page **pages,
- unsigned pgbase,
- loff_t offset,
- size_t count,
- struct pnfs_layout_segment *lseg,
- void *rpcdata,
- gfp_t gfp_flags)
- {
- struct objlayout_io_state *state;
- u64 lseg_end_offset;
- dprintk("%s: allocating io_state\n", __func__);
- if (objio_alloc_io_state(lseg, &state, gfp_flags))
- return NULL;
- BUG_ON(offset < lseg->pls_range.offset);
- lseg_end_offset = end_offset(lseg->pls_range.offset,
- lseg->pls_range.length);
- BUG_ON(offset >= lseg_end_offset);
- if (offset + count > lseg_end_offset) {
- count = lseg->pls_range.length -
- (offset - lseg->pls_range.offset);
- dprintk("%s: truncated count %Zd\n", __func__, count);
- }
- if (pgbase > PAGE_SIZE) {
- pages += pgbase >> PAGE_SHIFT;
- pgbase &= ~PAGE_MASK;
- }
- state->lseg = lseg;
- state->rpcdata = rpcdata;
- state->pages = pages;
- state->pgbase = pgbase;
- state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
- state->offset = offset;
- state->count = count;
- state->sync = 0;
- return state;
- }
- static void
- objlayout_free_io_state(struct objlayout_io_state *state)
- {
- dprintk("%s: freeing io_state\n", __func__);
- if (unlikely(!state))
- return;
- objio_free_io_state(state);
- }
- /*
- * I/O done common code
- */
- static void
- objlayout_iodone(struct objlayout_io_state *state)
- {
- dprintk("%s: state %p status\n", __func__, state);
- objlayout_free_io_state(state);
- }
- /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
- * This is because the osd completion is called with ints-off from
- * the block layer
- */
- static void _rpc_read_complete(struct work_struct *work)
- {
- struct rpc_task *task;
- struct nfs_read_data *rdata;
- dprintk("%s enter\n", __func__);
- task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_read_data, task);
- pnfs_ld_read_done(rdata);
- }
- void
- objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
- {
- int eof = state->eof;
- struct nfs_read_data *rdata;
- state->status = status;
- dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
- rdata = state->rpcdata;
- rdata->task.tk_status = status;
- if (status >= 0) {
- rdata->res.count = status;
- rdata->res.eof = eof;
- }
- objlayout_iodone(state);
- /* must not use state after this point */
- if (sync)
- pnfs_ld_read_done(rdata);
- else {
- INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
- schedule_work(&rdata->task.u.tk_work);
- }
- }
- /*
- * Perform sync or async reads.
- */
- enum pnfs_try_status
- objlayout_read_pagelist(struct nfs_read_data *rdata)
- {
- loff_t offset = rdata->args.offset;
- size_t count = rdata->args.count;
- struct objlayout_io_state *state;
- ssize_t status = 0;
- loff_t eof;
- dprintk("%s: Begin inode %p offset %llu count %d\n",
- __func__, rdata->inode, offset, (int)count);
- eof = i_size_read(rdata->inode);
- if (unlikely(offset + count > eof)) {
- if (offset >= eof) {
- status = 0;
- rdata->res.count = 0;
- rdata->res.eof = 1;
- goto out;
- }
- count = eof - offset;
- }
- state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
- rdata->args.pages, rdata->args.pgbase,
- offset, count,
- rdata->lseg, rdata,
- GFP_KERNEL);
- if (unlikely(!state)) {
- status = -ENOMEM;
- goto out;
- }
- state->eof = state->offset + state->count >= eof;
- status = objio_read_pagelist(state);
- out:
- dprintk("%s: Return status %Zd\n", __func__, status);
- rdata->pnfs_error = status;
- return PNFS_ATTEMPTED;
- }
- /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
- * This is because the osd completion is called with ints-off from
- * the block layer
- */
- static void _rpc_write_complete(struct work_struct *work)
- {
- struct rpc_task *task;
- struct nfs_write_data *wdata;
- dprintk("%s enter\n", __func__);
- task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_write_data, task);
- pnfs_ld_write_done(wdata);
- }
- void
- objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
- bool sync)
- {
- struct nfs_write_data *wdata;
- dprintk("%s: Begin\n", __func__);
- wdata = state->rpcdata;
- state->status = status;
- wdata->task.tk_status = status;
- if (status >= 0) {
- wdata->res.count = status;
- wdata->verf.committed = state->committed;
- dprintk("%s: Return status %d committed %d\n",
- __func__, wdata->task.tk_status,
- wdata->verf.committed);
- } else
- dprintk("%s: Return status %d\n",
- __func__, wdata->task.tk_status);
- objlayout_iodone(state);
- /* must not use state after this point */
- if (sync)
- pnfs_ld_write_done(wdata);
- else {
- INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
- schedule_work(&wdata->task.u.tk_work);
- }
- }
- /*
- * Perform sync or async writes.
- */
- enum pnfs_try_status
- objlayout_write_pagelist(struct nfs_write_data *wdata,
- int how)
- {
- struct objlayout_io_state *state;
- ssize_t status;
- dprintk("%s: Begin inode %p offset %llu count %u\n",
- __func__, wdata->inode, wdata->args.offset, wdata->args.count);
- state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
- wdata->args.pages,
- wdata->args.pgbase,
- wdata->args.offset,
- wdata->args.count,
- wdata->lseg, wdata,
- GFP_NOFS);
- if (unlikely(!state)) {
- status = -ENOMEM;
- goto out;
- }
- state->sync = how & FLUSH_SYNC;
- status = objio_write_pagelist(state, how & FLUSH_STABLE);
- out:
- dprintk("%s: Return status %Zd\n", __func__, status);
- wdata->pnfs_error = status;
- return PNFS_ATTEMPTED;
- }
- /*
- * Get Device Info API for io engines
- */
- struct objlayout_deviceinfo {
- struct page *page;
- struct pnfs_osd_deviceaddr da; /* This must be last */
- };
- /* Initialize and call nfs_getdeviceinfo, then decode and return a
- * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
- * should be called.
- */
- int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
- struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
- gfp_t gfp_flags)
- {
- struct objlayout_deviceinfo *odi;
- struct pnfs_device pd;
- struct super_block *sb;
- struct page *page, **pages;
- u32 *p;
- int err;
- page = alloc_page(gfp_flags);
- if (!page)
- return -ENOMEM;
- pages = &page;
- pd.pages = pages;
- memcpy(&pd.dev_id, d_id, sizeof(*d_id));
- pd.layout_type = LAYOUT_OSD2_OBJECTS;
- pd.pages = &page;
- pd.pgbase = 0;
- pd.pglen = PAGE_SIZE;
- pd.mincount = 0;
- sb = pnfslay->plh_inode->i_sb;
- err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
- dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
- if (err)
- goto err_out;
- p = page_address(page);
- odi = kzalloc(sizeof(*odi), gfp_flags);
- if (!odi) {
- err = -ENOMEM;
- goto err_out;
- }
- pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
- odi->page = page;
- *deviceaddr = &odi->da;
- return 0;
- err_out:
- __free_page(page);
- return err;
- }
- void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
- {
- struct objlayout_deviceinfo *odi = container_of(deviceaddr,
- struct objlayout_deviceinfo,
- da);
- __free_page(odi->page);
- kfree(odi);
- }
|