|
@@ -69,9 +69,127 @@
|
|
|
* array is only concerned with the reply we are assured that we have
|
|
|
* on extra page for the RPCRMDA header.
|
|
|
*/
|
|
|
-static void xdr_to_sge(struct svcxprt_rdma *xprt,
|
|
|
- struct xdr_buf *xdr,
|
|
|
- struct svc_rdma_req_map *vec)
|
|
|
+int fast_reg_xdr(struct svcxprt_rdma *xprt,
|
|
|
+ struct xdr_buf *xdr,
|
|
|
+ struct svc_rdma_req_map *vec)
|
|
|
+{
|
|
|
+ int sge_no;
|
|
|
+ u32 sge_bytes;
|
|
|
+ u32 page_bytes;
|
|
|
+ u32 page_off;
|
|
|
+ int page_no = 0;
|
|
|
+ u8 *frva;
|
|
|
+ struct svc_rdma_fastreg_mr *frmr;
|
|
|
+
|
|
|
+ frmr = svc_rdma_get_frmr(xprt);
|
|
|
+ if (IS_ERR(frmr))
|
|
|
+ return -ENOMEM;
|
|
|
+ vec->frmr = frmr;
|
|
|
+
|
|
|
+ /* Skip the RPCRDMA header */
|
|
|
+ sge_no = 1;
|
|
|
+
|
|
|
+ /* Map the head. */
|
|
|
+ frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
|
|
|
+ vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
|
|
|
+ vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
|
|
|
+ vec->count = 2;
|
|
|
+ sge_no++;
|
|
|
+
|
|
|
+ /* Build the FRMR */
|
|
|
+ frmr->kva = frva;
|
|
|
+ frmr->direction = DMA_TO_DEVICE;
|
|
|
+ frmr->access_flags = 0;
|
|
|
+ frmr->map_len = PAGE_SIZE;
|
|
|
+ frmr->page_list_len = 1;
|
|
|
+ frmr->page_list->page_list[page_no] =
|
|
|
+ ib_dma_map_single(xprt->sc_cm_id->device,
|
|
|
+ (void *)xdr->head[0].iov_base,
|
|
|
+ PAGE_SIZE, DMA_TO_DEVICE);
|
|
|
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
|
|
+ frmr->page_list->page_list[page_no]))
|
|
|
+ goto fatal_err;
|
|
|
+ atomic_inc(&xprt->sc_dma_used);
|
|
|
+
|
|
|
+ page_off = xdr->page_base;
|
|
|
+ page_bytes = xdr->page_len + page_off;
|
|
|
+ if (!page_bytes)
|
|
|
+ goto encode_tail;
|
|
|
+
|
|
|
+ /* Map the pages */
|
|
|
+ vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
|
|
|
+ vec->sge[sge_no].iov_len = page_bytes;
|
|
|
+ sge_no++;
|
|
|
+ while (page_bytes) {
|
|
|
+ struct page *page;
|
|
|
+
|
|
|
+ page = xdr->pages[page_no++];
|
|
|
+ sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
|
|
|
+ page_bytes -= sge_bytes;
|
|
|
+
|
|
|
+ frmr->page_list->page_list[page_no] =
|
|
|
+ ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
|
|
|
+ PAGE_SIZE, DMA_TO_DEVICE);
|
|
|
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
|
|
+ frmr->page_list->page_list[page_no]))
|
|
|
+ goto fatal_err;
|
|
|
+
|
|
|
+ atomic_inc(&xprt->sc_dma_used);
|
|
|
+ page_off = 0; /* reset for next time through loop */
|
|
|
+ frmr->map_len += PAGE_SIZE;
|
|
|
+ frmr->page_list_len++;
|
|
|
+ }
|
|
|
+ vec->count++;
|
|
|
+
|
|
|
+ encode_tail:
|
|
|
+ /* Map tail */
|
|
|
+ if (0 == xdr->tail[0].iov_len)
|
|
|
+ goto done;
|
|
|
+
|
|
|
+ vec->count++;
|
|
|
+ vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
|
|
|
+
|
|
|
+ if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
|
|
|
+ ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
|
|
|
+ /*
|
|
|
+ * If head and tail use the same page, we don't need
|
|
|
+ * to map it again.
|
|
|
+ */
|
|
|
+ vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
|
|
|
+ } else {
|
|
|
+ void *va;
|
|
|
+
|
|
|
+ /* Map another page for the tail */
|
|
|
+ page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
|
|
|
+ va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
|
|
|
+ vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
|
|
|
+
|
|
|
+ frmr->page_list->page_list[page_no] =
|
|
|
+ ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
|
|
|
+ DMA_TO_DEVICE);
|
|
|
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
|
|
+ frmr->page_list->page_list[page_no]))
|
|
|
+ goto fatal_err;
|
|
|
+ atomic_inc(&xprt->sc_dma_used);
|
|
|
+ frmr->map_len += PAGE_SIZE;
|
|
|
+ frmr->page_list_len++;
|
|
|
+ }
|
|
|
+
|
|
|
+ done:
|
|
|
+ if (svc_rdma_fastreg(xprt, frmr))
|
|
|
+ goto fatal_err;
|
|
|
+
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ fatal_err:
|
|
|
+ printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
|
|
|
+ svc_rdma_put_frmr(xprt, frmr);
|
|
|
+ return -EIO;
|
|
|
+}
|
|
|
+
|
|
|
+static int map_xdr(struct svcxprt_rdma *xprt,
|
|
|
+ struct xdr_buf *xdr,
|
|
|
+ struct svc_rdma_req_map *vec)
|
|
|
{
|
|
|
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
|
|
|
int sge_no;
|
|
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
|
|
|
BUG_ON(xdr->len !=
|
|
|
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
|
|
|
|
|
|
+ if (xprt->sc_frmr_pg_list_len)
|
|
|
+ return fast_reg_xdr(xprt, xdr, vec);
|
|
|
+
|
|
|
/* Skip the first sge, this is for the RPCRDMA header */
|
|
|
sge_no = 1;
|
|
|
|
|
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
|
|
|
|
|
|
BUG_ON(sge_no > sge_max);
|
|
|
vec->count = sge_no;
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
/* Assumptions:
|
|
|
+ * - We are using FRMR
|
|
|
+ * - or -
|
|
|
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
|
|
|
*/
|
|
|
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
|
|
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
|
|
|
sge_no = 0;
|
|
|
|
|
|
/* Copy the remaining SGE */
|
|
|
- while (bc != 0 && xdr_sge_no < vec->count) {
|
|
|
- sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
|
|
|
- sge_bytes = min((size_t)bc,
|
|
|
- (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
|
|
|
+ while (bc != 0) {
|
|
|
+ sge_bytes = min_t(size_t,
|
|
|
+ bc, vec->sge[xdr_sge_no].iov_len-sge_off);
|
|
|
sge[sge_no].length = sge_bytes;
|
|
|
- atomic_inc(&xprt->sc_dma_used);
|
|
|
- sge[sge_no].addr =
|
|
|
- ib_dma_map_single(xprt->sc_cm_id->device,
|
|
|
- (void *)
|
|
|
- vec->sge[xdr_sge_no].iov_base + sge_off,
|
|
|
- sge_bytes, DMA_TO_DEVICE);
|
|
|
- if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
|
|
|
- sge[sge_no].addr))
|
|
|
- goto err;
|
|
|
+ if (!vec->frmr) {
|
|
|
+ sge[sge_no].addr =
|
|
|
+ ib_dma_map_single(xprt->sc_cm_id->device,
|
|
|
+ (void *)
|
|
|
+ vec->sge[xdr_sge_no].iov_base + sge_off,
|
|
|
+ sge_bytes, DMA_TO_DEVICE);
|
|
|
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
|
|
+ sge[sge_no].addr))
|
|
|
+ goto err;
|
|
|
+ atomic_inc(&xprt->sc_dma_used);
|
|
|
+ sge[sge_no].lkey = xprt->sc_dma_lkey;
|
|
|
+ } else {
|
|
|
+ sge[sge_no].addr = (unsigned long)
|
|
|
+ vec->sge[xdr_sge_no].iov_base + sge_off;
|
|
|
+ sge[sge_no].lkey = vec->frmr->mr->lkey;
|
|
|
+ }
|
|
|
+ ctxt->count++;
|
|
|
+ ctxt->frmr = vec->frmr;
|
|
|
sge_off = 0;
|
|
|
sge_no++;
|
|
|
- ctxt->count++;
|
|
|
xdr_sge_no++;
|
|
|
+ BUG_ON(xdr_sge_no > vec->count);
|
|
|
bc -= sge_bytes;
|
|
|
}
|
|
|
|
|
|
- BUG_ON(bc != 0);
|
|
|
- BUG_ON(xdr_sge_no > vec->count);
|
|
|
-
|
|
|
/* Prepare WRITE WR */
|
|
|
memset(&write_wr, 0, sizeof write_wr);
|
|
|
ctxt->wr_op = IB_WR_RDMA_WRITE;
|
|
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
|
|
|
res_ary = (struct rpcrdma_write_array *)
|
|
|
&rdma_resp->rm_body.rm_chunks[1];
|
|
|
|
|
|
- max_write = xprt->sc_max_sge * PAGE_SIZE;
|
|
|
+ if (vec->frmr)
|
|
|
+ max_write = vec->frmr->map_len;
|
|
|
+ else
|
|
|
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
|
|
|
|
|
|
/* Write chunks start at the pagelist */
|
|
|
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
|
|
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
|
|
|
res_ary = (struct rpcrdma_write_array *)
|
|
|
&rdma_resp->rm_body.rm_chunks[2];
|
|
|
|
|
|
- max_write = xprt->sc_max_sge * PAGE_SIZE;
|
|
|
+ if (vec->frmr)
|
|
|
+ max_write = vec->frmr->map_len;
|
|
|
+ else
|
|
|
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
|
|
|
|
|
|
/* xdr offset starts at RPC message */
|
|
|
for (xdr_off = 0, chunk_no = 0;
|
|
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
|
|
|
ch = &arg_ary->wc_array[chunk_no].wc_target;
|
|
|
write_len = min(xfer_len, ch->rs_length);
|
|
|
|
|
|
-
|
|
|
/* Prepare the reply chunk given the length actually
|
|
|
* written */
|
|
|
rs_offset = get_unaligned(&(ch->rs_offset));
|
|
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
|
|
int byte_count)
|
|
|
{
|
|
|
struct ib_send_wr send_wr;
|
|
|
+ struct ib_send_wr inv_wr;
|
|
|
int sge_no;
|
|
|
int sge_bytes;
|
|
|
int page_no;
|
|
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
|
|
/* Prepare the context */
|
|
|
ctxt->pages[0] = page;
|
|
|
ctxt->count = 1;
|
|
|
+ ctxt->frmr = vec->frmr;
|
|
|
+ if (vec->frmr)
|
|
|
+ set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
|
|
|
+ else
|
|
|
+ clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
|
|
|
|
|
|
/* Prepare the SGE for the RPCRDMA Header */
|
|
|
- atomic_inc(&rdma->sc_dma_used);
|
|
|
ctxt->sge[0].addr =
|
|
|
ib_dma_map_page(rdma->sc_cm_id->device,
|
|
|
page, 0, PAGE_SIZE, DMA_TO_DEVICE);
|
|
|
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
|
|
|
+ goto err;
|
|
|
+ atomic_inc(&rdma->sc_dma_used);
|
|
|
+
|
|
|
ctxt->direction = DMA_TO_DEVICE;
|
|
|
+
|
|
|
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
|
|
|
- ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
|
|
|
+ ctxt->sge[0].lkey = rdma->sc_dma_lkey;
|
|
|
|
|
|
/* Determine how many of our SGE are to be transmitted */
|
|
|
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
|
|
|
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
|
|
|
byte_count -= sge_bytes;
|
|
|
- atomic_inc(&rdma->sc_dma_used);
|
|
|
- ctxt->sge[sge_no].addr =
|
|
|
- ib_dma_map_single(rdma->sc_cm_id->device,
|
|
|
- vec->sge[sge_no].iov_base,
|
|
|
- sge_bytes, DMA_TO_DEVICE);
|
|
|
+ if (!vec->frmr) {
|
|
|
+ ctxt->sge[sge_no].addr =
|
|
|
+ ib_dma_map_single(rdma->sc_cm_id->device,
|
|
|
+ vec->sge[sge_no].iov_base,
|
|
|
+ sge_bytes, DMA_TO_DEVICE);
|
|
|
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device,
|
|
|
+ ctxt->sge[sge_no].addr))
|
|
|
+ goto err;
|
|
|
+ atomic_inc(&rdma->sc_dma_used);
|
|
|
+ ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
|
|
|
+ } else {
|
|
|
+ ctxt->sge[sge_no].addr = (unsigned long)
|
|
|
+ vec->sge[sge_no].iov_base;
|
|
|
+ ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
|
|
|
+ }
|
|
|
ctxt->sge[sge_no].length = sge_bytes;
|
|
|
- ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
|
|
|
}
|
|
|
BUG_ON(byte_count != 0);
|
|
|
|
|
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
|
|
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
|
|
|
ctxt->count++;
|
|
|
rqstp->rq_respages[page_no] = NULL;
|
|
|
- /* If there are more pages than SGE, terminate SGE list */
|
|
|
+ /*
|
|
|
+ * If there are more pages than SGE, terminate SGE
|
|
|
+ * list so that svc_rdma_unmap_dma doesn't attempt to
|
|
|
+ * unmap garbage.
|
|
|
+ */
|
|
|
if (page_no+1 >= sge_no)
|
|
|
ctxt->sge[page_no+1].length = 0;
|
|
|
}
|
|
|
BUG_ON(sge_no > rdma->sc_max_sge);
|
|
|
+ BUG_ON(sge_no > ctxt->count);
|
|
|
memset(&send_wr, 0, sizeof send_wr);
|
|
|
ctxt->wr_op = IB_WR_SEND;
|
|
|
send_wr.wr_id = (unsigned long)ctxt;
|
|
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
|
|
send_wr.num_sge = sge_no;
|
|
|
send_wr.opcode = IB_WR_SEND;
|
|
|
send_wr.send_flags = IB_SEND_SIGNALED;
|
|
|
+ if (vec->frmr) {
|
|
|
+ /* Prepare INVALIDATE WR */
|
|
|
+ memset(&inv_wr, 0, sizeof inv_wr);
|
|
|
+ inv_wr.opcode = IB_WR_LOCAL_INV;
|
|
|
+ inv_wr.send_flags = IB_SEND_SIGNALED;
|
|
|
+ inv_wr.ex.invalidate_rkey =
|
|
|
+ vec->frmr->mr->lkey;
|
|
|
+ send_wr.next = &inv_wr;
|
|
|
+ }
|
|
|
|
|
|
ret = svc_rdma_send(rdma, &send_wr);
|
|
|
if (ret)
|
|
|
- svc_rdma_put_context(ctxt, 1);
|
|
|
+ goto err;
|
|
|
|
|
|
- return ret;
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ err:
|
|
|
+ svc_rdma_put_frmr(rdma, vec->frmr);
|
|
|
+ svc_rdma_put_context(ctxt, 1);
|
|
|
+ return -EIO;
|
|
|
}
|
|
|
|
|
|
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
|
|
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
|
|
ctxt = svc_rdma_get_context(rdma);
|
|
|
ctxt->direction = DMA_TO_DEVICE;
|
|
|
vec = svc_rdma_get_req_map();
|
|
|
- xdr_to_sge(rdma, &rqstp->rq_res, vec);
|
|
|
-
|
|
|
+ ret = map_xdr(rdma, &rqstp->rq_res, vec);
|
|
|
+ if (ret)
|
|
|
+ goto err0;
|
|
|
inline_bytes = rqstp->rq_res.len;
|
|
|
|
|
|
/* Create the RDMA response header */
|
|
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
|
|
if (ret < 0) {
|
|
|
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
|
|
|
ret);
|
|
|
- goto error;
|
|
|
+ goto err1;
|
|
|
}
|
|
|
inline_bytes -= ret;
|
|
|
|
|
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
|
|
if (ret < 0) {
|
|
|
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
|
|
|
ret);
|
|
|
- goto error;
|
|
|
+ goto err1;
|
|
|
}
|
|
|
inline_bytes -= ret;
|
|
|
|
|
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
|
|
svc_rdma_put_req_map(vec);
|
|
|
dprintk("svcrdma: send_reply returns %d\n", ret);
|
|
|
return ret;
|
|
|
- error:
|
|
|
+
|
|
|
+ err1:
|
|
|
+ put_page(res_page);
|
|
|
+ err0:
|
|
|
svc_rdma_put_req_map(vec);
|
|
|
svc_rdma_put_context(ctxt, 0);
|
|
|
- put_page(res_page);
|
|
|
return ret;
|
|
|
}
|