@@ -57,9 +57,7 @@
 #define P9_RDMA_IRD 0
 #define P9_RDMA_ORD 0
 #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
-#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can
-				  * safely advertise a maxsize
-				  * of 64k */
+#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */
 
 /**
  * struct p9_trans_rdma - RDMA transport instance
@@ -75,7 +73,9 @@
  * @sq_depth: The depth of the Send Queue
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
- * @rq_count: Count of requests in the Receive Queue.
+ * @rq_sem: Semaphore for the RQ
+ * @excess_rc: Amount of posted Receive Contexts without a pending request.
+ *	       See rdma_request()
  * @addr: The remote peer's address
  * @req_lock: Protects the active request list
  * @cm_done: Completion event for connection management tracking
@@ -100,7 +100,8 @@ struct p9_trans_rdma {
 	int sq_depth;
 	struct semaphore sq_sem;
 	int rq_depth;
-	atomic_t rq_count;
+	struct semaphore rq_sem;
+	atomic_t excess_rc;
 	struct sockaddr_in addr;
 	spinlock_t req_lock;
 
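This replaces the open-coded rq_count bookkeeping with a counting semaphore,
mirroring the sq_sem that already guards the Send Queue. A minimal sketch of
the credit pattern the rest of the patch implements (names follow the patch,
but this block is illustrative, not the patched code itself):

	#include <linux/semaphore.h>

	/* one credit per Receive Queue slot */
	sema_init(&rdma->rq_sem, rdma->rq_depth);

	/* request path: take a credit before posting a receive buffer */
	if (down_interruptible(&rdma->rq_sem))
		return -EINTR;		/* interrupted: nothing was posted */
	err = post_recv(client, rpl_context);

	/* completion path (IB_WC_RECV): buffer consumed, return the credit */
	up(&rdma->rq_sem);

Where the old code silently skipped post_recv() once rq_count exceeded
rq_depth, a caller now sleeps until a receive completion frees a slot, so
posted buffers can no longer overflow the RQ.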
@@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 	if (!req)
 		goto err_out;
 
+	/* Check that we have not yet received a reply for this
+	 * request. */
+	if (unlikely(req->rc)) {
+		pr_err("Duplicate reply for request %d\n", tag);
+		goto err_out;
+	}
+
 	req->rc = c->rc;
 	req->status = REQ_STATUS_RCVD;
 	p9_client_cb(client, req);
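This guard works because rdma_request() clears req->rc once its receive
buffer has been posted, and only handle_recv() repopulates it when a reply
arrives. The timeline, pieced together from this patch (illustrative):

	/* rdma_request(): buffer handed to the HCA, detach it */
	req->rc = NULL;

	/* handle_recv(): first reply for this tag lands */
	req->rc = c->rc;

	/* handle_recv(): a second reply for the same tag means the
	 * server misbehaved; it must not be delivered twice */
	if (unlikely(req->rc)) { /* ... */ }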
@@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
 
 	switch (c->wc_op) {
 	case IB_WC_RECV:
-		atomic_dec(&rdma->rq_count);
 		handle_recv(client, rdma, c, wc.status, wc.byte_len);
+		up(&rdma->rq_sem);
 		break;
 
 	case IB_WC_SEND:
@@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *c = NULL;
 	struct p9_rdma_context *rpl_context = NULL;
 
+	/* When an error occurs between posting the recv and the send,
+	 * there will be a receive context posted without a pending request.
+	 * Since there is no way to "un-post" it, we remember it and skip
+	 * post_recv() for the next request.
+	 * So here, see if we are that `next request' and need to absorb
+	 * an excess rc. If so, drop and free our own rc, and do not call
+	 * post_recv().
+	 */
+	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
+		if (atomic_sub_return(1, &rdma->excess_rc) >= 0) {
+			/* Got one! */
+			kfree(req->rc);
+			req->rc = NULL;
+			goto dont_need_post_recv;
+		} else {
+			/* We raced and lost. */
+			atomic_inc(&rdma->excess_rc);
+		}
+	}
+
 	/* Allocate an fcall for the reply */
 	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
 		err = -ENOMEM;
-		goto err_close;
-	}
-
-	/*
-	 * If the request has a buffer, steal it, otherwise
-	 * allocate a new one.  Typically, requests should already
-	 * have receive buffers allocated and just swap them around
-	 */
-	if (!req->rc) {
-		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-				  GFP_NOFS);
-		if (req->rc) {
-			req->rc->sdata = (char *) req->rc +
-						sizeof(struct p9_fcall);
-			req->rc->capacity = client->msize;
-		}
+		goto recv_error;
 	}
 	rpl_context->rc = req->rc;
-	if (!rpl_context->rc) {
-		err = -ENOMEM;
-		goto err_free2;
-	}
 
 	/*
 	 * Post a receive buffer for this request. We need to ensure
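The absorb path uses an optimistic decrement with rollback: take the unit
first, and give it back if a racing thread had already drained the counter.
A standalone sketch of that pattern (illustrative, not the patched code):

	#include <linux/atomic.h>
	#include <linux/types.h>

	/* Try to consume one unit from a counter that may be raced on.
	 * Returns true if a unit was taken, false otherwise.
	 */
	static bool try_take_unit(atomic_t *v)
	{
		if (atomic_read(v) <= 0)	/* cheap early-out */
			return false;
		if (atomic_sub_return(1, v) >= 0)
			return true;		/* got one */
		atomic_inc(v);			/* raced and lost: undo */
		return false;
	}

rdma_request() open-codes the same logic against &rdma->excess_rc before
deciding whether post_recv() can be skipped.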
@@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * outstanding request, so we must keep a count to avoid
 	 * overflowing the RQ.
 	 */
-	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
-		err = post_recv(client, rpl_context);
-		if (err)
-			goto err_free1;
-	} else
-		atomic_dec(&rdma->rq_count);
+	if (down_interruptible(&rdma->rq_sem)) {
+		err = -EINTR;
+		goto recv_error;
+	}
 
+	err = post_recv(client, rpl_context);
+	if (err) {
+		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+		goto recv_error;
+	}
 	/* remove posted receive buffer from request structure */
 	req->rc = NULL;
 
+dont_need_post_recv:
 	/* Post the request */
 	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
-		goto err_free1;
+		goto send_error;
 	}
 	c->req = req;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
 				    c->req->tc->sdata, c->req->tc->size,
 				    DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
-		goto error;
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
+		err = -EIO;
+		goto send_error;
+	}
 
 	sge.addr = c->busa;
 	sge.length = c->req->tc->size;
@@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	wr.sg_list = &sge;
 	wr.num_sge = 1;
 
-	if (down_interruptible(&rdma->sq_sem))
-		goto error;
+	if (down_interruptible(&rdma->sq_sem)) {
+		err = -EINTR;
+		goto send_error;
+	}
 
-	return ib_post_send(rdma->qp, &wr, &bad_wr);
+	err = ib_post_send(rdma->qp, &wr, &bad_wr);
+	if (err)
+		goto send_error;
 
- error:
+	/* Success */
+	return 0;
+
+	/* Handle errors that happened while preparing or posting the send: */
+ send_error:
 	kfree(c);
-	kfree(rpl_context->rc);
-	kfree(rpl_context);
-	p9_debug(P9_DEBUG_ERROR, "EIO\n");
-	return -EIO;
- err_free1:
-	kfree(rpl_context->rc);
- err_free2:
+	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+
+	/* We did post_recv(), but not the send; we now have one
+	 * posted receive buffer in excess.
+	 */
+	atomic_inc(&rdma->excess_rc);
+	return err;
+
+	/* Handle errors that happened while preparing or posting the receive: */
+ recv_error:
 	kfree(rpl_context);
- err_close:
 	spin_lock_irqsave(&rdma->req_lock, flags);
 	if (rdma->state < P9_RDMA_CLOSING) {
 		rdma->state = P9_RDMA_CLOSING;
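Taken together, the two labels keep the posted-receive accounting balanced.
A comment-style summary of the excess_rc lifecycle implied by this hunk
(illustrative, not part of the patch):

	/*
	 * recv_error:  post_recv() never succeeded; free rpl_context and
	 *              close the connection, nothing left to account for.
	 * send_error:  a receive buffer is posted (or an excess one was
	 *              absorbed) but no send matches it; atomic_inc() of
	 *              excess_rc lets the next rdma_request() absorb it.
	 * IB_WC_RECV:  a posted buffer completed normally; up(&rq_sem)
	 *              returns its credit.
	 */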
@@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	spin_lock_init(&rdma->req_lock);
 	init_completion(&rdma->cm_done);
 	sema_init(&rdma->sq_sem, rdma->sq_depth);
-	atomic_set(&rdma->rq_count, 0);
+	sema_init(&rdma->rq_sem, rdma->rq_depth);
+	atomic_set(&rdma->excess_rc, 0);
 
 	return rdma;
 }
@@ -562,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
 	return 1;
 }
 
+/* A request has been fully flushed without a reply.
+ * That means we have posted one buffer in excess.
+ */
+static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+	struct p9_trans_rdma *rdma = client->trans;
+
+	atomic_inc(&rdma->excess_rc);
+	return 0;
+}
+
 /**
  * trans_create_rdma - Transport method for creating a transport instance
  * @client: client instance
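rdma_cancelled() only adjusts the accounting; it becomes useful once the
client core invokes it for a request that was flushed after being sent. The
call site lives in net/9p/client.c and is not part of this hunk, so the
snippet below is a hedged sketch of the expected hookup, not the actual code:

	static struct p9_trans_module p9_rdma_trans = {
		.name = "rdma",
		.cancel = rdma_cancel,
		.cancelled = rdma_cancelled,	/* the new hook */
		/* other methods (create/close/request/...) omitted */
	};

	/* client core, once a TFLUSH concludes and the flushed request
	 * had already been sent but will never receive its reply:
	 */
	if (client->trans_mod->cancelled)
		client->trans_mod->cancelled(client, oldreq);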