Browse Source

Merge branch 'nfs-for-2.6.38' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6

* 'nfs-for-2.6.38' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (89 commits)
  NFS fix the setting of exchange id flag
  NFS: Don't use vm_map_ram() in readdir
  NFSv4: Ensure continued open and lockowner name uniqueness
  NFS: Move cl_delegations to the nfs_server struct
  NFS: Introduce nfs_detach_delegations()
  NFS: Move cl_state_owners and related fields to the nfs_server struct
  NFS: Allow walking nfs_client.cl_superblocks list outside client.c
  pnfs: layout roc code
  pnfs: update nfs4_callback_recallany to handle layouts
  pnfs: add CB_LAYOUTRECALL handling
  pnfs: CB_LAYOUTRECALL xdr code
  pnfs: change lo refcounting to atomic_t
  pnfs: check that partial LAYOUTGET return is ignored
  pnfs: add layout to client list before sending rpc
  pnfs: serialize LAYOUTGET(openstateid)
  pnfs: layoutget rpc code cleanup
  pnfs: change how lsegs are removed from layout list
  pnfs: change layout state seqlock to a spinlock
  pnfs: add prefix to struct pnfs_layout_hdr fields
  pnfs: add prefix to struct pnfs_layout_segment fields
  ...
Linus Torvalds 14 years ago
parent
commit
b9d919a4ac
60 changed files with 7440 additions and 3656 deletions
  1. 3 3
      fs/lockd/Makefile
  2. 605 0
      fs/lockd/clnt4xdr.c
  3. 2 2
      fs/lockd/clntlock.c
  4. 9 9
      fs/lockd/clntproc.c
  5. 627 0
      fs/lockd/clntxdr.c
  6. 245 164
      fs/lockd/host.c
  7. 42 68
      fs/lockd/mon.c
  8. 10 10
      fs/lockd/svc4proc.c
  9. 32 2
      fs/lockd/svclock.c
  10. 18 10
      fs/lockd/svcproc.c
  11. 0 287
      fs/lockd/xdr.c
  12. 0 255
      fs/lockd/xdr4.c
  13. 70 13
      fs/nfs/callback.c
  14. 51 8
      fs/nfs/callback.h
  15. 221 105
      fs/nfs/callback_proc.c
  16. 127 16
      fs/nfs/callback_xdr.c
  17. 210 92
      fs/nfs/client.c
  18. 259 103
      fs/nfs/delegation.c
  19. 1 0
      fs/nfs/delegation.h
  20. 36 36
      fs/nfs/dir.c
  21. 1 1
      fs/nfs/idmap.c
  22. 2 1
      fs/nfs/inode.c
  23. 13 6
      fs/nfs/internal.h
  24. 34 49
      fs/nfs/mount_clnt.c
  25. 834 410
      fs/nfs/nfs2xdr.c
  26. 2082 799
      fs/nfs/nfs3xdr.c
  27. 4 9
      fs/nfs/nfs4_fs.h
  28. 3 3
      fs/nfs/nfs4filelayout.c
  29. 128 60
      fs/nfs/nfs4proc.c
  30. 8 3
      fs/nfs/nfs4renewd.c
  31. 213 80
      fs/nfs/nfs4state.c
  32. 286 361
      fs/nfs/nfs4xdr.c
  33. 2 5
      fs/nfs/pagelist.c
  34. 353 171
      fs/nfs/pnfs.c
  35. 61 15
      fs/nfs/pnfs.h
  36. 3 2
      fs/nfs/proc.c
  37. 16 2
      fs/nfs/super.c
  38. 1 1
      fs/nfs/unlink.c
  39. 424 266
      fs/nfsd/nfs4callback.c
  40. 0 10
      include/linux/lockd/debug.h
  41. 4 2
      include/linux/lockd/lockd.h
  42. 3 0
      include/linux/nfs3.h
  43. 6 2
      include/linux/nfs4.h
  44. 9 6
      include/linux/nfs_fs_sb.h
  45. 4 2
      include/linux/nfs_xdr.h
  46. 4 4
      include/linux/sunrpc/auth.h
  47. 14 1
      include/linux/sunrpc/bc_xprt.h
  48. 2 2
      include/linux/sunrpc/clnt.h
  49. 1 1
      include/linux/sunrpc/svc.h
  50. 1 0
      include/linux/sunrpc/svc_xprt.h
  51. 11 3
      include/linux/sunrpc/xdr.h
  52. 24 4
      net/sunrpc/auth.c
  53. 31 13
      net/sunrpc/auth_gss/auth_gss.c
  54. 1 1
      net/sunrpc/bc_svc.c
  55. 6 15
      net/sunrpc/clnt.c
  56. 1 1
      net/sunrpc/rpc_pipe.c
  57. 56 91
      net/sunrpc/rpcb_clnt.c
  58. 17 19
      net/sunrpc/svc.c
  59. 85 21
      net/sunrpc/svcsock.c
  60. 124 31
      net/sunrpc/xdr.c

+ 3 - 3
fs/lockd/Makefile

@@ -4,7 +4,7 @@
 
 
 obj-$(CONFIG_LOCKD) += lockd.o
 obj-$(CONFIG_LOCKD) += lockd.o
 
 
-lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \
-	        svcproc.o svcsubs.o mon.o xdr.o grace.o
-lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o
+lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
+	        svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o
+lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
 lockd-objs		      := $(lockd-objs-y)
 lockd-objs		      := $(lockd-objs-y)

+ 605 - 0
fs/lockd/clnt4xdr.c

@@ -0,0 +1,605 @@
+/*
+ * linux/fs/lockd/clnt4xdr.c
+ *
+ * XDR functions to encode/decode NLM version 4 RPC arguments and results.
+ *
+ * NLM client-side only.
+ *
+ * Copyright (C) 2010, Oracle.  All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/lockd/lockd.h>
+
+#define NLMDBG_FACILITY		NLMDBG_XDR
+
+#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
+#  error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
+#endif
+
+#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
+#  error "NLM host name cannot be larger than NLM's maximum string length!"
+#endif
+
+/*
+ * Declare the space requirements for NLM arguments and replies as
+ * number of 32bit-words
+ */
+#define NLM4_void_sz		(0)
+#define NLM4_cookie_sz		(1+(NLM_MAXCOOKIELEN>>2))
+#define NLM4_caller_sz		(1+(NLMCLNT_OHSIZE>>2))
+#define NLM4_owner_sz		(1+(NLMCLNT_OHSIZE>>2))
+#define NLM4_fhandle_sz		(1+(NFS3_FHSIZE>>2))
+#define NLM4_lock_sz		(5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz)
+#define NLM4_holder_sz		(6+NLM4_owner_sz)
+
+#define NLM4_testargs_sz	(NLM4_cookie_sz+1+NLM4_lock_sz)
+#define NLM4_lockargs_sz	(NLM4_cookie_sz+4+NLM4_lock_sz)
+#define NLM4_cancargs_sz	(NLM4_cookie_sz+2+NLM4_lock_sz)
+#define NLM4_unlockargs_sz	(NLM4_cookie_sz+NLM4_lock_sz)
+
+#define NLM4_testres_sz		(NLM4_cookie_sz+1+NLM4_holder_sz)
+#define NLM4_res_sz		(NLM4_cookie_sz+1)
+#define NLM4_norep_sz		(0)
+
+
+static s64 loff_t_to_s64(loff_t offset)
+{
+	s64 res;
+
+	if (offset >= NLM4_OFFSET_MAX)
+		res = NLM4_OFFSET_MAX;
+	else if (offset <= -NLM4_OFFSET_MAX)
+		res = -NLM4_OFFSET_MAX;
+	else
+		res = offset;
+	return res;
+}
+
+static void nlm4_compute_offsets(const struct nlm_lock *lock,
+				 u64 *l_offset, u64 *l_len)
+{
+	const struct file_lock *fl = &lock->fl;
+
+	BUG_ON(fl->fl_start > NLM4_OFFSET_MAX);
+	BUG_ON(fl->fl_end > NLM4_OFFSET_MAX &&
+				fl->fl_end != OFFSET_MAX);
+
+	*l_offset = loff_t_to_s64(fl->fl_start);
+	if (fl->fl_end == OFFSET_MAX)
+		*l_len = 0;
+	else
+		*l_len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+	dprintk("lockd: %s prematurely hit the end of our receive buffer. "
+		"Remaining buffer length is %tu words.\n",
+		func, xdr->end - xdr->p);
+}
+
+
+/*
+ * Encode/decode NLMv4 basic data types
+ *
+ * Basic NLMv4 data types are defined in Appendix II, section 6.1.4
+ * of RFC 1813: "NFS Version 3 Protocol Specification" and in Chapter
+ * 10 of X/Open's "Protocols for Interworking: XNFS, Version 3W".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions.  For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+static void encode_bool(struct xdr_stream *xdr, const int value)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p = value ? xdr_one : xdr_zero;
+}
+
+static void encode_int32(struct xdr_stream *xdr, const s32 value)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p = cpu_to_be32(value);
+}
+
+/*
+ *	typedef opaque netobj<MAXNETOBJ_SZ>
+ */
+static void encode_netobj(struct xdr_stream *xdr,
+			  const u8 *data, const unsigned int length)
+{
+	__be32 *p;
+
+	BUG_ON(length > XDR_MAX_NETOBJ);
+	p = xdr_reserve_space(xdr, 4 + length);
+	xdr_encode_opaque(p, data, length);
+}
+
+/*
+ * Decode a netobj (length word followed by opaque data) from the
+ * receive stream.
+ *
+ * On success, obj->len holds the payload length and obj->data points
+ * at the payload inside the receive buffer (no copy is made), and 0
+ * is returned.  Returns -EIO if the length exceeds XDR_MAX_NETOBJ or
+ * the buffer is too short to hold the length word or the payload.
+ */
+static int decode_netobj(struct xdr_stream *xdr,
+			 struct xdr_netobj *obj)
+{
+	u32 length;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	length = be32_to_cpup(p);
+	if (unlikely(length > XDR_MAX_NETOBJ))
+		goto out_size;
+	/*
+	 * Consume the payload too, exactly as decode_cookie() does.
+	 * Without this the stream position stays at the start of the
+	 * netobj data, so whatever the caller decodes next is read
+	 * from the wrong place, and the payload is never checked
+	 * against the end of the receive buffer.
+	 */
+	p = xdr_inline_decode(xdr, length);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	obj->len = length;
+	obj->data = (u8 *)p;
+	return 0;
+out_size:
+	dprintk("NFS: returned netobj was too long: %u\n", length);
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	netobj cookie;
+ */
+static void encode_cookie(struct xdr_stream *xdr,
+			  const struct nlm_cookie *cookie)
+{
+	BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
+	encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
+}
+
+static int decode_cookie(struct xdr_stream *xdr,
+			     struct nlm_cookie *cookie)
+{
+	u32 length;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	length = be32_to_cpup(p++);
+	/* apparently HPUX can return empty cookies */
+	if (length == 0)
+		goto out_hpux;
+	if (length > NLM_MAXCOOKIELEN)
+		goto out_size;
+	p = xdr_inline_decode(xdr, length);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	cookie->len = length;
+	memcpy(cookie->data, p, length);
+	return 0;
+out_hpux:
+	cookie->len = 4;
+	memset(cookie->data, 0, 4);
+	return 0;
+out_size:
+	dprintk("NFS: returned cookie was too long: %u\n", length);
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	netobj fh;
+ */
+static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+	BUG_ON(fh->size > NFS3_FHSIZE);
+	encode_netobj(xdr, (u8 *)&fh->data, fh->size);
+}
+
+/*
+ *	enum nlm4_stats {
+ *		NLM4_GRANTED = 0,
+ *		NLM4_DENIED = 1,
+ *		NLM4_DENIED_NOLOCKS = 2,
+ *		NLM4_BLOCKED = 3,
+ *		NLM4_DENIED_GRACE_PERIOD = 4,
+ *		NLM4_DEADLCK = 5,
+ *		NLM4_ROFS = 6,
+ *		NLM4_STALE_FH = 7,
+ *		NLM4_FBIG = 8,
+ *		NLM4_FAILED = 9
+ *	};
+ *
+ *	struct nlm4_stat {
+ *		nlm4_stats stat;
+ *	};
+ *
+ * NB: we don't swap bytes for the NLM status values.  The upper
+ * layers deal directly with the status value in network byte
+ * order.
+ */
+static void encode_nlm4_stat(struct xdr_stream *xdr,
+			     const __be32 stat)
+{
+	__be32 *p;
+
+	BUG_ON(be32_to_cpu(stat) > NLM_FAILED);
+	p = xdr_reserve_space(xdr, 4);
+	*p = stat;
+}
+
+/*
+ * Decode an nlm4_stat result word.
+ *
+ * The status is stored in *stat unswapped (network byte order), as
+ * the upper layers expect.  Returns 0 on success, or -EIO if the
+ * buffer is exhausted or the value is larger than nlm4_failed.
+ */
+static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	/*
+	 * The range check must be done in CPU byte order: comparing
+	 * two big-endian words with '>' does not order them by their
+	 * numeric value on little-endian hosts, which would let
+	 * out-of-range status values through.
+	 */
+	if (unlikely(be32_to_cpup(p) > be32_to_cpu(nlm4_failed)))
+		goto out_bad_xdr;
+	*stat = *p;
+	return 0;
+out_bad_xdr:
+	dprintk("%s: server returned invalid nlm4_stats value: %u\n",
+			__func__, be32_to_cpup(p));
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	struct nlm4_holder {
+ *		bool	exclusive;
+ *		int32	svid;
+ *		netobj	oh;
+ *		uint64	l_offset;
+ *		uint64	l_len;
+ *	};
+ */
+/*
+ * Encode the nlm4_holder carried in @result->lock: the exclusive
+ * flag, svid, owner handle, and the 64-bit offset and length
+ * computed by nlm4_compute_offsets().
+ */
+static void encode_nlm4_holder(struct xdr_stream *xdr,
+			       const struct nlm_res *result)
+{
+	const struct nlm_lock *lock = &result->lock;
+	u64 l_offset, l_len;
+	__be32 *p;
+
+	/*
+	 * NOTE(review): this sends "exclusive" as true for F_RDLCK,
+	 * whereas decode_nlm4_holder() maps a non-zero "exclusive" to
+	 * F_WRLCK.  The polarity looks inverted; left unchanged here,
+	 * confirm against the protocol before altering.
+	 */
+	encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
+	encode_int32(xdr, lock->svid);
+	encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+	/* l_offset and l_len are uint64 on the wire: 8 bytes each */
+	p = xdr_reserve_space(xdr, 8 + 8);
+	nlm4_compute_offsets(lock, &l_offset, &l_len);
+	p = xdr_encode_hyper(p, l_offset);
+	xdr_encode_hyper(p, l_len);
+}
+
+/*
+ * Decode an nlm4_holder into @result->lock.
+ *
+ * The lock is zeroed and its file_lock re-initialised first.  The
+ * holder is then decoded as: exclusive flag and svid (4 bytes each),
+ * the owner handle netobj, and the 64-bit offset and length.  The
+ * file_lock is set up as a POSIX lock of type F_WRLCK when the
+ * exclusive flag is non-zero, F_RDLCK otherwise.
+ *
+ * Returns 0 on success, the error from decode_netobj(), or -EIO if
+ * the receive buffer is too short.
+ */
+static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
+{
+	struct nlm_lock *lock = &result->lock;
+	struct file_lock *fl = &lock->fl;
+	u64 l_offset, l_len;
+	u32 exclusive;
+	int error;
+	__be32 *p;
+	s64 end;
+
+	memset(lock, 0, sizeof(*lock));
+	locks_init_lock(fl);
+
+	p = xdr_inline_decode(xdr, 4 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	exclusive = be32_to_cpup(p++);
+	lock->svid = be32_to_cpup(p);
+	fl->fl_pid = (pid_t)lock->svid;
+
+	error = decode_netobj(xdr, &lock->oh);
+	if (unlikely(error))
+		goto out;
+
+	p = xdr_inline_decode(xdr, 8 + 8);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+
+	fl->fl_flags = FL_POSIX;
+	fl->fl_type  = exclusive != 0 ? F_WRLCK : F_RDLCK;
+	p = xdr_decode_hyper(p, &l_offset);
+	xdr_decode_hyper(p, &l_len);
+	/*
+	 * Offset and length are 64-bit in NLMv4, so the end offset
+	 * must be computed in 64 bits as well; a 32-bit "end" would
+	 * truncate any range reaching beyond 2GB.  A sum that wraps
+	 * negative is treated like a zero length: lock to end of file.
+	 */
+	end = l_offset + l_len - 1;
+
+	fl->fl_start = (loff_t)l_offset;
+	if (l_len == 0 || end < 0)
+		fl->fl_end = OFFSET_MAX;
+	else
+		fl->fl_end = (loff_t)end;
+	error = 0;
+out:
+	return error;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	string caller_name<LM_MAXSTRLEN>;
+ */
+static void encode_caller_name(struct xdr_stream *xdr, const char *name)
+{
+	/* NB: client-side does not set lock->len */
+	u32 length = strlen(name);
+	__be32 *p;
+
+	BUG_ON(length > NLM_MAXSTRLEN);
+	p = xdr_reserve_space(xdr, 4 + length);
+	xdr_encode_opaque(p, name, length);
+}
+
+/*
+ *	struct nlm4_lock {
+ *		string	caller_name<LM_MAXSTRLEN>;
+ *		netobj	fh;
+ *		netobj	oh;
+ *		int32	svid;
+ *		uint64	l_offset;
+ *		uint64	l_len;
+ *	};
+ */
+static void encode_nlm4_lock(struct xdr_stream *xdr,
+			     const struct nlm_lock *lock)
+{
+	u64 l_offset, l_len;
+	__be32 *p;
+
+	encode_caller_name(xdr, lock->caller);
+	encode_fh(xdr, &lock->fh);
+	encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+	p = xdr_reserve_space(xdr, 4 + 8 + 8);
+	*p++ = cpu_to_be32(lock->svid);
+
+	nlm4_compute_offsets(lock, &l_offset, &l_len);
+	p = xdr_encode_hyper(p, l_offset);
+	xdr_encode_hyper(p, l_len);
+}
+
+
+/*
+ * NLMv4 XDR encode functions
+ *
+ * NLMv4 argument types are defined in Appendix II of RFC 1813:
+ * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ *	struct nlm4_testargs {
+ *		netobj cookie;
+ *		bool exclusive;
+ *		struct nlm4_lock alock;
+ *	};
+ */
+static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
+{
+	const struct nlm_lock *lock = &args->lock;
+
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ *	struct nlm4_lockargs {
+ *		netobj cookie;
+ *		bool block;
+ *		bool exclusive;
+ *		struct nlm4_lock alock;
+ *		bool reclaim;
+ *		int state;
+ *	};
+ */
+static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
+{
+	const struct nlm_lock *lock = &args->lock;
+
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
+	encode_bool(xdr, args->reclaim);
+	encode_int32(xdr, args->state);
+}
+
+/*
+ *	struct nlm4_cancargs {
+ *		netobj cookie;
+ *		bool block;
+ *		bool exclusive;
+ *		struct nlm4_lock alock;
+ *	};
+ */
+static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
+{
+	const struct nlm_lock *lock = &args->lock;
+
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ *	struct nlm4_unlockargs {
+ *		netobj cookie;
+ *		struct nlm4_lock alock;
+ *	};
+ */
+static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nlm_args *args)
+{
+	const struct nlm_lock *lock = &args->lock;
+
+	encode_cookie(xdr, &args->cookie);
+	encode_nlm4_lock(xdr, lock);
+}
+
+/*
+ *	struct nlm4_res {
+ *		netobj cookie;
+ *		nlm4_stat stat;
+ *	};
+ */
+static void nlm4_xdr_enc_res(struct rpc_rqst *req,
+			     struct xdr_stream *xdr,
+			     const struct nlm_res *result)
+{
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm4_stat(xdr, result->status);
+}
+
+/*
+ *	union nlm4_testrply switch (nlm4_stats stat) {
+ *	case NLM4_DENIED:
+ *		struct nlm4_holder holder;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	struct nlm4_testres {
+ *		netobj cookie;
+ *		nlm4_testrply test_stat;
+ *	};
+ */
+static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_res *result)
+{
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm4_stat(xdr, result->status);
+	if (result->status == nlm_lck_denied)
+		encode_nlm4_holder(xdr, result);
+}
+
+
+/*
+ * NLMv4 XDR decode functions
+ *
+ * NLMv4 result types are defined in Appendix II of RFC 1813:
+ * "NFS Version 3 Protocol Specification" and Chapter 10 of X/Open's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ *	union nlm4_testrply switch (nlm4_stats stat) {
+ *	case NLM4_DENIED:
+ *		struct nlm4_holder holder;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	struct nlm4_testres {
+ *		netobj cookie;
+ *		nlm4_testrply test_stat;
+ *	};
+ */
+static int decode_nlm4_testrply(struct xdr_stream *xdr,
+				struct nlm_res *result)
+{
+	int error;
+
+	error = decode_nlm4_stat(xdr, &result->status);
+	if (unlikely(error))
+		goto out;
+	if (result->status == nlm_lck_denied)
+		error = decode_nlm4_holder(xdr, result);
+out:
+	return error;
+}
+
+static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				struct nlm_res *result)
+{
+	int error;
+
+	error = decode_cookie(xdr, &result->cookie);
+	if (unlikely(error))
+		goto out;
+	error = decode_nlm4_testrply(xdr, result);
+out:
+	return error;
+}
+
+/*
+ *	struct nlm4_res {
+ *		netobj cookie;
+ *		nlm4_stat stat;
+ *	};
+ */
+static int nlm4_xdr_dec_res(struct rpc_rqst *req,
+			    struct xdr_stream *xdr,
+			    struct nlm_res *result)
+{
+	int error;
+
+	error = decode_cookie(xdr, &result->cookie);
+	if (unlikely(error))
+		goto out;
+	error = decode_nlm4_stat(xdr, &result->status);
+out:
+	return error;
+}
+
+
+/*
+ * For NLM, a void procedure really returns nothing
+ */
+#define nlm4_xdr_dec_norep	NULL
+
+#define PROC(proc, argtype, restype)					\
+[NLMPROC_##proc] = {							\
+	.p_proc      = NLMPROC_##proc,					\
+	.p_encode    = (kxdreproc_t)nlm4_xdr_enc_##argtype,		\
+	.p_decode    = (kxdrdproc_t)nlm4_xdr_dec_##restype,		\
+	.p_arglen    = NLM4_##argtype##_sz,				\
+	.p_replen    = NLM4_##restype##_sz,				\
+	.p_statidx   = NLMPROC_##proc,					\
+	.p_name      = #proc,						\
+	}
+
+static struct rpc_procinfo	nlm4_procedures[] = {
+	PROC(TEST,		testargs,	testres),
+	PROC(LOCK,		lockargs,	res),
+	PROC(CANCEL,		cancargs,	res),
+	PROC(UNLOCK,		unlockargs,	res),
+	PROC(GRANTED,		testargs,	res),
+	PROC(TEST_MSG,		testargs,	norep),
+	PROC(LOCK_MSG,		lockargs,	norep),
+	PROC(CANCEL_MSG,	cancargs,	norep),
+	PROC(UNLOCK_MSG,	unlockargs,	norep),
+	PROC(GRANTED_MSG,	testargs,	norep),
+	PROC(TEST_RES,		testres,	norep),
+	PROC(LOCK_RES,		res,		norep),
+	PROC(CANCEL_RES,	res,		norep),
+	PROC(UNLOCK_RES,	res,		norep),
+	PROC(GRANTED_RES,	res,		norep),
+};
+
+struct rpc_version	nlm_version4 = {
+	.number		= 4,
+	.nrprocs	= ARRAY_SIZE(nlm4_procedures),
+	.procs		= nlm4_procedures,
+};

+ 2 - 2
fs/lockd/clntlock.c

@@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(nlmclnt_init);
  */
  */
 void nlmclnt_done(struct nlm_host *host)
 void nlmclnt_done(struct nlm_host *host)
 {
 {
-	nlm_release_host(host);
+	nlmclnt_release_host(host);
 	lockd_down();
 	lockd_down();
 }
 }
 EXPORT_SYMBOL_GPL(nlmclnt_done);
 EXPORT_SYMBOL_GPL(nlmclnt_done);
@@ -273,7 +273,7 @@ restart:
 	spin_unlock(&nlm_blocked_lock);
 	spin_unlock(&nlm_blocked_lock);
 
 
 	/* Release host handle after use */
 	/* Release host handle after use */
-	nlm_release_host(host);
+	nlmclnt_release_host(host);
 	lockd_down();
 	lockd_down();
 	return 0;
 	return 0;
 }
 }

+ 9 - 9
fs/lockd/clntproc.c

@@ -58,7 +58,7 @@ static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
 		return;
 		return;
 	list_del(&lockowner->list);
 	list_del(&lockowner->list);
 	spin_unlock(&lockowner->host->h_lock);
 	spin_unlock(&lockowner->host->h_lock);
-	nlm_release_host(lockowner->host);
+	nlmclnt_release_host(lockowner->host);
 	kfree(lockowner);
 	kfree(lockowner);
 }
 }
 
 
@@ -207,22 +207,22 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 		printk("nlm_alloc_call: failed, waiting for memory\n");
 		printk("nlm_alloc_call: failed, waiting for memory\n");
 		schedule_timeout_interruptible(5*HZ);
 		schedule_timeout_interruptible(5*HZ);
 	}
 	}
-	nlm_release_host(host);
+	nlmclnt_release_host(host);
 	return NULL;
 	return NULL;
 }
 }
 
 
-void nlm_release_call(struct nlm_rqst *call)
+void nlmclnt_release_call(struct nlm_rqst *call)
 {
 {
 	if (!atomic_dec_and_test(&call->a_count))
 	if (!atomic_dec_and_test(&call->a_count))
 		return;
 		return;
-	nlm_release_host(call->a_host);
+	nlmclnt_release_host(call->a_host);
 	nlmclnt_release_lockargs(call);
 	nlmclnt_release_lockargs(call);
 	kfree(call);
 	kfree(call);
 }
 }
 
 
 static void nlmclnt_rpc_release(void *data)
 static void nlmclnt_rpc_release(void *data)
 {
 {
-	nlm_release_call(data);
+	nlmclnt_release_call(data);
 }
 }
 
 
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -436,7 +436,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 			status = nlm_stat_to_errno(req->a_res.status);
 			status = nlm_stat_to_errno(req->a_res.status);
 	}
 	}
 out:
 out:
-	nlm_release_call(req);
+	nlmclnt_release_call(req);
 	return status;
 	return status;
 }
 }
 
 
@@ -593,7 +593,7 @@ again:
 out_unblock:
 out_unblock:
 	nlmclnt_finish_block(block);
 	nlmclnt_finish_block(block);
 out:
 out:
-	nlm_release_call(req);
+	nlmclnt_release_call(req);
 	return status;
 	return status;
 out_unlock:
 out_unlock:
 	/* Fatal error: ensure that we remove the lock altogether */
 	/* Fatal error: ensure that we remove the lock altogether */
@@ -694,7 +694,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	/* What to do now? I'm out of my depth... */
 	/* What to do now? I'm out of my depth... */
 	status = -ENOLCK;
 	status = -ENOLCK;
 out:
 out:
-	nlm_release_call(req);
+	nlmclnt_release_call(req);
 	return status;
 	return status;
 }
 }
 
 
@@ -755,7 +755,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 			NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 			NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 	if (status == 0 && req->a_res.status == nlm_lck_denied)
 	if (status == 0 && req->a_res.status == nlm_lck_denied)
 		status = -ENOLCK;
 		status = -ENOLCK;
-	nlm_release_call(req);
+	nlmclnt_release_call(req);
 	return status;
 	return status;
 }
 }
 
 

+ 627 - 0
fs/lockd/clntxdr.c

@@ -0,0 +1,627 @@
+/*
+ * linux/fs/lockd/clntxdr.c
+ *
+ * XDR functions to encode/decode NLM version 3 RPC arguments and results.
+ * NLM version 3 is backwards compatible with NLM versions 1 and 2.
+ *
+ * NLM client-side only.
+ *
+ * Copyright (C) 2010, Oracle.  All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/lockd/lockd.h>
+
+#define NLMDBG_FACILITY		NLMDBG_XDR
+
+#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
+#  error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
+#endif
+
+/*
+ * Declare the space requirements for NLM arguments and replies as
+ * number of 32bit-words
+ */
+#define NLM_cookie_sz		(1+(NLM_MAXCOOKIELEN>>2))
+#define NLM_caller_sz		(1+(NLMCLNT_OHSIZE>>2))
+#define NLM_owner_sz		(1+(NLMCLNT_OHSIZE>>2))
+#define NLM_fhandle_sz		(1+(NFS2_FHSIZE>>2))
+#define NLM_lock_sz		(3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz)
+#define NLM_holder_sz		(4+NLM_owner_sz)
+
+#define NLM_testargs_sz		(NLM_cookie_sz+1+NLM_lock_sz)
+#define NLM_lockargs_sz		(NLM_cookie_sz+4+NLM_lock_sz)
+#define NLM_cancargs_sz		(NLM_cookie_sz+2+NLM_lock_sz)
+#define NLM_unlockargs_sz	(NLM_cookie_sz+NLM_lock_sz)
+
+#define NLM_testres_sz		(NLM_cookie_sz+1+NLM_holder_sz)
+#define NLM_res_sz		(NLM_cookie_sz+1)
+#define NLM_norep_sz		(0)
+
+
+static s32 loff_t_to_s32(loff_t offset)
+{
+	s32 res;
+
+	if (offset >= NLM_OFFSET_MAX)
+		res = NLM_OFFSET_MAX;
+	else if (offset <= -NLM_OFFSET_MAX)
+		res = -NLM_OFFSET_MAX;
+	else
+		res = offset;
+	return res;
+}
+
+static void nlm_compute_offsets(const struct nlm_lock *lock,
+				u32 *l_offset, u32 *l_len)
+{
+	const struct file_lock *fl = &lock->fl;
+
+	BUG_ON(fl->fl_start > NLM_OFFSET_MAX);
+	BUG_ON(fl->fl_end > NLM_OFFSET_MAX &&
+				fl->fl_end != OFFSET_MAX);
+
+	*l_offset = loff_t_to_s32(fl->fl_start);
+	if (fl->fl_end == OFFSET_MAX)
+		*l_len = 0;
+	else
+		*l_len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
+}
+
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+	dprintk("lockd: %s prematurely hit the end of our receive buffer. "
+		"Remaining buffer length is %tu words.\n",
+		func, xdr->end - xdr->p);
+}
+
+
+/*
+ * Encode/decode NLMv3 basic data types
+ *
+ * Basic NLMv3 data types are not defined in an IETF standards
+ * document.  X/Open has a description of these data types that
+ * is useful.  See Chapter 10 of "Protocols for Interworking:
+ * XNFS, Version 3W".
+ *
+ * Not all basic data types have their own encoding and decoding
+ * functions.  For run-time efficiency, some data types are encoded
+ * or decoded inline.
+ */
+
+static void encode_bool(struct xdr_stream *xdr, const int value)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p = value ? xdr_one : xdr_zero;
+}
+
+static void encode_int32(struct xdr_stream *xdr, const s32 value)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	*p = cpu_to_be32(value);
+}
+
+/*
+ *	typedef opaque netobj<MAXNETOBJ_SZ>
+ */
+static void encode_netobj(struct xdr_stream *xdr,
+			  const u8 *data, const unsigned int length)
+{
+	__be32 *p;
+
+	BUG_ON(length > XDR_MAX_NETOBJ);
+	p = xdr_reserve_space(xdr, 4 + length);
+	xdr_encode_opaque(p, data, length);
+}
+
+/*
+ * Decode a netobj (length word followed by opaque data) from the
+ * receive stream.
+ *
+ * On success, obj->len holds the payload length and obj->data points
+ * at the payload inside the receive buffer (no copy is made), and 0
+ * is returned.  Returns -EIO if the length exceeds XDR_MAX_NETOBJ or
+ * the buffer is too short to hold the length word or the payload.
+ */
+static int decode_netobj(struct xdr_stream *xdr,
+			 struct xdr_netobj *obj)
+{
+	u32 length;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	length = be32_to_cpup(p);
+	if (unlikely(length > XDR_MAX_NETOBJ))
+		goto out_size;
+	/*
+	 * Consume the payload too, exactly as decode_cookie() does.
+	 * Without this the stream position stays at the start of the
+	 * netobj data, so whatever the caller decodes next is read
+	 * from the wrong place, and the payload is never checked
+	 * against the end of the receive buffer.
+	 */
+	p = xdr_inline_decode(xdr, length);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	obj->len = length;
+	obj->data = (u8 *)p;
+	return 0;
+out_size:
+	dprintk("NFS: returned netobj was too long: %u\n", length);
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	netobj cookie;
+ */
+static void encode_cookie(struct xdr_stream *xdr,
+			  const struct nlm_cookie *cookie)
+{
+	BUG_ON(cookie->len > NLM_MAXCOOKIELEN);
+	encode_netobj(xdr, (u8 *)&cookie->data, cookie->len);
+}
+
+static int decode_cookie(struct xdr_stream *xdr,
+			 struct nlm_cookie *cookie)
+{
+	u32 length;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	length = be32_to_cpup(p++);
+	/* apparently HPUX can return empty cookies */
+	if (length == 0)
+		goto out_hpux;
+	if (length > NLM_MAXCOOKIELEN)
+		goto out_size;
+	p = xdr_inline_decode(xdr, length);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	cookie->len = length;
+	memcpy(cookie->data, p, length);
+	return 0;
+out_hpux:
+	cookie->len = 4;
+	memset(cookie->data, 0, 4);
+	return 0;
+out_size:
+	dprintk("NFS: returned cookie was too long: %u\n", length);
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	netobj fh;
+ */
+static void encode_fh(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+	BUG_ON(fh->size != NFS2_FHSIZE);
+	encode_netobj(xdr, (u8 *)&fh->data, NFS2_FHSIZE);
+}
+
+/*
+ *	enum nlm_stats {
+ *		LCK_GRANTED = 0,
+ *		LCK_DENIED = 1,
+ *		LCK_DENIED_NOLOCKS = 2,
+ *		LCK_BLOCKED = 3,
+ *		LCK_DENIED_GRACE_PERIOD = 4
+ *	};
+ *
+ *
+ *	struct nlm_stat {
+ *		nlm_stats stat;
+ *	};
+ *
+ * NB: we don't swap bytes for the NLM status values.  The upper
+ * layers deal directly with the status value in network byte
+ * order.
+ */
+
+static void encode_nlm_stat(struct xdr_stream *xdr,
+			    const __be32 stat)
+{
+	__be32 *p;
+
+	BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
+	p = xdr_reserve_space(xdr, 4);
+	*p = stat;
+}
+
+/*
+ * Decode an nlm_stat result word.
+ *
+ * The status is stored in *stat unswapped (network byte order), as
+ * the upper layers expect.  Returns 0 on success, or -EIO if the
+ * buffer is exhausted or the value is larger than
+ * nlm_lck_denied_grace_period.
+ */
+static int decode_nlm_stat(struct xdr_stream *xdr,
+			   __be32 *stat)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	/*
+	 * The range check must be done in CPU byte order: comparing
+	 * two big-endian words with '>' does not order them by their
+	 * numeric value on little-endian hosts, which would let
+	 * out-of-range status values through.
+	 */
+	if (unlikely(be32_to_cpup(p) >
+		     be32_to_cpu(nlm_lck_denied_grace_period)))
+		goto out_enum;
+	*stat = *p;
+	return 0;
+out_enum:
+	dprintk("%s: server returned invalid nlm_stats value: %u\n",
+		__func__, be32_to_cpup(p));
+	return -EIO;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	struct nlm_holder {
+ *		bool exclusive;
+ *		int uppid;
+ *		netobj oh;
+ *		unsigned l_offset;
+ *		unsigned l_len;
+ *	};
+ */
+/*
+ * Encode the nlm_holder carried in @result->lock: the exclusive
+ * flag, svid, owner handle, and the 32-bit offset and length
+ * computed by nlm_compute_offsets().
+ */
+static void encode_nlm_holder(struct xdr_stream *xdr,
+			      const struct nlm_res *result)
+{
+	const struct nlm_lock *lock = &result->lock;
+	u32 l_offset, l_len;
+	__be32 *p;
+
+	/*
+	 * NOTE(review): this sends "exclusive" as true for F_RDLCK,
+	 * whereas decode_nlm_holder() maps a non-zero "exclusive" to
+	 * F_WRLCK.  The polarity looks inverted; confirm against the
+	 * protocol before altering.
+	 */
+	encode_bool(xdr, lock->fl.fl_type == F_RDLCK);
+	encode_int32(xdr, lock->svid);
+	encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+	/* l_offset and l_len are one 32-bit word each in NLMv3 */
+	p = xdr_reserve_space(xdr, 4 + 4);
+	nlm_compute_offsets(lock, &l_offset, &l_len);
+	*p++ = cpu_to_be32(l_offset);
+	*p   = cpu_to_be32(l_len);
+}
+
+static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
+{
+	struct nlm_lock *lock = &result->lock;
+	struct file_lock *fl = &lock->fl;
+	u32 exclusive, l_offset, l_len;
+	int error;
+	__be32 *p;
+	s32 end;
+
+	memset(lock, 0, sizeof(*lock));
+	locks_init_lock(fl);
+
+	p = xdr_inline_decode(xdr, 4 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	exclusive = be32_to_cpup(p++);
+	lock->svid = be32_to_cpup(p);
+	fl->fl_pid = (pid_t)lock->svid;
+
+	error = decode_netobj(xdr, &lock->oh);
+	if (unlikely(error))
+		goto out;
+
+	p = xdr_inline_decode(xdr, 4 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+
+	fl->fl_flags = FL_POSIX;
+	fl->fl_type  = exclusive != 0 ? F_WRLCK : F_RDLCK;
+	l_offset = be32_to_cpup(p++);
+	l_len = be32_to_cpup(p);
+	end = l_offset + l_len - 1;
+
+	fl->fl_start = (loff_t)l_offset;
+	if (l_len == 0 || end < 0)
+		fl->fl_end = OFFSET_MAX;
+	else
+		fl->fl_end = (loff_t)end;
+	error = 0;
+out:
+	return error;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+/*
+ *	string caller_name<LM_MAXSTRLEN>;
+ */
+static void encode_caller_name(struct xdr_stream *xdr, const char *name)
+{
+	/* NB: client-side does not set lock->len */
+	u32 length = strlen(name);
+	__be32 *p;
+
+	BUG_ON(length > NLM_MAXSTRLEN);
+	p = xdr_reserve_space(xdr, 4 + length);
+	xdr_encode_opaque(p, name, length);
+}
+
+/*
+ *	struct nlm_lock {
+ *		string caller_name<LM_MAXSTRLEN>;
+ *		netobj fh;
+ *		netobj oh;
+ *		int uppid;
+ *		unsigned l_offset;
+ *		unsigned l_len;
+ *	};
+ */
+static void encode_nlm_lock(struct xdr_stream *xdr,
+			    const struct nlm_lock *lock)
+{
+	u32 l_offset, l_len;
+	__be32 *p;
+
+	encode_caller_name(xdr, lock->caller);
+	encode_fh(xdr, &lock->fh);
+	encode_netobj(xdr, lock->oh.data, lock->oh.len);
+
+	p = xdr_reserve_space(xdr, 4 + 4 + 4);
+	*p++ = cpu_to_be32(lock->svid);
+
+	nlm_compute_offsets(lock, &l_offset, &l_len);
+	*p++ = cpu_to_be32(l_offset);
+	*p   = cpu_to_be32(l_len);
+}
+
+
+/*
+ * NLMv3 XDR encode functions
+ *
+ * NLMv3 argument types are defined in Chapter 10 of The Open Group's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ *	struct nlm_testargs {
+ *		netobj cookie;
+ *		bool exclusive;
+ *		struct nlm_lock alock;
+ *	};
+ */
+static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
+{
+	const struct nlm_lock *lock = &args->lock;
+
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm_lock(xdr, lock);
+}
+
+/*
+ *	struct nlm_lockargs {
+ *		netobj cookie;
+ *		bool block;
+ *		bool exclusive;
+ *		struct nlm_lock alock;
+ *		bool reclaim;
+ *		int state;
+ *	};
+ */
+static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
+{
+	const struct nlm_lock *lock = &args->lock;
+
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm_lock(xdr, lock);
+	encode_bool(xdr, args->reclaim);
+	encode_int32(xdr, args->state);
+}
+
+/*
+ *	struct nlm_cancargs {
+ *		netobj cookie;
+ *		bool block;
+ *		bool exclusive;
+ *		struct nlm_lock alock;
+ *	};
+ */
+static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
+{
+	/* "exclusive" is set when the lock being cancelled is a write lock */
+	const int exclusive = (args->lock.fl.fl_type == F_WRLCK);
+
+	/* fields are emitted in the order of struct nlm_cancargs */
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, exclusive);
+	encode_nlm_lock(xdr, &args->lock);
+}
+
+/*
+ *	struct nlm_unlockargs {
+ *		netobj cookie;
+ *		struct nlm_lock alock;
+ *	};
+ */
+static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nlm_args *args)
+{
+	/* an unlock request is just the cookie followed by the lock */
+	encode_cookie(xdr, &args->cookie);
+	encode_nlm_lock(xdr, &args->lock);
+}
+
+/*
+ *	struct nlm_res {
+ *		netobj cookie;
+ *		nlm_stat stat;
+ *	};
+ */
+static void nlm_xdr_enc_res(struct rpc_rqst *req,
+			    struct xdr_stream *xdr,
+			    const struct nlm_res *result)
+{
+	const struct nlm_res *reply = result;
+
+	/* cookie first, then the status word */
+	encode_cookie(xdr, &reply->cookie);
+	encode_nlm_stat(xdr, reply->status);
+}
+
+/*
+ *	union nlm_testrply switch (nlm_stats stat) {
+ *	case LCK_DENIED:
+ *		struct nlm_holder holder;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	struct nlm_testres {
+ *		netobj cookie;
+ *		nlm_testrply test_stat;
+ *	};
+ */
+static void encode_nlm_testrply(struct xdr_stream *xdr,
+				const struct nlm_res *result)
+{
+	/* every arm other than LCK_DENIED is void: nothing to emit */
+	if (result->status != nlm_lck_denied)
+		return;
+
+	encode_nlm_holder(xdr, result);
+}
+
+static void nlm_xdr_enc_testres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				const struct nlm_res *result)
+{
+	const struct nlm_res *reply = result;
+
+	encode_cookie(xdr, &reply->cookie);
+
+	/* union discriminant, then the arm it selects (if any) */
+	encode_nlm_stat(xdr, reply->status);
+	encode_nlm_testrply(xdr, reply);
+}
+
+
+/*
+ * NLMv3 XDR decode functions
+ *
+ * NLMv3 result types are defined in Chapter 10 of The Open Group's
+ * "Protocols for Interworking: XNFS, Version 3W".
+ */
+
+/*
+ *	union nlm_testrply switch (nlm_stats stat) {
+ *	case LCK_DENIED:
+ *		struct nlm_holder holder;
+ *	default:
+ *		void;
+ *	};
+ *
+ *	struct nlm_testres {
+ *		netobj cookie;
+ *		nlm_testrply test_stat;
+ *	};
+ */
+static int decode_nlm_testrply(struct xdr_stream *xdr,
+			       struct nlm_res *result)
+{
+	int status;
+
+	/* the discriminant comes first */
+	status = decode_nlm_stat(xdr, &result->status);
+	if (unlikely(status))
+		return status;
+
+	/* only the LCK_DENIED arm carries a holder */
+	if (result->status != nlm_lck_denied)
+		return status;
+
+	return decode_nlm_holder(xdr, result);
+}
+
+static int nlm_xdr_dec_testres(struct rpc_rqst *req,
+			       struct xdr_stream *xdr,
+			       struct nlm_res *result)
+{
+	int status;
+
+	/* a cookie failure is returned as-is; the union is not decoded */
+	status = decode_cookie(xdr, &result->cookie);
+	if (unlikely(status))
+		return status;
+
+	return decode_nlm_testrply(xdr, result);
+}
+
+/*
+ *	struct nlm_res {
+ *		netobj cookie;
+ *		nlm_stat stat;
+ *	};
+ */
+static int nlm_xdr_dec_res(struct rpc_rqst *req,
+			   struct xdr_stream *xdr,
+			   struct nlm_res *result)
+{
+	int status;
+
+	/* a cookie failure is returned as-is; the status word is not read */
+	status = decode_cookie(xdr, &result->cookie);
+	if (unlikely(status))
+		return status;
+
+	return decode_nlm_stat(xdr, &result->status);
+}
+
+
+/*
+ * For NLM, a void procedure really returns nothing
+ */
+#define nlm_xdr_dec_norep	NULL
+/*
+ * PROC(proc, argtype, restype) builds the nlm_procedures[] entry for
+ * NLMPROC_<proc>.  Token pasting selects the encoder
+ * nlm_xdr_enc_<argtype>, the decoder nlm_xdr_dec_<restype> and the
+ * NLM_<argtype>_sz / NLM_<restype>_sz size constants.  NLMPROC_<proc>
+ * is used as the array index, as .p_proc and as .p_statidx.  A restype
+ * of "norep" resolves to nlm_xdr_dec_norep, which is defined as NULL.
+ */
+#define PROC(proc, argtype, restype)	\
+[NLMPROC_##proc] = {							\
+	.p_proc      = NLMPROC_##proc,					\
+	.p_encode    = (kxdreproc_t)nlm_xdr_enc_##argtype,		\
+	.p_decode    = (kxdrdproc_t)nlm_xdr_dec_##restype,		\
+	.p_arglen    = NLM_##argtype##_sz,				\
+	.p_replen    = NLM_##restype##_sz,				\
+	.p_statidx   = NLMPROC_##proc,					\
+	.p_name      = #proc,						\
+	}
+
+static struct rpc_procinfo	nlm_procedures[] = {
+	PROC(TEST,		testargs,	testres),
+	PROC(LOCK,		lockargs,	res),
+	PROC(CANCEL,		cancargs,	res),
+	PROC(UNLOCK,		unlockargs,	res),
+	PROC(GRANTED,		testargs,	res),
+	PROC(TEST_MSG,		testargs,	norep),
+	PROC(LOCK_MSG,		lockargs,	norep),
+	PROC(CANCEL_MSG,	cancargs,	norep),
+	PROC(UNLOCK_MSG,	unlockargs,	norep),
+	PROC(GRANTED_MSG,	testargs,	norep),
+	PROC(TEST_RES,		testres,	norep),
+	PROC(LOCK_RES,		res,		norep),
+	PROC(CANCEL_RES,	res,		norep),
+	PROC(UNLOCK_RES,	res,		norep),
+	PROC(GRANTED_RES,	res,		norep),
+};
+/*
+ * NLM versions 1 and 3 both use the same nlm_procedures[] table.
+ * nlm_versions[] is indexed by version number; slot 4 is populated
+ * only when CONFIG_LOCKD_V4 is enabled.
+ */
+static struct rpc_version	nlm_version1 = {
+		.number		= 1,
+		.nrprocs	= ARRAY_SIZE(nlm_procedures),
+		.procs		= nlm_procedures,
+};
+
+static struct rpc_version	nlm_version3 = {
+		.number		= 3,
+		.nrprocs	= ARRAY_SIZE(nlm_procedures),
+		.procs		= nlm_procedures,
+};
+
+static struct rpc_version	*nlm_versions[] = {
+	[1] = &nlm_version1,
+	[3] = &nlm_version3,
+#ifdef CONFIG_LOCKD_V4
+	[4] = &nlm_version4,
+#endif
+};
+/*
+ * RPC program description for "lockd": program number NLM_PROGRAM,
+ * the nlm_versions[] table and the nlm_rpc_stats counters.  Unlike the
+ * tables it points to, nlm_program is not static.
+ */
+static struct rpc_stat		nlm_rpc_stats;
+
+struct rpc_program		nlm_program = {
+		.name		= "lockd",
+		.number		= NLM_PROGRAM,
+		.nrvers		= ARRAY_SIZE(nlm_versions),
+		.version	= nlm_versions,
+		.stats		= &nlm_rpc_stats,
+};

+ 245 - 164
fs/lockd/host.c

@@ -25,9 +25,22 @@
 #define NLM_HOST_EXPIRE		(300 * HZ)
 #define NLM_HOST_EXPIRE		(300 * HZ)
 #define NLM_HOST_COLLECT	(120 * HZ)
 #define NLM_HOST_COLLECT	(120 * HZ)
 
 
-static struct hlist_head	nlm_hosts[NLM_HOST_NRHASH];
+static struct hlist_head	nlm_server_hosts[NLM_HOST_NRHASH];
+static struct hlist_head	nlm_client_hosts[NLM_HOST_NRHASH];
+/*
+ * Walk every nlm_host in a hash table made of NLM_HOST_NRHASH chains.
+ * Each macro expands to two nested for loops (the outer one over the
+ * chains, the inner one over the entries of one chain), so a "break"
+ * in the loop body leaves only the inner loop.  The _safe variant uses
+ * hlist_for_each_entry_safe() and needs an extra "next" cursor.
+ */
+#define for_each_host(host, pos, chain, table) \
+	for ((chain) = (table); \
+	     (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
+		hlist_for_each_entry((host), (pos), (chain), h_hash)
+
+#define for_each_host_safe(host, pos, next, chain, table) \
+	for ((chain) = (table); \
+	     (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
+		hlist_for_each_entry_safe((host), (pos), (next), \
+						(chain), h_hash)
+
 static unsigned long		next_gc;
 static unsigned long		next_gc;
-static int			nrhosts;
+static unsigned long		nrhosts;
 static DEFINE_MUTEX(nlm_host_mutex);
 static DEFINE_MUTEX(nlm_host_mutex);
 
 
 static void			nlm_gc_hosts(void);
 static void			nlm_gc_hosts(void);
@@ -40,8 +53,6 @@ struct nlm_lookup_host_info {
 	const u32		version;	/* NLM version to search for */
 	const u32		version;	/* NLM version to search for */
 	const char		*hostname;	/* remote's hostname */
 	const char		*hostname;	/* remote's hostname */
 	const size_t		hostname_len;	/* it's length */
 	const size_t		hostname_len;	/* it's length */
-	const struct sockaddr	*src_sap;	/* our address (optional) */
-	const size_t		src_len;	/* it's length */
 	const int		noresvport;	/* use non-priv port */
 	const int		noresvport;	/* use non-priv port */
 };
 };
 
 
@@ -88,127 +99,83 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
 }
 }
 
 
 /*
 /*
- * Common host lookup routine for server & client
+ * Allocate and initialize an nlm_host.  Common to both client and server.
  */
  */
-static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
+static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
+				       struct nsm_handle *nsm)
 {
 {
-	struct hlist_head *chain;
-	struct hlist_node *pos;
-	struct nlm_host	*host;
-	struct nsm_handle *nsm = NULL;
-
-	mutex_lock(&nlm_host_mutex);
+	struct nlm_host *host = NULL;
+	unsigned long now = jiffies;
 
 
-	if (time_after_eq(jiffies, next_gc))
-		nlm_gc_hosts();
-
-	/* We may keep several nlm_host objects for a peer, because each
-	 * nlm_host is identified by
-	 * (address, protocol, version, server/client)
-	 * We could probably simplify this a little by putting all those
-	 * different NLM rpc_clients into one single nlm_host object.
-	 * This would allow us to have one nlm_host per address.
-	 */
-	chain = &nlm_hosts[nlm_hash_address(ni->sap)];
-	hlist_for_each_entry(host, pos, chain, h_hash) {
-		if (!rpc_cmp_addr(nlm_addr(host), ni->sap))
-			continue;
-
-		/* See if we have an NSM handle for this client */
-		if (!nsm)
-			nsm = host->h_nsmhandle;
-
-		if (host->h_proto != ni->protocol)
-			continue;
-		if (host->h_version != ni->version)
-			continue;
-		if (host->h_server != ni->server)
-			continue;
-		if (ni->server && ni->src_len != 0 &&
-		    !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
-			continue;
-
-		/* Move to head of hash chain. */
-		hlist_del(&host->h_hash);
-		hlist_add_head(&host->h_hash, chain);
-
-		nlm_get_host(host);
-		dprintk("lockd: nlm_lookup_host found host %s (%s)\n",
-				host->h_name, host->h_addrbuf);
-		goto out;
-	}
-
-	/*
-	 * The host wasn't in our hash table.  If we don't
-	 * have an NSM handle for it yet, create one.
-	 */
-	if (nsm)
+	if (nsm != NULL)
 		atomic_inc(&nsm->sm_count);
 		atomic_inc(&nsm->sm_count);
 	else {
 	else {
 		host = NULL;
 		host = NULL;
 		nsm = nsm_get_handle(ni->sap, ni->salen,
 		nsm = nsm_get_handle(ni->sap, ni->salen,
 					ni->hostname, ni->hostname_len);
 					ni->hostname, ni->hostname_len);
-		if (!nsm) {
-			dprintk("lockd: nlm_lookup_host failed; "
-				"no nsm handle\n");
+		if (unlikely(nsm == NULL)) {
+			dprintk("lockd: %s failed; no nsm handle\n",
+				__func__);
 			goto out;
 			goto out;
 		}
 		}
 	}
 	}
 
 
-	host = kzalloc(sizeof(*host), GFP_KERNEL);
-	if (!host) {
+	host = kmalloc(sizeof(*host), GFP_KERNEL);
+	if (unlikely(host == NULL)) {
+		dprintk("lockd: %s failed; no memory\n", __func__);
 		nsm_release(nsm);
 		nsm_release(nsm);
-		dprintk("lockd: nlm_lookup_host failed; no memory\n");
 		goto out;
 		goto out;
 	}
 	}
-	host->h_name	   = nsm->sm_name;
-	host->h_addrbuf    = nsm->sm_addrbuf;
+
 	memcpy(nlm_addr(host), ni->sap, ni->salen);
 	memcpy(nlm_addr(host), ni->sap, ni->salen);
-	host->h_addrlen = ni->salen;
+	host->h_addrlen    = ni->salen;
 	rpc_set_port(nlm_addr(host), 0);
 	rpc_set_port(nlm_addr(host), 0);
-	memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
-	host->h_srcaddrlen = ni->src_len;
+	host->h_srcaddrlen = 0;
+
+	host->h_rpcclnt    = NULL;
+	host->h_name	   = nsm->sm_name;
 	host->h_version    = ni->version;
 	host->h_version    = ni->version;
 	host->h_proto      = ni->protocol;
 	host->h_proto      = ni->protocol;
-	host->h_rpcclnt    = NULL;
-	mutex_init(&host->h_mutex);
-	host->h_nextrebind = jiffies + NLM_HOST_REBIND;
-	host->h_expires    = jiffies + NLM_HOST_EXPIRE;
-	atomic_set(&host->h_count, 1);
+	host->h_reclaiming = 0;
+	host->h_server     = ni->server;
+	host->h_noresvport = ni->noresvport;
+	host->h_inuse      = 0;
 	init_waitqueue_head(&host->h_gracewait);
 	init_waitqueue_head(&host->h_gracewait);
 	init_rwsem(&host->h_rwsem);
 	init_rwsem(&host->h_rwsem);
-	host->h_state      = 0;			/* pseudo NSM state */
-	host->h_nsmstate   = 0;			/* real NSM state */
-	host->h_nsmhandle  = nsm;
-	host->h_server	   = ni->server;
-	host->h_noresvport = ni->noresvport;
-	hlist_add_head(&host->h_hash, chain);
+	host->h_state      = 0;
+	host->h_nsmstate   = 0;
+	host->h_pidcount   = 0;
+	atomic_set(&host->h_count, 1);
+	mutex_init(&host->h_mutex);
+	host->h_nextrebind = now + NLM_HOST_REBIND;
+	host->h_expires    = now + NLM_HOST_EXPIRE;
 	INIT_LIST_HEAD(&host->h_lockowners);
 	INIT_LIST_HEAD(&host->h_lockowners);
 	spin_lock_init(&host->h_lock);
 	spin_lock_init(&host->h_lock);
 	INIT_LIST_HEAD(&host->h_granted);
 	INIT_LIST_HEAD(&host->h_granted);
 	INIT_LIST_HEAD(&host->h_reclaim);
 	INIT_LIST_HEAD(&host->h_reclaim);
-
-	nrhosts++;
-
-	dprintk("lockd: nlm_lookup_host created host %s\n",
-			host->h_name);
+	host->h_nsmhandle  = nsm;
+	host->h_addrbuf    = nsm->sm_addrbuf;
 
 
 out:
 out:
-	mutex_unlock(&nlm_host_mutex);
 	return host;
 	return host;
 }
 }
 
 
 /*
 /*
- * Destroy a host
+ * Destroy an nlm_host and free associated resources
+ *
+ * Caller must hold nlm_host_mutex.
  */
  */
-static void
-nlm_destroy_host(struct nlm_host *host)
+static void nlm_destroy_host_locked(struct nlm_host *host)
 {
 {
 	struct rpc_clnt	*clnt;
 	struct rpc_clnt	*clnt;
 
 
+	dprintk("lockd: destroy host %s\n", host->h_name);
+
 	BUG_ON(!list_empty(&host->h_lockowners));
 	BUG_ON(!list_empty(&host->h_lockowners));
 	BUG_ON(atomic_read(&host->h_count));
 	BUG_ON(atomic_read(&host->h_count));
 
 
+	hlist_del_init(&host->h_hash);
+
 	nsm_unmonitor(host);
 	nsm_unmonitor(host);
 	nsm_release(host->h_nsmhandle);
 	nsm_release(host->h_nsmhandle);
 
 
@@ -216,6 +183,8 @@ nlm_destroy_host(struct nlm_host *host)
 	if (clnt != NULL)
 	if (clnt != NULL)
 		rpc_shutdown_client(clnt);
 		rpc_shutdown_client(clnt);
 	kfree(host);
 	kfree(host);
+
+	nrhosts--;
 }
 }
 
 
 /**
 /**
@@ -249,12 +218,76 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.hostname_len	= strlen(hostname),
 		.hostname_len	= strlen(hostname),
 		.noresvport	= noresvport,
 		.noresvport	= noresvport,
 	};
 	};
+	struct hlist_head *chain;
+	struct hlist_node *pos;
+	struct nlm_host	*host;
+	struct nsm_handle *nsm = NULL;
 
 
 	dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
 	dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
 			(hostname ? hostname : "<none>"), version,
 			(hostname ? hostname : "<none>"), version,
 			(protocol == IPPROTO_UDP ? "udp" : "tcp"));
 			(protocol == IPPROTO_UDP ? "udp" : "tcp"));
 
 
-	return nlm_lookup_host(&ni);
+	mutex_lock(&nlm_host_mutex);
+
+	chain = &nlm_client_hosts[nlm_hash_address(sap)];
+	hlist_for_each_entry(host, pos, chain, h_hash) {
+		if (!rpc_cmp_addr(nlm_addr(host), sap))
+			continue;
+
+		/* Same address. Share an NSM handle if we already have one */
+		if (nsm == NULL)
+			nsm = host->h_nsmhandle;
+
+		if (host->h_proto != protocol)
+			continue;
+		if (host->h_version != version)
+			continue;
+
+		nlm_get_host(host);
+		dprintk("lockd: %s found host %s (%s)\n", __func__,
+			host->h_name, host->h_addrbuf);
+		goto out;
+	}
+
+	host = nlm_alloc_host(&ni, nsm);
+	if (unlikely(host == NULL))
+		goto out;
+
+	hlist_add_head(&host->h_hash, chain);
+	nrhosts++;
+
+	dprintk("lockd: %s created host %s (%s)\n", __func__,
+		host->h_name, host->h_addrbuf);
+
+out:
+	mutex_unlock(&nlm_host_mutex);
+	return host;
+}
+
+/**
+ * nlmclnt_release_host - release client nlm_host
+ * @host: nlm_host to release (NULL is accepted and ignored)
+ *
+ * Drops one reference.  When the last reference goes away the host is
+ * unhashed and destroyed under nlm_host_mutex.
+ */
+void nlmclnt_release_host(struct nlm_host *host)
+{
+	if (host == NULL)
+		return;
+
+	dprintk("lockd: release client host %s\n", host->h_name);
+
+	BUG_ON(atomic_read(&host->h_count) < 0);
+	BUG_ON(host->h_server);
+
+	/*
+	 * The final decrement and the acquisition of nlm_host_mutex must
+	 * be one atomic step.  Otherwise nlmclnt_lookup_host() could find
+	 * the still-hashed host and take a new reference between the two,
+	 * and nlm_destroy_host_locked() would then trip over a non-zero
+	 * h_count.
+	 */
+	if (atomic_dec_and_mutex_lock(&host->h_count, &nlm_host_mutex)) {
+		BUG_ON(!list_empty(&host->h_lockowners));
+		BUG_ON(!list_empty(&host->h_granted));
+		BUG_ON(!list_empty(&host->h_reclaim));
+
+		nlm_destroy_host_locked(host);
+		mutex_unlock(&nlm_host_mutex);
+	}
 }
 }
 
 
 /**
 /**
@@ -279,12 +312,18 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 				    const char *hostname,
 				    const char *hostname,
 				    const size_t hostname_len)
 				    const size_t hostname_len)
 {
 {
+	struct hlist_head *chain;
+	struct hlist_node *pos;
+	struct nlm_host	*host = NULL;
+	struct nsm_handle *nsm = NULL;
 	struct sockaddr_in sin = {
 	struct sockaddr_in sin = {
 		.sin_family	= AF_INET,
 		.sin_family	= AF_INET,
 	};
 	};
 	struct sockaddr_in6 sin6 = {
 	struct sockaddr_in6 sin6 = {
 		.sin6_family	= AF_INET6,
 		.sin6_family	= AF_INET6,
 	};
 	};
+	struct sockaddr *src_sap;
+	size_t src_len = rqstp->rq_addrlen;
 	struct nlm_lookup_host_info ni = {
 	struct nlm_lookup_host_info ni = {
 		.server		= 1,
 		.server		= 1,
 		.sap		= svc_addr(rqstp),
 		.sap		= svc_addr(rqstp),
@@ -293,27 +332,91 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 		.version	= rqstp->rq_vers,
 		.version	= rqstp->rq_vers,
 		.hostname	= hostname,
 		.hostname	= hostname,
 		.hostname_len	= hostname_len,
 		.hostname_len	= hostname_len,
-		.src_len	= rqstp->rq_addrlen,
 	};
 	};
 
 
 	dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
 	dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
 			(int)hostname_len, hostname, rqstp->rq_vers,
 			(int)hostname_len, hostname, rqstp->rq_vers,
 			(rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
 			(rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
 
 
+	mutex_lock(&nlm_host_mutex);
+
 	switch (ni.sap->sa_family) {
 	switch (ni.sap->sa_family) {
 	case AF_INET:
 	case AF_INET:
 		sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
 		sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
-		ni.src_sap = (struct sockaddr *)&sin;
+		src_sap = (struct sockaddr *)&sin;
 		break;
 		break;
 	case AF_INET6:
 	case AF_INET6:
 		ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6);
 		ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6);
-		ni.src_sap = (struct sockaddr *)&sin6;
+		src_sap = (struct sockaddr *)&sin6;
 		break;
 		break;
 	default:
 	default:
-		return NULL;
+		dprintk("lockd: %s failed; unrecognized address family\n",
+			__func__);
+		goto out;
+	}
+
+	if (time_after_eq(jiffies, next_gc))
+		nlm_gc_hosts();
+
+	chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
+	hlist_for_each_entry(host, pos, chain, h_hash) {
+		if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
+			continue;
+
+		/* Same address. Share an NSM handle if we already have one */
+		if (nsm == NULL)
+			nsm = host->h_nsmhandle;
+
+		if (host->h_proto != ni.protocol)
+			continue;
+		if (host->h_version != ni.version)
+			continue;
+		if (!rpc_cmp_addr(nlm_srcaddr(host), src_sap))
+			continue;
+
+		/* Move to head of hash chain. */
+		hlist_del(&host->h_hash);
+		hlist_add_head(&host->h_hash, chain);
+
+		nlm_get_host(host);
+		dprintk("lockd: %s found host %s (%s)\n",
+			__func__, host->h_name, host->h_addrbuf);
+		goto out;
 	}
 	}
 
 
-	return nlm_lookup_host(&ni);
+	host = nlm_alloc_host(&ni, nsm);
+	if (unlikely(host == NULL))
+		goto out;
+
+	memcpy(nlm_srcaddr(host), src_sap, src_len);
+	host->h_srcaddrlen = src_len;
+	hlist_add_head(&host->h_hash, chain);
+	nrhosts++;
+
+	dprintk("lockd: %s created host %s (%s)\n",
+		__func__, host->h_name, host->h_addrbuf);
+
+out:
+	mutex_unlock(&nlm_host_mutex);
+	return host;
+}
+
+/**
+ * nlmsvc_release_host - drop a reference on a server nlm_host
+ * @host: nlm_host to release; NULL is accepted and ignored
+ *
+ * Only the reference count is decremented here.  Unreferenced server
+ * hosts are destroyed later by nlm_gc_hosts().
+ */
+void nlmsvc_release_host(struct nlm_host *host)
+{
+	if (host != NULL) {
+		dprintk("lockd: release server host %s\n", host->h_name);
+
+		/* sanity: count not already negative, and a server entry */
+		BUG_ON(atomic_read(&host->h_count) < 0);
+		BUG_ON(!host->h_server);
+
+		atomic_dec(&host->h_count);
+	}
 }
 }
 
 
 /*
 /*
@@ -413,20 +516,28 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
 	return host;
 	return host;
 }
 }
 
 
-/*
- * Release NLM host after use
- */
-void nlm_release_host(struct nlm_host *host)
+/*
+ * Find the next host in @cache that uses @nsm and whose h_nsmstate
+ * differs from @info->state.  The match gets the new h_nsmstate, has
+ * h_state incremented, and is returned with a reference taken via
+ * nlm_get_host().  Returns NULL when no such host is left.
+ */
+static struct nlm_host *next_host_state(struct hlist_head *cache,
+					struct nsm_handle *nsm,
+					const struct nlm_reboot *info)
 {
 {
-	if (host != NULL) {
-		dprintk("lockd: release host %s\n", host->h_name);
-		BUG_ON(atomic_read(&host->h_count) < 0);
-		if (atomic_dec_and_test(&host->h_count)) {
-			BUG_ON(!list_empty(&host->h_lockowners));
-			BUG_ON(!list_empty(&host->h_granted));
-			BUG_ON(!list_empty(&host->h_reclaim));
+	struct nlm_host *host = NULL;
+	struct hlist_head *chain;
+	struct hlist_node *pos;
+
+	mutex_lock(&nlm_host_mutex);
+	for_each_host(host, pos, chain, cache) {
+		if (host->h_nsmhandle == nsm
+		    && host->h_nsmstate != info->state) {
+			host->h_nsmstate = info->state;
+			host->h_state++;
+
+			nlm_get_host(host);
+			goto out;
 		}
 		}
 	}
 	}
+	/*
+	 * No match.  The iterator leaves "host" pointing at the last
+	 * entry it visited, so reset it: callers loop until they get
+	 * NULL and would otherwise receive a host with no reference.
+	 */
+	host = NULL;
+out:
+	mutex_unlock(&nlm_host_mutex);
+	return host;
 }
 }
 
 
 /**
 /**
@@ -438,8 +549,6 @@ void nlm_release_host(struct nlm_host *host)
  */
  */
 void nlm_host_rebooted(const struct nlm_reboot *info)
 void nlm_host_rebooted(const struct nlm_reboot *info)
 {
 {
-	struct hlist_head *chain;
-	struct hlist_node *pos;
 	struct nsm_handle *nsm;
 	struct nsm_handle *nsm;
 	struct nlm_host	*host;
 	struct nlm_host	*host;
 
 
@@ -452,32 +561,15 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
 	 * lock for this.
 	 * lock for this.
 	 * To avoid processing a host several times, we match the nsmstate.
 	 * To avoid processing a host several times, we match the nsmstate.
 	 */
 	 */
-again:	mutex_lock(&nlm_host_mutex);
-	for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
-		hlist_for_each_entry(host, pos, chain, h_hash) {
-			if (host->h_nsmhandle == nsm
-			 && host->h_nsmstate != info->state) {
-				host->h_nsmstate = info->state;
-				host->h_state++;
-
-				nlm_get_host(host);
-				mutex_unlock(&nlm_host_mutex);
-
-				if (host->h_server) {
-					/* We're server for this guy, just ditch
-					 * all the locks he held. */
-					nlmsvc_free_host_resources(host);
-				} else {
-					/* He's the server, initiate lock recovery. */
-					nlmclnt_recovery(host);
-				}
-
-				nlm_release_host(host);
-				goto again;
-			}
-		}
+	while ((host = next_host_state(nlm_server_hosts, nsm, info)) != NULL) {
+		nlmsvc_free_host_resources(host);
+		nlmsvc_release_host(host);
 	}
 	}
-	mutex_unlock(&nlm_host_mutex);
+	while ((host = next_host_state(nlm_client_hosts, nsm, info)) != NULL) {
+		nlmclnt_recovery(host);
+		nlmclnt_release_host(host);
+	}
+
 	nsm_release(nsm);
 	nsm_release(nsm);
 }
 }
 
 
@@ -497,13 +589,11 @@ nlm_shutdown_hosts(void)
 
 
 	/* First, make all hosts eligible for gc */
 	/* First, make all hosts eligible for gc */
 	dprintk("lockd: nuking all hosts...\n");
 	dprintk("lockd: nuking all hosts...\n");
-	for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
-		hlist_for_each_entry(host, pos, chain, h_hash) {
-			host->h_expires = jiffies - 1;
-			if (host->h_rpcclnt) {
-				rpc_shutdown_client(host->h_rpcclnt);
-				host->h_rpcclnt = NULL;
-			}
+	for_each_host(host, pos, chain, nlm_server_hosts) {
+		host->h_expires = jiffies - 1;
+		if (host->h_rpcclnt) {
+			rpc_shutdown_client(host->h_rpcclnt);
+			host->h_rpcclnt = NULL;
 		}
 		}
 	}
 	}
 
 
@@ -512,15 +602,13 @@ nlm_shutdown_hosts(void)
 	mutex_unlock(&nlm_host_mutex);
 	mutex_unlock(&nlm_host_mutex);
 
 
 	/* complain if any hosts are left */
 	/* complain if any hosts are left */
-	if (nrhosts) {
+	if (nrhosts != 0) {
 		printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
 		printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
-		dprintk("lockd: %d hosts left:\n", nrhosts);
-		for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
-			hlist_for_each_entry(host, pos, chain, h_hash) {
-				dprintk("       %s (cnt %d use %d exp %ld)\n",
-					host->h_name, atomic_read(&host->h_count),
-					host->h_inuse, host->h_expires);
-			}
+		dprintk("lockd: %lu hosts left:\n", nrhosts);
+		for_each_host(host, pos, chain, nlm_server_hosts) {
+			dprintk("       %s (cnt %d use %d exp %ld)\n",
+				host->h_name, atomic_read(&host->h_count),
+				host->h_inuse, host->h_expires);
 		}
 		}
 	}
 	}
 }
 }
@@ -538,29 +626,22 @@ nlm_gc_hosts(void)
 	struct nlm_host	*host;
 	struct nlm_host	*host;
 
 
 	dprintk("lockd: host garbage collection\n");
 	dprintk("lockd: host garbage collection\n");
-	for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
-		hlist_for_each_entry(host, pos, chain, h_hash)
-			host->h_inuse = 0;
-	}
+	for_each_host(host, pos, chain, nlm_server_hosts)
+		host->h_inuse = 0;
 
 
 	/* Mark all hosts that hold locks, blocks or shares */
 	/* Mark all hosts that hold locks, blocks or shares */
 	nlmsvc_mark_resources();
 	nlmsvc_mark_resources();
 
 
-	for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
-		hlist_for_each_entry_safe(host, pos, next, chain, h_hash) {
-			if (atomic_read(&host->h_count) || host->h_inuse
-			 || time_before(jiffies, host->h_expires)) {
-				dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
-					host->h_name, atomic_read(&host->h_count),
-					host->h_inuse, host->h_expires);
-				continue;
-			}
-			dprintk("lockd: delete host %s\n", host->h_name);
-			hlist_del_init(&host->h_hash);
-
-			nlm_destroy_host(host);
-			nrhosts--;
+	for_each_host_safe(host, pos, next, chain, nlm_server_hosts) {
+		if (atomic_read(&host->h_count) || host->h_inuse
+		 || time_before(jiffies, host->h_expires)) {
+			dprintk("nlm_gc_hosts skipping %s "
+				"(cnt %d use %d exp %ld)\n",
+				host->h_name, atomic_read(&host->h_count),
+				host->h_inuse, host->h_expires);
+			continue;
 		}
 		}
+		nlm_destroy_host_locked(host);
 	}
 	}
 
 
 	next_gc = jiffies + NLM_HOST_COLLECT;
 	next_gc = jiffies + NLM_HOST_COLLECT;

+ 42 - 68
fs/lockd/mon.c

@@ -401,26 +401,22 @@ void nsm_release(struct nsm_handle *nsm)
  * Status Monitor wire protocol.
  * Status Monitor wire protocol.
  */
  */
 
 
-static int encode_nsm_string(struct xdr_stream *xdr, const char *string)
+static void encode_nsm_string(struct xdr_stream *xdr, const char *string)
 {
 {
 	const u32 len = strlen(string);
 	const u32 len = strlen(string);
 	__be32 *p;
 	__be32 *p;
 
 
-	if (unlikely(len > SM_MAXSTRLEN))
-		return -EIO;
-	p = xdr_reserve_space(xdr, sizeof(u32) + len);
-	if (unlikely(p == NULL))
-		return -EIO;
+	BUG_ON(len > SM_MAXSTRLEN);
+	p = xdr_reserve_space(xdr, 4 + len);
 	xdr_encode_opaque(p, string, len);
 	xdr_encode_opaque(p, string, len);
-	return 0;
 }
 }
 
 
 /*
 /*
  * "mon_name" specifies the host to be monitored.
  * "mon_name" specifies the host to be monitored.
  */
  */
-static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
 {
 {
-	return encode_nsm_string(xdr, argp->mon_name);
+	encode_nsm_string(xdr, argp->mon_name);
 }
 }
 
 
 /*
 /*
@@ -429,35 +425,25 @@ static int encode_mon_name(struct xdr_stream *xdr, const struct nsm_args *argp)
  * (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name"
  * (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name"
  * has changed.
  * has changed.
  */
  */
-static int encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
 {
 {
-	int status;
 	__be32 *p;
 	__be32 *p;
 
 
-	status = encode_nsm_string(xdr, utsname()->nodename);
-	if (unlikely(status != 0))
-		return status;
-	p = xdr_reserve_space(xdr, 3 * sizeof(u32));
-	if (unlikely(p == NULL))
-		return -EIO;
-	*p++ = htonl(argp->prog);
-	*p++ = htonl(argp->vers);
-	*p++ = htonl(argp->proc);
-	return 0;
+	encode_nsm_string(xdr, utsname()->nodename);
+	p = xdr_reserve_space(xdr, 4 + 4 + 4);
+	*p++ = cpu_to_be32(argp->prog);
+	*p++ = cpu_to_be32(argp->vers);
+	*p = cpu_to_be32(argp->proc);
 }
 }
 
 
 /*
 /*
  * The "mon_id" argument specifies the non-private arguments
  * The "mon_id" argument specifies the non-private arguments
  * of an NSMPROC_MON or NSMPROC_UNMON call.
  * of an NSMPROC_MON or NSMPROC_UNMON call.
  */
  */
-static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
 {
 {
-	int status;
-
-	status = encode_mon_name(xdr, argp);
-	if (unlikely(status != 0))
-		return status;
-	return encode_my_id(xdr, argp);
+	encode_mon_name(xdr, argp);
+	encode_my_id(xdr, argp);
 }
 }
 
 
 /*
 /*
@@ -465,68 +451,56 @@ static int encode_mon_id(struct xdr_stream *xdr, const struct nsm_args *argp)
  * by the NSMPROC_MON call. This information will be supplied in the
  * by the NSMPROC_MON call. This information will be supplied in the
  * NLMPROC_SM_NOTIFY call.
  * NLMPROC_SM_NOTIFY call.
  */
  */
-static int encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
+static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
 {
 {
 	__be32 *p;
 	__be32 *p;
 
 
 	p = xdr_reserve_space(xdr, SM_PRIV_SIZE);
 	p = xdr_reserve_space(xdr, SM_PRIV_SIZE);
-	if (unlikely(p == NULL))
-		return -EIO;
 	xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE);
 	xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE);
-	return 0;
 }
 }
 
 
-static int xdr_enc_mon(struct rpc_rqst *req, __be32 *p,
-		       const struct nsm_args *argp)
+static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
+			    const struct nsm_args *argp)
 {
 {
-	struct xdr_stream xdr;
-	int status;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	status = encode_mon_id(&xdr, argp);
-	if (unlikely(status))
-		return status;
-	return encode_priv(&xdr, argp);
+	encode_mon_id(xdr, argp);
+	encode_priv(xdr, argp);
 }
 }
 
 
-static int xdr_enc_unmon(struct rpc_rqst *req, __be32 *p,
-			 const struct nsm_args *argp)
+static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      const struct nsm_args *argp)
 {
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	return encode_mon_id(&xdr, argp);
+	encode_mon_id(xdr, argp);
 }
 }
 
 
-static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p,
-			    struct nsm_res *resp)
+static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct nsm_res *resp)
 {
 {
-	struct xdr_stream xdr;
+	__be32 *p;
 
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	p = xdr_inline_decode(&xdr, 2 * sizeof(u32));
+	p = xdr_inline_decode(xdr, 4 + 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
-	resp->status = ntohl(*p++);
-	resp->state = ntohl(*p);
+	resp->status = be32_to_cpup(p++);
+	resp->state = be32_to_cpup(p);
 
 
-	dprintk("lockd: xdr_dec_stat_res status %d state %d\n",
-			resp->status, resp->state);
+	dprintk("lockd: %s status %d state %d\n",
+		__func__, resp->status, resp->state);
 	return 0;
 	return 0;
 }
 }
 
 
-static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
-			struct nsm_res *resp)
+static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
+			    struct xdr_stream *xdr,
+			    struct nsm_res *resp)
 {
 {
-	struct xdr_stream xdr;
+	__be32 *p;
 
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	p = xdr_inline_decode(&xdr, sizeof(u32));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
-	resp->state = ntohl(*p);
+	resp->state = be32_to_cpup(p);
 
 
-	dprintk("lockd: xdr_dec_stat state %d\n", resp->state);
+	dprintk("lockd: %s state %d\n", __func__, resp->state);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -542,8 +516,8 @@ static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
 static struct rpc_procinfo	nsm_procedures[] = {
 static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_proc		= NSMPROC_MON,
-		.p_encode	= (kxdrproc_t)xdr_enc_mon,
-		.p_decode	= (kxdrproc_t)xdr_dec_stat_res,
+		.p_encode	= (kxdreproc_t)nsm_xdr_enc_mon,
+		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
 		.p_replen	= SM_monres_sz,
 		.p_statidx	= NSMPROC_MON,
 		.p_statidx	= NSMPROC_MON,
@@ -551,8 +525,8 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 	},
 [NSMPROC_UNMON] = {
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
 		.p_proc		= NSMPROC_UNMON,
-		.p_encode	= (kxdrproc_t)xdr_enc_unmon,
-		.p_decode	= (kxdrproc_t)xdr_dec_stat,
+		.p_encode	= (kxdreproc_t)nsm_xdr_enc_unmon,
+		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
 		.p_replen	= SM_unmonres_sz,
 		.p_statidx	= NSMPROC_UNMON,
 		.p_statidx	= NSMPROC_UNMON,

+ 10 - 10
fs/lockd/svc4proc.c

@@ -51,7 +51,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return 0;
 	return 0;
 
 
 no_locks:
 no_locks:
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
  	if (error)
  	if (error)
 		return error;	
 		return error;	
 	return nlm_lck_denied_nolocks;
 	return nlm_lck_denied_nolocks;
@@ -92,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 	else
 	else
 		dprintk("lockd: TEST4        status %d\n", ntohl(resp->status));
 		dprintk("lockd: TEST4        status %d\n", ntohl(resp->status));
 
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rc;
 	return rc;
 }
 }
@@ -134,7 +134,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	else
 	else
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
 
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rc;
 	return rc;
 }
 }
@@ -164,7 +164,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
 	resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
 
 
 	dprintk("lockd: CANCEL        status %d\n", ntohl(resp->status));
 	dprintk("lockd: CANCEL        status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -197,7 +197,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = nlmsvc_unlock(file, &argp->lock);
 	resp->status = nlmsvc_unlock(file, &argp->lock);
 
 
 	dprintk("lockd: UNLOCK        status %d\n", ntohl(resp->status));
 	dprintk("lockd: UNLOCK        status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -229,7 +229,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 
 
 static void nlm4svc_callback_release(void *data)
 static void nlm4svc_callback_release(void *data)
 {
 {
-	nlm_release_call(data);
+	nlmsvc_release_call(data);
 }
 }
 
 
 static const struct rpc_call_ops nlm4svc_callback_ops = {
 static const struct rpc_call_ops nlm4svc_callback_ops = {
@@ -261,7 +261,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 
 
 	stat = func(rqstp, argp, &call->a_res);
 	stat = func(rqstp, argp, &call->a_res);
 	if (stat != 0) {
 	if (stat != 0) {
-		nlm_release_call(call);
+		nlmsvc_release_call(call);
 		return stat;
 		return stat;
 	}
 	}
 
 
@@ -334,7 +334,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = nlmsvc_share_file(host, file, argp);
 	resp->status = nlmsvc_share_file(host, file, argp);
 
 
 	dprintk("lockd: SHARE         status %d\n", ntohl(resp->status));
 	dprintk("lockd: SHARE         status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -367,7 +367,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = nlmsvc_unshare_file(host, file, argp);
 	resp->status = nlmsvc_unshare_file(host, file, argp);
 
 
 	dprintk("lockd: UNSHARE       status %d\n", ntohl(resp->status));
 	dprintk("lockd: UNSHARE       status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -399,7 +399,7 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 		return rpc_success;
 		return rpc_success;
 
 
 	nlmsvc_free_host_resources(host);
 	nlmsvc_free_host_resources(host);
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	return rpc_success;
 	return rpc_success;
 }
 }
 
 

+ 32 - 2
fs/lockd/svclock.c

@@ -46,6 +46,7 @@ static void	nlmsvc_remove_block(struct nlm_block *block);
 static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
 static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
 static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static const struct rpc_call_ops nlmsvc_grant_ops;
 static const struct rpc_call_ops nlmsvc_grant_ops;
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie);
 
 
 /*
 /*
  * The list of blocked locks to retry
  * The list of blocked locks to retry
@@ -233,7 +234,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
 failed_free:
 failed_free:
 	kfree(block);
 	kfree(block);
 failed:
 failed:
-	nlm_release_call(call);
+	nlmsvc_release_call(call);
 	return NULL;
 	return NULL;
 }
 }
 
 
@@ -266,7 +267,7 @@ static void nlmsvc_free_block(struct kref *kref)
 	mutex_unlock(&file->f_mutex);
 	mutex_unlock(&file->f_mutex);
 
 
 	nlmsvc_freegrantargs(block->b_call);
 	nlmsvc_freegrantargs(block->b_call);
-	nlm_release_call(block->b_call);
+	nlmsvc_release_call(block->b_call);
 	nlm_release_file(block->b_file);
 	nlm_release_file(block->b_file);
 	kfree(block->b_fl);
 	kfree(block->b_fl);
 	kfree(block);
 	kfree(block);
@@ -934,3 +935,32 @@ nlmsvc_retry_blocked(void)
 
 
 	return timeout;
 	return timeout;
 }
 }
+
+#ifdef RPC_DEBUG
+static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
+{
+	/*
+	 * We can get away with a static buffer because we're only
+	 * called with BKL held.
+	 */
+	static char buf[2*NLM_MAXCOOKIELEN+1];
+	unsigned int i, len = sizeof(buf);
+	char *p = buf;
+
+	len--;	/* allow for trailing \0 */
+	if (len < 3)
+		return "???";
+	for (i = 0 ; i < cookie->len ; i++) {
+		if (len < 2) {
+			strcpy(p-3, "...");
+			break;
+		}
+		sprintf(p, "%02x", cookie->data[i]);
+		p += 2;
+		len -= 2;
+	}
+	*p = '\0';
+
+	return buf;
+}
+#endif

+ 18 - 10
fs/lockd/svcproc.c

@@ -80,7 +80,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return 0;
 	return 0;
 
 
 no_locks:
 no_locks:
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	if (error)
 	if (error)
 		return error;
 		return error;
 	return nlm_lck_denied_nolocks;
 	return nlm_lck_denied_nolocks;
@@ -122,7 +122,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 		dprintk("lockd: TEST          status %d vers %d\n",
 		dprintk("lockd: TEST          status %d vers %d\n",
 			ntohl(resp->status), rqstp->rq_vers);
 			ntohl(resp->status), rqstp->rq_vers);
 
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rc;
 	return rc;
 }
 }
@@ -164,7 +164,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	else
 	else
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
 		dprintk("lockd: LOCK         status %d\n", ntohl(resp->status));
 
 
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rc;
 	return rc;
 }
 }
@@ -194,7 +194,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
 	resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
 
 
 	dprintk("lockd: CANCEL        status %d\n", ntohl(resp->status));
 	dprintk("lockd: CANCEL        status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -227,7 +227,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
 	resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
 
 
 	dprintk("lockd: UNLOCK        status %d\n", ntohl(resp->status));
 	dprintk("lockd: UNLOCK        status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -257,9 +257,17 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 			-task->tk_status);
 			-task->tk_status);
 }
 }
 
 
+void nlmsvc_release_call(struct nlm_rqst *call)
+{
+	if (!atomic_dec_and_test(&call->a_count))
+		return;
+	nlmsvc_release_host(call->a_host);
+	kfree(call);
+}
+
 static void nlmsvc_callback_release(void *data)
 static void nlmsvc_callback_release(void *data)
 {
 {
-	nlm_release_call(data);
+	nlmsvc_release_call(data);
 }
 }
 
 
 static const struct rpc_call_ops nlmsvc_callback_ops = {
 static const struct rpc_call_ops nlmsvc_callback_ops = {
@@ -291,7 +299,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 
 
 	stat = func(rqstp, argp, &call->a_res);
 	stat = func(rqstp, argp, &call->a_res);
 	if (stat != 0) {
 	if (stat != 0) {
-		nlm_release_call(call);
+		nlmsvc_release_call(call);
 		return stat;
 		return stat;
 	}
 	}
 
 
@@ -366,7 +374,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_share_file(host, file, argp));
 	resp->status = cast_status(nlmsvc_share_file(host, file, argp));
 
 
 	dprintk("lockd: SHARE         status %d\n", ntohl(resp->status));
 	dprintk("lockd: SHARE         status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -399,7 +407,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
 	resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
 
 
 	dprintk("lockd: UNSHARE       status %d\n", ntohl(resp->status));
 	dprintk("lockd: UNSHARE       status %d\n", ntohl(resp->status));
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	nlm_release_file(file);
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -431,7 +439,7 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
 		return rpc_success;
 		return rpc_success;
 
 
 	nlmsvc_free_host_resources(host);
 	nlmsvc_free_host_resources(host);
-	nlm_release_host(host);
+	nlmsvc_release_host(host);
 	return rpc_success;
 	return rpc_success;
 }
 }
 
 

+ 0 - 287
fs/lockd/xdr.c

@@ -148,37 +148,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
 	return p;
 	return p;
 }
 }
 
 
-/*
- * Encode a lock as part of an NLM call
- */
-static __be32 *
-nlm_encode_lock(__be32 *p, struct nlm_lock *lock)
-{
-	struct file_lock	*fl = &lock->fl;
-	__s32			start, len;
-
-	if (!(p = xdr_encode_string(p, lock->caller))
-	 || !(p = nlm_encode_fh(p, &lock->fh))
-	 || !(p = nlm_encode_oh(p, &lock->oh)))
-		return NULL;
-
-	if (fl->fl_start > NLM_OFFSET_MAX
-	 || (fl->fl_end > NLM_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
-		return NULL;
-
-	start = loff_t_to_s32(fl->fl_start);
-	if (fl->fl_end == OFFSET_MAX)
-		len = 0;
-	else
-		len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
-
-	*p++ = htonl(lock->svid);
-	*p++ = htonl(start);
-	*p++ = htonl(len);
-
-	return p;
-}
-
 /*
 /*
  * Encode result of a TEST/TEST_MSG call
  * Encode result of a TEST/TEST_MSG call
  */
  */
@@ -372,259 +341,3 @@ nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 {
 	return xdr_ressize_check(rqstp, p);
 	return xdr_ressize_check(rqstp, p);
 }
 }
-
-/*
- * Now, the client side XDR functions
- */
-#ifdef NLMCLNT_SUPPORT_SHARES
-static int
-nlmclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr)
-{
-	return 0;
-}
-#endif
-
-static int
-nlmclt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
-		return -EIO;
-	resp->status = *p++;
-	if (resp->status == nlm_lck_denied) {
-		struct file_lock	*fl = &resp->lock.fl;
-		u32			excl;
-		s32			start, len, end;
-
-		memset(&resp->lock, 0, sizeof(resp->lock));
-		locks_init_lock(fl);
-		excl = ntohl(*p++);
-		resp->lock.svid = ntohl(*p++);
-		fl->fl_pid = (pid_t)resp->lock.svid;
-		if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
-			return -EIO;
-
-		fl->fl_flags = FL_POSIX;
-		fl->fl_type  = excl? F_WRLCK : F_RDLCK;
-		start = ntohl(*p++);
-		len = ntohl(*p++);
-		end = start + len - 1;
-
-		fl->fl_start = s32_to_loff_t(start);
-		if (len == 0 || end < 0)
-			fl->fl_end = OFFSET_MAX;
-		else
-			fl->fl_end = s32_to_loff_t(end);
-	}
-	return 0;
-}
-
-
-static int
-nlmclt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = argp->block? xdr_one : xdr_zero;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm_encode_lock(p, lock)))
-		return -EIO;
-	*p++ = argp->reclaim? xdr_one : xdr_zero;
-	*p++ = htonl(argp->state);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = argp->block? xdr_one : xdr_zero;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	if (!(p = nlm_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
-		return -EIO;
-	*p++ = resp->status;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm_encode_testres(p, resp)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
-		return -EIO;
-	resp->status = *p++;
-	return 0;
-}
-
-#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
-#  error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
-#endif
-
-/*
- * Buffer requirements for NLM
- */
-#define NLM_void_sz		0
-#define NLM_cookie_sz		1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM_caller_sz		1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM_owner_sz		1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM_fhandle_sz		1+XDR_QUADLEN(NFS2_FHSIZE)
-#define NLM_lock_sz		3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz
-#define NLM_holder_sz		4+NLM_owner_sz
-
-#define NLM_testargs_sz		NLM_cookie_sz+1+NLM_lock_sz
-#define NLM_lockargs_sz		NLM_cookie_sz+4+NLM_lock_sz
-#define NLM_cancargs_sz		NLM_cookie_sz+2+NLM_lock_sz
-#define NLM_unlockargs_sz	NLM_cookie_sz+NLM_lock_sz
-
-#define NLM_testres_sz		NLM_cookie_sz+1+NLM_holder_sz
-#define NLM_res_sz		NLM_cookie_sz+1
-#define NLM_norep_sz		0
-
-/*
- * For NLM, a void procedure really returns nothing
- */
-#define nlmclt_decode_norep	NULL
-
-#define PROC(proc, argtype, restype)	\
-[NLMPROC_##proc] = {							\
-	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdrproc_t) nlmclt_encode_##argtype,		\
-	.p_decode    = (kxdrproc_t) nlmclt_decode_##restype,		\
-	.p_arglen    = NLM_##argtype##_sz,				\
-	.p_replen    = NLM_##restype##_sz,				\
-	.p_statidx   = NLMPROC_##proc,					\
-	.p_name      = #proc,						\
-	}
-
-static struct rpc_procinfo	nlm_procedures[] = {
-    PROC(TEST,		testargs,	testres),
-    PROC(LOCK,		lockargs,	res),
-    PROC(CANCEL,	cancargs,	res),
-    PROC(UNLOCK,	unlockargs,	res),
-    PROC(GRANTED,	testargs,	res),
-    PROC(TEST_MSG,	testargs,	norep),
-    PROC(LOCK_MSG,	lockargs,	norep),
-    PROC(CANCEL_MSG,	cancargs,	norep),
-    PROC(UNLOCK_MSG,	unlockargs,	norep),
-    PROC(GRANTED_MSG,	testargs,	norep),
-    PROC(TEST_RES,	testres,	norep),
-    PROC(LOCK_RES,	res,		norep),
-    PROC(CANCEL_RES,	res,		norep),
-    PROC(UNLOCK_RES,	res,		norep),
-    PROC(GRANTED_RES,	res,		norep),
-#ifdef NLMCLNT_SUPPORT_SHARES
-    PROC(SHARE,		shareargs,	shareres),
-    PROC(UNSHARE,	shareargs,	shareres),
-    PROC(NM_LOCK,	lockargs,	res),
-    PROC(FREE_ALL,	notify,		void),
-#endif
-};
-
-static struct rpc_version	nlm_version1 = {
-		.number		= 1,
-		.nrprocs	= 16,
-		.procs		= nlm_procedures,
-};
-
-static struct rpc_version	nlm_version3 = {
-		.number		= 3,
-		.nrprocs	= 24,
-		.procs		= nlm_procedures,
-};
-
-static struct rpc_version *	nlm_versions[] = {
-	[1] = &nlm_version1,
-	[3] = &nlm_version3,
-#ifdef 	CONFIG_LOCKD_V4
-	[4] = &nlm_version4,
-#endif
-};
-
-static struct rpc_stat		nlm_stats;
-
-struct rpc_program		nlm_program = {
-		.name		= "lockd",
-		.number		= NLM_PROGRAM,
-		.nrvers		= ARRAY_SIZE(nlm_versions),
-		.version	= nlm_versions,
-		.stats		= &nlm_stats,
-};
-
-#ifdef RPC_DEBUG
-const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
-{
-	/*
-	 * We can get away with a static buffer because we're only
-	 * called with BKL held.
-	 */
-	static char buf[2*NLM_MAXCOOKIELEN+1];
-	unsigned int i, len = sizeof(buf);
-	char *p = buf;
-
-	len--;	/* allow for trailing \0 */
-	if (len < 3)
-		return "???";
-	for (i = 0 ; i < cookie->len ; i++) {
-		if (len < 2) {
-			strcpy(p-3, "...");
-			break;
-		}
-		sprintf(p, "%02x", cookie->data[i]);
-		p += 2;
-		len -= 2;
-	}
-	*p = '\0';
-
-	return buf;
-}
-#endif

+ 0 - 255
fs/lockd/xdr4.c

@@ -93,15 +93,6 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
 	return p + XDR_QUADLEN(f->size);
 	return p + XDR_QUADLEN(f->size);
 }
 }
 
 
-static __be32 *
-nlm4_encode_fh(__be32 *p, struct nfs_fh *f)
-{
-	*p++ = htonl(f->size);
-	if (f->size) p[XDR_QUADLEN(f->size)-1] = 0; /* don't leak anything */
-	memcpy(p, f->data, f->size);
-	return p + XDR_QUADLEN(f->size);
-}
-
 /*
 /*
  * Encode and decode owner handle
  * Encode and decode owner handle
  */
  */
@@ -111,12 +102,6 @@ nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh)
 	return xdr_decode_netobj(p, oh);
 	return xdr_decode_netobj(p, oh);
 }
 }
 
 
-static __be32 *
-nlm4_encode_oh(__be32 *p, struct xdr_netobj *oh)
-{
-	return xdr_encode_netobj(p, oh);
-}
-
 static __be32 *
 static __be32 *
 nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
 nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
 {
 {
@@ -149,38 +134,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
 	return p;
 	return p;
 }
 }
 
 
-/*
- * Encode a lock as part of an NLM call
- */
-static __be32 *
-nlm4_encode_lock(__be32 *p, struct nlm_lock *lock)
-{
-	struct file_lock	*fl = &lock->fl;
-	__s64			start, len;
-
-	if (!(p = xdr_encode_string(p, lock->caller))
-	 || !(p = nlm4_encode_fh(p, &lock->fh))
-	 || !(p = nlm4_encode_oh(p, &lock->oh)))
-		return NULL;
-
-	if (fl->fl_start > NLM4_OFFSET_MAX
-	 || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
-		return NULL;
-
-	*p++ = htonl(lock->svid);
-
-	start = loff_t_to_s64(fl->fl_start);
-	if (fl->fl_end == OFFSET_MAX)
-		len = 0;
-	else
-		len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
-
-	p = xdr_encode_hyper(p, start);
-	p = xdr_encode_hyper(p, len);
-
-	return p;
-}
-
 /*
 /*
  * Encode result of a TEST/TEST_MSG call
  * Encode result of a TEST/TEST_MSG call
  */
  */
@@ -379,211 +332,3 @@ nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
 {
 	return xdr_ressize_check(rqstp, p);
 	return xdr_ressize_check(rqstp, p);
 }
 }
-
-/*
- * Now, the client side XDR functions
- */
-#ifdef NLMCLNT_SUPPORT_SHARES
-static int
-nlm4clt_decode_void(struct rpc_rqst *req, __be32 *p, void *ptr)
-{
-	return 0;
-}
-#endif
-
-static int
-nlm4clt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm4_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
-		return -EIO;
-	resp->status = *p++;
-	if (resp->status == nlm_lck_denied) {
-		struct file_lock	*fl = &resp->lock.fl;
-		u32			excl;
-		__u64			start, len;
-		__s64			end;
-
-		memset(&resp->lock, 0, sizeof(resp->lock));
-		locks_init_lock(fl);
-		excl = ntohl(*p++);
-		resp->lock.svid = ntohl(*p++);
-		fl->fl_pid = (pid_t)resp->lock.svid;
-		if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
-			return -EIO;
-
-		fl->fl_flags = FL_POSIX;
-		fl->fl_type  = excl? F_WRLCK : F_RDLCK;
-		p = xdr_decode_hyper(p, &start);
-		p = xdr_decode_hyper(p, &len);
-		end = start + len - 1;
-
-		fl->fl_start = s64_to_loff_t(start);
-		if (len == 0 || end < 0)
-			fl->fl_end = OFFSET_MAX;
-		else
-			fl->fl_end = s64_to_loff_t(end);
-	}
-	return 0;
-}
-
-
-static int
-nlm4clt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = argp->block? xdr_one : xdr_zero;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm4_encode_lock(p, lock)))
-		return -EIO;
-	*p++ = argp->reclaim? xdr_one : xdr_zero;
-	*p++ = htonl(argp->state);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	*p++ = argp->block? xdr_one : xdr_zero;
-	*p++ = (lock->fl.fl_type == F_WRLCK)? xdr_one : xdr_zero;
-	if (!(p = nlm4_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp)
-{
-	struct nlm_lock	*lock = &argp->lock;
-
-	if (!(p = nlm4_encode_cookie(p, &argp->cookie)))
-		return -EIO;
-	if (!(p = nlm4_encode_lock(p, lock)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
-		return -EIO;
-	*p++ = resp->status;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm4_encode_testres(p, resp)))
-		return -EIO;
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
-	return 0;
-}
-
-static int
-nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
-{
-	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
-		return -EIO;
-	resp->status = *p++;
-	return 0;
-}
-
-#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
-#  error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
-#endif
-
-#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
-#  error "NLM host name cannot be larger than NLM's maximum string length!"
-#endif
-
-/*
- * Buffer requirements for NLM
- */
-#define NLM4_void_sz		0
-#define NLM4_cookie_sz		1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM4_caller_sz		1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM4_owner_sz		1+XDR_QUADLEN(NLMCLNT_OHSIZE)
-#define NLM4_fhandle_sz		1+XDR_QUADLEN(NFS3_FHSIZE)
-#define NLM4_lock_sz		5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz
-#define NLM4_holder_sz		6+NLM4_owner_sz
-
-#define NLM4_testargs_sz	NLM4_cookie_sz+1+NLM4_lock_sz
-#define NLM4_lockargs_sz	NLM4_cookie_sz+4+NLM4_lock_sz
-#define NLM4_cancargs_sz	NLM4_cookie_sz+2+NLM4_lock_sz
-#define NLM4_unlockargs_sz	NLM4_cookie_sz+NLM4_lock_sz
-
-#define NLM4_testres_sz		NLM4_cookie_sz+1+NLM4_holder_sz
-#define NLM4_res_sz		NLM4_cookie_sz+1
-#define NLM4_norep_sz		0
-
-/*
- * For NLM, a void procedure really returns nothing
- */
-#define nlm4clt_decode_norep	NULL
-
-#define PROC(proc, argtype, restype)					\
-[NLMPROC_##proc] = {							\
-	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdrproc_t) nlm4clt_encode_##argtype,		\
-	.p_decode    = (kxdrproc_t) nlm4clt_decode_##restype,		\
-	.p_arglen    = NLM4_##argtype##_sz,				\
-	.p_replen    = NLM4_##restype##_sz,				\
-	.p_statidx   = NLMPROC_##proc,					\
-	.p_name      = #proc,						\
-	}
-
-static struct rpc_procinfo	nlm4_procedures[] = {
-    PROC(TEST,		testargs,	testres),
-    PROC(LOCK,		lockargs,	res),
-    PROC(CANCEL,	cancargs,	res),
-    PROC(UNLOCK,	unlockargs,	res),
-    PROC(GRANTED,	testargs,	res),
-    PROC(TEST_MSG,	testargs,	norep),
-    PROC(LOCK_MSG,	lockargs,	norep),
-    PROC(CANCEL_MSG,	cancargs,	norep),
-    PROC(UNLOCK_MSG,	unlockargs,	norep),
-    PROC(GRANTED_MSG,	testargs,	norep),
-    PROC(TEST_RES,	testres,	norep),
-    PROC(LOCK_RES,	res,		norep),
-    PROC(CANCEL_RES,	res,		norep),
-    PROC(UNLOCK_RES,	res,		norep),
-    PROC(GRANTED_RES,	res,		norep),
-#ifdef NLMCLNT_SUPPORT_SHARES
-    PROC(SHARE,		shareargs,	shareres),
-    PROC(UNSHARE,	shareargs,	shareres),
-    PROC(NM_LOCK,	lockargs,	res),
-    PROC(FREE_ALL,	notify,		void),
-#endif
-};
-
-struct rpc_version	nlm_version4 = {
-	.number		= 4,
-	.nrprocs	= 24,
-	.procs		= nlm4_procedures,
-};

+ 70 - 13
fs/nfs/callback.c

@@ -16,9 +16,7 @@
 #include <linux/freezer.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/svcauth_gss.h>
-#if defined(CONFIG_NFS_V4_1)
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/sunrpc/bc_xprt.h>
-#endif
 
 
 #include <net/inet_sock.h>
 #include <net/inet_sock.h>
 
 
@@ -136,6 +134,33 @@ out_err:
 }
 }
 
 
 #if defined(CONFIG_NFS_V4_1)
 #if defined(CONFIG_NFS_V4_1)
+/*
+ * CB_SEQUENCE operations will fail until the callback sessionid is set.
+ */
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+	struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
+	struct nfs4_sessionid *bc_sid;
+
+	if (!serv->sv_bc_xprt)
+		return -EINVAL;
+
+	/* on success freed in xprt_free */
+	bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
+	if (!bc_sid)
+		return -ENOMEM;
+	memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
+		NFS4_MAX_SESSIONID_LEN);
+	spin_lock_bh(&serv->sv_cb_lock);
+	serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
+	spin_unlock_bh(&serv->sv_cb_lock);
+	dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
+		((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
+		((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
+		serv->sv_bc_xprt);
+	return 0;
+}
+
 /*
 /*
  * The callback service for NFSv4.1 callbacks
  * The callback service for NFSv4.1 callbacks
  */
  */
@@ -177,30 +202,38 @@ nfs41_callback_svc(void *vrqstp)
 struct svc_rqst *
 struct svc_rqst *
 nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 {
 {
-	struct svc_xprt *bc_xprt;
-	struct svc_rqst *rqstp = ERR_PTR(-ENOMEM);
+	struct svc_rqst *rqstp;
+	int ret;
 
 
-	dprintk("--> %s\n", __func__);
-	/* Create a svc_sock for the service */
-	bc_xprt = svc_sock_create(serv, xprt->prot);
-	if (!bc_xprt)
+	/*
+	 * Create an svc_sock for the back channel service that shares the
+	 * fore channel connection.
+	 * Returns the input port (0) and sets the svc_serv sv_bc_xprt on success.
+	 */
+	ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
+			      SVC_SOCK_ANONYMOUS);
+	if (ret < 0) {
+		rqstp = ERR_PTR(ret);
 		goto out;
 		goto out;
+	}
 
 
 	/*
 	/*
 	 * Save the svc_serv in the transport so that it can
 	 * Save the svc_serv in the transport so that it can
 	 * be referenced when the session backchannel is initialized
 	 * be referenced when the session backchannel is initialized
 	 */
 	 */
-	serv->bc_xprt = bc_xprt;
 	xprt->bc_serv = serv;
 	xprt->bc_serv = serv;
 
 
 	INIT_LIST_HEAD(&serv->sv_cb_list);
 	INIT_LIST_HEAD(&serv->sv_cb_list);
 	spin_lock_init(&serv->sv_cb_lock);
 	spin_lock_init(&serv->sv_cb_lock);
 	init_waitqueue_head(&serv->sv_cb_waitq);
 	init_waitqueue_head(&serv->sv_cb_waitq);
 	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
 	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
-	if (IS_ERR(rqstp))
-		svc_sock_destroy(bc_xprt);
+	if (IS_ERR(rqstp)) {
+		svc_xprt_put(serv->sv_bc_xprt);
+		serv->sv_bc_xprt = NULL;
+	}
 out:
 out:
-	dprintk("--> %s return %p\n", __func__, rqstp);
+	dprintk("--> %s return %ld\n", __func__,
+		IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
 	return rqstp;
 	return rqstp;
 }
 }
 
 
@@ -233,6 +266,10 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
 		struct nfs_callback_data *cb_info)
 		struct nfs_callback_data *cb_info)
 {
 {
 }
 }
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* CONFIG_NFS_V4_1 */
 
 
 /*
 /*
@@ -328,6 +365,9 @@ static int check_gss_callback_principal(struct nfs_client *clp,
 	struct rpc_clnt *r = clp->cl_rpcclient;
 	struct rpc_clnt *r = clp->cl_rpcclient;
 	char *p = svc_gss_principal(rqstp);
 	char *p = svc_gss_principal(rqstp);
 
 
+	/* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
+	if (clp->cl_minorversion != 0)
+		return SVC_DROP;
 	/*
 	/*
 	 * It might just be a normal user principal, in which case
 	 * It might just be a normal user principal, in which case
 	 * userspace won't bother to tell us the name at all.
 	 * userspace won't bother to tell us the name at all.
@@ -345,6 +385,23 @@ static int check_gss_callback_principal(struct nfs_client *clp,
 	return SVC_OK;
 	return SVC_OK;
 }
 }
 
 
+/* pg_authenticate method helper */
+static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp)
+{
+	struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp);
+	int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0;
+
+	dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc);
+	if (svc_is_backchannel(rqstp))
+		/* Sessionid (usually) set after CB_NULL ping */
+		return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
+						  is_cb_compound);
+	else
+		/* No callback identifier in pg_authenticate */
+		return nfs4_find_client_no_ident(svc_addr(rqstp));
+}
+
+/* pg_authenticate method for nfsv4 callback threads. */
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
 {
 	struct nfs_client *clp;
 	struct nfs_client *clp;
@@ -352,7 +409,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 	int ret = SVC_OK;
 	int ret = SVC_OK;
 
 
 	/* Don't talk to strangers */
 	/* Don't talk to strangers */
-	clp = nfs_find_client(svc_addr(rqstp), 4);
+	clp = nfs_cb_find_client(rqstp);
 	if (clp == NULL)
 	if (clp == NULL)
 		return SVC_DROP;
 		return SVC_DROP;
 
 

+ 51 - 8
fs/nfs/callback.h

@@ -34,10 +34,17 @@ enum nfs4_callback_opnum {
 	OP_CB_ILLEGAL = 10044,
 	OP_CB_ILLEGAL = 10044,
 };
 };
 
 
+struct cb_process_state {
+	__be32			drc_status;
+	struct nfs_client	*clp;
+	struct nfs4_sessionid	*svc_sid; /* v4.1 callback service sessionid */
+};
+
 struct cb_compound_hdr_arg {
 struct cb_compound_hdr_arg {
 	unsigned int taglen;
 	unsigned int taglen;
 	const char *tag;
 	const char *tag;
 	unsigned int minorversion;
 	unsigned int minorversion;
+	unsigned int cb_ident; /* v4.0 callback identifier */
 	unsigned nops;
 	unsigned nops;
 };
 };
 
 
@@ -103,14 +110,23 @@ struct cb_sequenceres {
 	uint32_t			csr_target_highestslotid;
 	uint32_t			csr_target_highestslotid;
 };
 };
 
 
-extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
-				       struct cb_sequenceres *res);
+extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
+				       struct cb_sequenceres *res,
+				       struct cb_process_state *cps);
 
 
 extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
 extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
 					     const nfs4_stateid *stateid);
 					     const nfs4_stateid *stateid);
 
 
 #define RCA4_TYPE_MASK_RDATA_DLG	0
 #define RCA4_TYPE_MASK_RDATA_DLG	0
 #define RCA4_TYPE_MASK_WDATA_DLG	1
 #define RCA4_TYPE_MASK_WDATA_DLG	1
+#define RCA4_TYPE_MASK_DIR_DLG         2
+#define RCA4_TYPE_MASK_FILE_LAYOUT     3
+#define RCA4_TYPE_MASK_BLK_LAYOUT      4
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN  8
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX  9
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+#define RCA4_TYPE_MASK_ALL 0xf31f
 
 
 struct cb_recallanyargs {
 struct cb_recallanyargs {
 	struct sockaddr	*craa_addr;
 	struct sockaddr	*craa_addr;
@@ -118,25 +134,52 @@ struct cb_recallanyargs {
 	uint32_t	craa_type_mask;
 	uint32_t	craa_type_mask;
 };
 };
 
 
-extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
+extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
+					void *dummy,
+					struct cb_process_state *cps);
 
 
 struct cb_recallslotargs {
 struct cb_recallslotargs {
 	struct sockaddr	*crsa_addr;
 	struct sockaddr	*crsa_addr;
 	uint32_t	crsa_target_max_slots;
 	uint32_t	crsa_target_max_slots;
 };
 };
-extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
-					  void *dummy);
+extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
+					 void *dummy,
+					 struct cb_process_state *cps);
+
+/* Decoded arguments of a CB_LAYOUTRECALL operation. */
+struct cb_layoutrecallargs {
+	struct sockaddr		*cbl_addr;
+	uint32_t		cbl_recall_type; /* RETURN_FILE, RETURN_FSID or RETURN_ALL */
+	uint32_t		cbl_layout_type;
+	uint32_t		cbl_layoutchanged;
+	union {
+		struct { /* decoded for RETURN_FILE */
+			struct nfs_fh		cbl_fh;
+			struct pnfs_layout_range cbl_range;
+			nfs4_stateid		cbl_stateid;
+		};
+		struct nfs_fsid		cbl_fsid; /* decoded for RETURN_FSID */
+	};
+};
 
 
-#endif /* CONFIG_NFS_V4_1 */
+/*
+ * CB_LAYOUTRECALL handler. Returns an NFSv4 status in network byte order,
+ * so the prototype uses __be32 like the other callback handlers.
+ */
+extern __be32 nfs4_callback_layoutrecall(
+	struct cb_layoutrecallargs *args,
+	void *dummy, struct cb_process_state *cps);
 
 
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
+extern void nfs4_cb_take_slot(struct nfs_client *clp);
+#endif /* CONFIG_NFS_V4_1 */
 
 
+extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+				    struct cb_getattrres *res,
+				    struct cb_process_state *cps);
+extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+				   struct cb_process_state *cps);
 #ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
 extern void nfs_callback_down(int minorversion);
 extern void nfs_callback_down(int minorversion);
 extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
 extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
 					    const nfs4_stateid *stateid);
 					    const nfs4_stateid *stateid);
+extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
 #endif /* CONFIG_NFS_V4 */
 #endif /* CONFIG_NFS_V4 */
 /*
 /*
  * nfs41: Callbacks are expected to not cause substantial latency,
  * nfs41: Callbacks are expected to not cause substantial latency,

+ 221 - 105
fs/nfs/callback_proc.c

@@ -12,30 +12,33 @@
 #include "callback.h"
 #include "callback.h"
 #include "delegation.h"
 #include "delegation.h"
 #include "internal.h"
 #include "internal.h"
+#include "pnfs.h"
 
 
 #ifdef NFS_DEBUG
 #ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 #endif
 #endif
- 
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+
+__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+			     struct cb_getattrres *res,
+			     struct cb_process_state *cps)
 {
 {
-	struct nfs_client *clp;
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
 	struct inode *inode;
 
 
+	res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
+		goto out;
+
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client(args->addr, 4);
-	if (clp == NULL)
-		goto out;
 
 
 	dprintk("NFS: GETATTR callback request from %s\n",
 	dprintk("NFS: GETATTR callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 
 
-	inode = nfs_delegation_find_inode(clp, &args->fh);
+	inode = nfs_delegation_find_inode(cps->clp, &args->fh);
 	if (inode == NULL)
 	if (inode == NULL)
-		goto out_putclient;
+		goto out;
 	nfsi = NFS_I(inode);
 	nfsi = NFS_I(inode);
 	rcu_read_lock();
 	rcu_read_lock();
 	delegation = rcu_dereference(nfsi->delegation);
 	delegation = rcu_dereference(nfsi->delegation);
@@ -55,49 +58,41 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
 out_iput:
 out_iput:
 	rcu_read_unlock();
 	rcu_read_unlock();
 	iput(inode);
 	iput(inode);
-out_putclient:
-	nfs_put_client(clp);
 out:
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
 	return res->status;
 	return res->status;
 }
 }
 
 
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+			    struct cb_process_state *cps)
 {
 {
-	struct nfs_client *clp;
 	struct inode *inode;
 	struct inode *inode;
 	__be32 res;
 	__be32 res;
 	
 	
-	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client(args->addr, 4);
-	if (clp == NULL)
+	res = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
 		goto out;
 		goto out;
 
 
 	dprintk("NFS: RECALL callback request from %s\n",
 	dprintk("NFS: RECALL callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
-
-	do {
-		struct nfs_client *prev = clp;
-
-		inode = nfs_delegation_find_inode(clp, &args->fh);
-		if (inode != NULL) {
-			/* Set up a helper thread to actually return the delegation */
-			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
-				case 0:
-					res = 0;
-					break;
-				case -ENOENT:
-					if (res != 0)
-						res = htonl(NFS4ERR_BAD_STATEID);
-					break;
-				default:
-					res = htonl(NFS4ERR_RESOURCE);
-			}
-			iput(inode);
-		}
-		clp = nfs_find_client_next(prev);
-		nfs_put_client(prev);
-	} while (clp != NULL);
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+	res = htonl(NFS4ERR_BADHANDLE);
+	inode = nfs_delegation_find_inode(cps->clp, &args->fh);
+	if (inode == NULL)
+		goto out;
+	/* Set up a helper thread to actually return the delegation */
+	switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+	case 0:
+		res = 0;
+		break;
+	case -ENOENT:
+		if (res != 0)
+			res = htonl(NFS4ERR_BAD_STATEID);
+		break;
+	default:
+		res = htonl(NFS4ERR_RESOURCE);
+	}
+	iput(inode);
 out:
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
 	return res;
 	return res;
@@ -113,6 +108,139 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
 
 
 #if defined(CONFIG_NFS_V4_1)
 #if defined(CONFIG_NFS_V4_1)
 
 
+/*
+ * Handle a CB_LAYOUTRECALL of type RETURN_FILE: look up the layout on
+ * clp->cl_layouts whose inode filehandle matches args->cbl_fh and mark its
+ * matching segments invalid.
+ *
+ * Returns a host-order NFSv4 status: NFS4ERR_DELAY when a bulk recall is
+ * flagged on the layout or mark_matching_lsegs_invalid() returns non-zero,
+ * otherwise NFS4ERR_NOMATCHING_LAYOUT (also when no layout was found).
+ */
+static u32 initiate_file_draining(struct nfs_client *clp,
+				  struct cb_layoutrecallargs *args)
+{
+	struct pnfs_layout_hdr *lo;
+	struct inode *ino;
+	bool found = false;
+	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+	LIST_HEAD(free_me_list);
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+		/* non-zero means the filehandles differ */
+		if (nfs_compare_fh(&args->cbl_fh,
+				   &NFS_I(lo->plh_inode)->fh))
+			continue;
+		/* skip the entry if no inode reference can be taken */
+		ino = igrab(lo->plh_inode);
+		if (!ino)
+			continue;
+		found = true;
+		/* Without this, layout can be freed as soon
+		 * as we release cl_lock.
+		 */
+		get_layout_hdr(lo);
+		break;
+	}
+	spin_unlock(&clp->cl_lock);
+	if (!found)
+		return NFS4ERR_NOMATCHING_LAYOUT;
+
+	spin_lock(&ino->i_lock);
+	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+	    mark_matching_lsegs_invalid(lo, &free_me_list,
+					args->cbl_range.iomode))
+		rv = NFS4ERR_DELAY;
+	else
+		rv = NFS4ERR_NOMATCHING_LAYOUT;
+	pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+	spin_unlock(&ino->i_lock);
+	/* segments are freed only after i_lock has been dropped */
+	pnfs_free_lseg_list(&free_me_list);
+	/* drop the layout and inode references taken during the lookup */
+	put_layout_hdr(lo);
+	iput(ino);
+	return rv;
+}
+
+/*
+ * Handle a CB_LAYOUTRECALL of any type other than RETURN_FILE: collect the
+ * layouts on clp->cl_layouts (only those matching args->cbl_fsid when the
+ * type is RETURN_FSID), flag each with NFS_LAYOUT_BULK_RECALL and mark its
+ * segments invalid.
+ *
+ * Returns a host-order NFSv4 status: NFS4ERR_DELAY if
+ * mark_matching_lsegs_invalid() returned non-zero for any layout, otherwise
+ * NFS4ERR_NOMATCHING_LAYOUT.
+ */
+static u32 initiate_bulk_draining(struct nfs_client *clp,
+				  struct cb_layoutrecallargs *args)
+{
+	struct pnfs_layout_hdr *lo;
+	struct inode *ino;
+	u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+	struct pnfs_layout_hdr *tmp;
+	LIST_HEAD(recall_list);
+	LIST_HEAD(free_me_list);
+	struct pnfs_layout_range range = {
+		.iomode = IOMODE_ANY,
+		.offset = 0,
+		.length = NFS4_MAX_UINT64,
+	};
+
+	/* Pass 1: under cl_lock, pin each selected layout and its inode and
+	 * queue it on the private recall_list.
+	 */
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+		if ((args->cbl_recall_type == RETURN_FSID) &&
+		    memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
+			   &args->cbl_fsid, sizeof(struct nfs_fsid)))
+			continue;
+		if (!igrab(lo->plh_inode))
+			continue;
+		get_layout_hdr(lo);
+		BUG_ON(!list_empty(&lo->plh_bulk_recall));
+		list_add(&lo->plh_bulk_recall, &recall_list);
+	}
+	spin_unlock(&clp->cl_lock);
+	/* Pass 2: with cl_lock dropped, process each queued layout under its
+	 * inode's i_lock, then drop the references taken in pass 1.
+	 */
+	list_for_each_entry_safe(lo, tmp,
+				 &recall_list, plh_bulk_recall) {
+		ino = lo->plh_inode;
+		spin_lock(&ino->i_lock);
+		set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+		if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
+			rv = NFS4ERR_DELAY;
+		list_del_init(&lo->plh_bulk_recall);
+		spin_unlock(&ino->i_lock);
+		put_layout_hdr(lo);
+		iput(ino);
+	}
+	pnfs_free_lseg_list(&free_me_list);
+	return rv;
+}
+
+/*
+ * Run one layout recall for clp, using the NFS4CLNT_LAYOUTRECALL bit in
+ * clp->cl_state so that only one recall is processed at a time.
+ * Returns a host-order NFSv4 status; NFS4ERR_DELAY if the bit was already set.
+ */
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+				    struct cb_layoutrecallargs *args)
+{
+	u32 status;
+
+	dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+	if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state)) {
+		/* the bit was already set: ask the server to retry later */
+		status = NFS4ERR_DELAY;
+	} else {
+		status = (args->cbl_recall_type == RETURN_FILE) ?
+			initiate_file_draining(clp, args) :
+			initiate_bulk_draining(clp, args);
+		clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
+	}
+	dprintk("%s returning %i\n", __func__, status);
+	return status;
+}
+
+/*
+ * CB_LAYOUTRECALL entry point. Requires cps->clp to have been set; otherwise
+ * the operation fails with NFS4ERR_OP_NOT_IN_SESSION. The host-order result
+ * is converted to network byte order on return.
+ */
+__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
+				  void *dummy, struct cb_process_state *cps)
+{
+	u32 res = NFS4ERR_OP_NOT_IN_SESSION;
+
+	dprintk("%s: -->\n", __func__);
+
+	if (cps->clp)
+		res = do_callback_layoutrecall(cps->clp, args);
+
+	dprintk("%s: exit with status = %d\n", __func__, res);
+	return cpu_to_be32(res);
+}
+
+/*
+ * Recall every layout held by clp by running the layout recall path with
+ * zeroed arguments whose recall type is RETURN_ALL.
+ */
+static void pnfs_recall_all_layouts(struct nfs_client *clp)
+{
+	struct cb_layoutrecallargs args;
+
+	/* Pretend we got a CB_LAYOUTRECALL(ALL) */
+	memset(&args, 0, sizeof(args));
+	args.cbl_recall_type = RETURN_ALL;
+	/* FIXME we ignore errors, what should we do? */
+	do_callback_layoutrecall(clp, &args);
+}
+
 int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
 int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
 {
 {
 	if (delegation == NULL)
 	if (delegation == NULL)
@@ -184,42 +312,6 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
 	return htonl(NFS4ERR_SEQ_MISORDERED);
 	return htonl(NFS4ERR_SEQ_MISORDERED);
 }
 }
 
 
-/*
- * Returns a pointer to a held 'struct nfs_client' that matches the server's
- * address, major version number, and session ID.  It is the caller's
- * responsibility to release the returned reference.
- *
- * Returns NULL if there are no connections with sessions, or if no session
- * matches the one of interest.
- */
- static struct nfs_client *find_client_with_session(
-	const struct sockaddr *addr, u32 nfsversion,
-	struct nfs4_sessionid *sessionid)
-{
-	struct nfs_client *clp;
-
-	clp = nfs_find_client(addr, 4);
-	if (clp == NULL)
-		return NULL;
-
-	do {
-		struct nfs_client *prev = clp;
-
-		if (clp->cl_session != NULL) {
-			if (memcmp(clp->cl_session->sess_id.data,
-					sessionid->data,
-					NFS4_MAX_SESSIONID_LEN) == 0) {
-				/* Returns a held reference to clp */
-				return clp;
-			}
-		}
-		clp = nfs_find_client_next(prev);
-		nfs_put_client(prev);
-	} while (clp != NULL);
-
-	return NULL;
-}
-
 /*
 /*
  * For each referring call triple, check the session's slot table for
  * For each referring call triple, check the session's slot table for
  * a match.  If the slot is in use and the sequence numbers match, the
  * a match.  If the slot is in use and the sequence numbers match, the
@@ -276,20 +368,34 @@ out:
 }
 }
 
 
 __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-				struct cb_sequenceres *res)
+			      struct cb_sequenceres *res,
+			      struct cb_process_state *cps)
 {
 {
 	struct nfs_client *clp;
 	struct nfs_client *clp;
 	int i;
 	int i;
 	__be32 status;
 	__be32 status;
 
 
+	cps->clp = NULL;
+
 	status = htonl(NFS4ERR_BADSESSION);
 	status = htonl(NFS4ERR_BADSESSION);
-	clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
+	/* Incoming session must match the callback session */
+	if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
+		goto out;
+
+	clp = nfs4_find_client_sessionid(args->csa_addr,
+					 &args->csa_sessionid, 1);
 	if (clp == NULL)
 	if (clp == NULL)
 		goto out;
 		goto out;
 
 
+	/* state manager is resetting the session */
+	if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+		/* status is __be32: convert like every other assignment here */
+		status = htonl(NFS4ERR_DELAY);
+		goto out;
+	}
+
 	status = validate_seqid(&clp->cl_session->bc_slot_table, args);
 	status = validate_seqid(&clp->cl_session->bc_slot_table, args);
 	if (status)
 	if (status)
-		goto out_putclient;
+		goto out;
 
 
 	/*
 	/*
 	 * Check for pending referring calls.  If a match is found, a
 	 * Check for pending referring calls.  If a match is found, a
@@ -298,7 +404,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	 */
 	 */
 	if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
 	if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
 		status = htonl(NFS4ERR_DELAY);
 		status = htonl(NFS4ERR_DELAY);
-		goto out_putclient;
+		goto out;
 	}
 	}
 
 
 	memcpy(&res->csr_sessionid, &args->csa_sessionid,
 	memcpy(&res->csr_sessionid, &args->csa_sessionid,
@@ -307,83 +413,93 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	res->csr_slotid = args->csa_slotid;
 	res->csr_slotid = args->csa_slotid;
 	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
 	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
 	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
 	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+	nfs4_cb_take_slot(clp);
+	cps->clp = clp; /* put in nfs4_callback_compound */
 
 
-out_putclient:
-	nfs_put_client(clp);
 out:
 out:
 	for (i = 0; i < args->csa_nrclists; i++)
 	for (i = 0; i < args->csa_nrclists; i++)
 		kfree(args->csa_rclists[i].rcl_refcalls);
 		kfree(args->csa_rclists[i].rcl_refcalls);
 	kfree(args->csa_rclists);
 	kfree(args->csa_rclists);
 
 
-	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
-		res->csr_status = 0;
-	else
+	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
+		cps->drc_status = status;
+		status = 0;
+	} else
 		res->csr_status = status;
 		res->csr_status = status;
+
 	dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
 	dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
 		ntohl(status), ntohl(res->csr_status));
 		ntohl(status), ntohl(res->csr_status));
 	return status;
 	return status;
 }
 }
 
 
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+/* True when mask has no bits set outside RCA4_TYPE_MASK_ALL. */
+static bool
+validate_bitmap_values(unsigned long mask)
+{
+	if (mask & ~RCA4_TYPE_MASK_ALL)
+		return false;
+	return true;
+}
+
+__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+			       struct cb_process_state *cps)
 {
 {
-	struct nfs_client *clp;
 	__be32 status;
 	__be32 status;
 	fmode_t flags = 0;
 	fmode_t flags = 0;
 
 
-	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
-	clp = nfs_find_client(args->craa_addr, 4);
-	if (clp == NULL)
+	status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+	if (!cps->clp) /* set in cb_sequence */
 		goto out;
 		goto out;
 
 
 	dprintk("NFS: RECALL_ANY callback request from %s\n",
 	dprintk("NFS: RECALL_ANY callback request from %s\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
+	status = cpu_to_be32(NFS4ERR_INVAL);
+	if (!validate_bitmap_values(args->craa_type_mask))
+		goto out;
 
 
+	status = cpu_to_be32(NFS4_OK);
 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
 		     &args->craa_type_mask))
 		     &args->craa_type_mask))
 		flags = FMODE_READ;
 		flags = FMODE_READ;
 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
 		     &args->craa_type_mask))
 		     &args->craa_type_mask))
 		flags |= FMODE_WRITE;
 		flags |= FMODE_WRITE;
-
+	if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
+		     &args->craa_type_mask))
+		pnfs_recall_all_layouts(cps->clp);
 	if (flags)
 	if (flags)
-		nfs_expire_all_delegation_types(clp, flags);
-	status = htonl(NFS4_OK);
+		nfs_expire_all_delegation_types(cps->clp, flags);
 out:
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
 	return status;
 }
 }
 
 
 /* Reduce the fore channel's max_slots to the target value */
 /* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
+__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+				struct cb_process_state *cps)
 {
 {
-	struct nfs_client *clp;
 	struct nfs4_slot_table *fc_tbl;
 	struct nfs4_slot_table *fc_tbl;
 	__be32 status;
 	__be32 status;
 
 
 	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
 	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
-	clp = nfs_find_client(args->crsa_addr, 4);
-	if (clp == NULL)
+	if (!cps->clp) /* set in cb_sequence */
 		goto out;
 		goto out;
 
 
 	dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
 	dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
-		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
 		args->crsa_target_max_slots);
 		args->crsa_target_max_slots);
 
 
-	fc_tbl = &clp->cl_session->fc_slot_table;
+	fc_tbl = &cps->clp->cl_session->fc_slot_table;
 
 
 	status = htonl(NFS4ERR_BAD_HIGH_SLOT);
 	status = htonl(NFS4ERR_BAD_HIGH_SLOT);
 	if (args->crsa_target_max_slots > fc_tbl->max_slots ||
 	if (args->crsa_target_max_slots > fc_tbl->max_slots ||
 	    args->crsa_target_max_slots < 1)
 	    args->crsa_target_max_slots < 1)
-		goto out_putclient;
+		goto out;
 
 
 	status = htonl(NFS4_OK);
 	status = htonl(NFS4_OK);
 	if (args->crsa_target_max_slots == fc_tbl->max_slots)
 	if (args->crsa_target_max_slots == fc_tbl->max_slots)
-		goto out_putclient;
+		goto out;
 
 
 	fc_tbl->target_max_slots = args->crsa_target_max_slots;
 	fc_tbl->target_max_slots = args->crsa_target_max_slots;
-	nfs41_handle_recall_slot(clp);
-out_putclient:
-	nfs_put_client(clp);	/* balance nfs_find_client */
+	nfs41_handle_recall_slot(cps->clp);
 out:
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
 	return status;

+ 127 - 16
fs/nfs/callback_xdr.c

@@ -10,8 +10,10 @@
 #include <linux/nfs4.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
+#include <linux/sunrpc/bc_xprt.h>
 #include "nfs4_fs.h"
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "callback.h"
+#include "internal.h"
 
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
 #define CB_OP_TAGLEN_MAXSZ	(512)
 #define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
 #define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
@@ -22,6 +24,7 @@
 #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 
 
 #if defined(CONFIG_NFS_V4_1)
 #if defined(CONFIG_NFS_V4_1)
+#define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
 #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
 					4 + 1 + 3)
 					4 + 1 + 3)
 #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
@@ -33,7 +36,8 @@
 /* Internal error code */
 /* Internal error code */
 #define NFS4ERR_RESOURCE_HDR	11050
 #define NFS4ERR_RESOURCE_HDR	11050
 
 
-typedef __be32 (*callback_process_op_t)(void *, void *);
+typedef __be32 (*callback_process_op_t)(void *, void *,
+					struct cb_process_state *);
 typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
 typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
 typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
 typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
 
 
@@ -160,7 +164,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 	hdr->minorversion = ntohl(*p++);
 	hdr->minorversion = ntohl(*p++);
 	/* Check minor version is zero or one. */
 	/* Check minor version is zero or one. */
 	if (hdr->minorversion <= 1) {
 	if (hdr->minorversion <= 1) {
-		p++;	/* skip callback_ident */
+		hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
 	} else {
 	} else {
 		printk(KERN_WARNING "%s: NFSv4 server callback with "
 		printk(KERN_WARNING "%s: NFSv4 server callback with "
 			"illegal minor version %u!\n",
 			"illegal minor version %u!\n",
@@ -220,6 +224,66 @@ out:
 
 
 #if defined(CONFIG_NFS_V4_1)
 #if defined(CONFIG_NFS_V4_1)
 
 
+/*
+ * Decode the arguments of a CB_LAYOUTRECALL from xdr into args.
+ *
+ * Returns 0 on success, htonl(NFS4ERR_BADXDR) when the buffer is too short
+ * or the recall type is not RETURN_FILE, RETURN_FSID or RETURN_ALL, or the
+ * error returned by decode_fh()/decode_stateid().
+ */
+static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
+				       struct xdr_stream *xdr,
+				       struct cb_layoutrecallargs *args)
+{
+	__be32 *p;
+	__be32 status = 0;
+	uint32_t iomode;
+
+	args->cbl_addr = svc_addr(rqstp);
+	/* fixed part: layout type, iomode, changed flag, recall type */
+	p = read_buf(xdr, 4 * sizeof(uint32_t));
+	if (unlikely(p == NULL)) {
+		status = htonl(NFS4ERR_BADXDR);
+		goto out;
+	}
+
+	args->cbl_layout_type = ntohl(*p++);
+	/* Despite the spec's xdr, iomode really belongs in the FILE switch,
+	 * as it is unusable and ignored with the other types.
+	 */
+	iomode = ntohl(*p++);
+	args->cbl_layoutchanged = ntohl(*p++);
+	args->cbl_recall_type = ntohl(*p++);
+
+	if (args->cbl_recall_type == RETURN_FILE) {
+		/* filehandle, then offset/length, then stateid */
+		args->cbl_range.iomode = iomode;
+		status = decode_fh(xdr, &args->cbl_fh);
+		if (unlikely(status != 0))
+			goto out;
+
+		p = read_buf(xdr, 2 * sizeof(uint64_t));
+		if (unlikely(p == NULL)) {
+			status = htonl(NFS4ERR_BADXDR);
+			goto out;
+		}
+		p = xdr_decode_hyper(p, &args->cbl_range.offset);
+		p = xdr_decode_hyper(p, &args->cbl_range.length);
+		status = decode_stateid(xdr, &args->cbl_stateid);
+		if (unlikely(status != 0))
+			goto out;
+	} else if (args->cbl_recall_type == RETURN_FSID) {
+		/* fsid: major then minor */
+		p = read_buf(xdr, 2 * sizeof(uint64_t));
+		if (unlikely(p == NULL)) {
+			status = htonl(NFS4ERR_BADXDR);
+			goto out;
+		}
+		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
+		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
+	} else if (args->cbl_recall_type != RETURN_ALL) {
+		/* RETURN_ALL carries no further data; anything else is invalid */
+		status = htonl(NFS4ERR_BADXDR);
+		goto out;
+	}
+	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
+		__func__,
+		args->cbl_layout_type, iomode,
+		args->cbl_layoutchanged, args->cbl_recall_type);
+out:
+	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+	return status;
+}
+
 static __be32 decode_sessionid(struct xdr_stream *xdr,
 static __be32 decode_sessionid(struct xdr_stream *xdr,
 				 struct nfs4_sessionid *sid)
 				 struct nfs4_sessionid *sid)
 {
 {
@@ -574,10 +638,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
 	case OP_CB_SEQUENCE:
 	case OP_CB_SEQUENCE:
 	case OP_CB_RECALL_ANY:
 	case OP_CB_RECALL_ANY:
 	case OP_CB_RECALL_SLOT:
 	case OP_CB_RECALL_SLOT:
+	case OP_CB_LAYOUTRECALL:
 		*op = &callback_ops[op_nr];
 		*op = &callback_ops[op_nr];
 		break;
 		break;
 
 
-	case OP_CB_LAYOUTRECALL:
 	case OP_CB_NOTIFY_DEVICEID:
 	case OP_CB_NOTIFY_DEVICEID:
 	case OP_CB_NOTIFY:
 	case OP_CB_NOTIFY:
 	case OP_CB_PUSH_DELEG:
 	case OP_CB_PUSH_DELEG:
@@ -593,6 +657,37 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
 	return htonl(NFS_OK);
 	return htonl(NFS_OK);
 }
 }
 
 
+/*
+ * Release the session's back channel slot: decrement highest_used_slotid
+ * and call nfs4_check_drain_bc_complete(), both under slot_tbl_lock.
+ */
+static void nfs4_callback_free_slot(struct nfs4_session *session)
+{
+	struct nfs4_slot_table *tbl = &session->bc_slot_table;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	/*
+	 * Let the state manager know callback processing done.
+	 * A single slot, so highest used slotid is either 0 or -1
+	 */
+	tbl->highest_used_slotid--;
+	nfs4_check_drain_bc_complete(session);
+	spin_unlock(&tbl->slot_tbl_lock);
+}
+
+/* Release the back channel slot of clp's session; no-op without a client or session. */
+static void nfs4_cb_free_slot(struct nfs_client *clp)
+{
+	if (!clp || !clp->cl_session)
+		return;
+	nfs4_callback_free_slot(clp->cl_session);
+}
+
+/*
+ * Mark the back channel slot of clp's session as in use by incrementing
+ * highest_used_slotid under slot_tbl_lock.
+ * A single slot, so highest used slotid is either 0 or -1; any other value
+ * after the increment triggers BUG_ON().
+ */
+void nfs4_cb_take_slot(struct nfs_client *clp)
+{
+	struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	tbl->highest_used_slotid++;
+	BUG_ON(tbl->highest_used_slotid != 0);
+	spin_unlock(&tbl->slot_tbl_lock);
+}
+
 #else /* CONFIG_NFS_V4_1 */
 #else /* CONFIG_NFS_V4_1 */
 
 
 static __be32
 static __be32
@@ -601,6 +696,9 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
 	return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
 	return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
 }
 }
 
 
+/* Empty stub used when CONFIG_NFS_V4_1 is not defined. */
+static void nfs4_cb_free_slot(struct nfs_client *clp)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* CONFIG_NFS_V4_1 */
 
 
 static __be32
 static __be32
@@ -621,7 +719,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
 static __be32 process_op(uint32_t minorversion, int nop,
 static __be32 process_op(uint32_t minorversion, int nop,
 		struct svc_rqst *rqstp,
 		struct svc_rqst *rqstp,
 		struct xdr_stream *xdr_in, void *argp,
 		struct xdr_stream *xdr_in, void *argp,
-		struct xdr_stream *xdr_out, void *resp, int* drc_status)
+		struct xdr_stream *xdr_out, void *resp,
+		struct cb_process_state *cps)
 {
 {
 	struct callback_op *op = &callback_ops[0];
 	struct callback_op *op = &callback_ops[0];
 	unsigned int op_nr;
 	unsigned int op_nr;
@@ -644,8 +743,8 @@ static __be32 process_op(uint32_t minorversion, int nop,
 	if (status)
 	if (status)
 		goto encode_hdr;
 		goto encode_hdr;
 
 
-	if (*drc_status) {
-		status = *drc_status;
+	if (cps->drc_status) {
+		status = cps->drc_status;
 		goto encode_hdr;
 		goto encode_hdr;
 	}
 	}
 
 
@@ -653,16 +752,10 @@ static __be32 process_op(uint32_t minorversion, int nop,
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
 		status = op->decode_args(rqstp, xdr_in, argp);
 		status = op->decode_args(rqstp, xdr_in, argp);
 		if (likely(status == 0))
 		if (likely(status == 0))
-			status = op->process_op(argp, resp);
+			status = op->process_op(argp, resp, cps);
 	} else
 	} else
 		status = htonl(NFS4ERR_RESOURCE);
 		status = htonl(NFS4ERR_RESOURCE);
 
 
-	/* Only set by OP_CB_SEQUENCE processing */
-	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
-		*drc_status = status;
-		status = 0;
-	}
-
 encode_hdr:
 encode_hdr:
 	res = encode_op_hdr(xdr_out, op_nr, status);
 	res = encode_op_hdr(xdr_out, op_nr, status);
 	if (unlikely(res))
 	if (unlikely(res))
@@ -681,8 +774,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
 	struct cb_compound_hdr_res hdr_res = { NULL };
 	struct xdr_stream xdr_in, xdr_out;
 	struct xdr_stream xdr_in, xdr_out;
-	__be32 *p;
-	__be32 status, drc_status = 0;
+	__be32 *p, status;
+	struct cb_process_state cps = {
+		.drc_status = 0,
+		.clp = NULL,
+	};
 	unsigned int nops = 0;
 	unsigned int nops = 0;
 
 
 	dprintk("%s: start\n", __func__);
 	dprintk("%s: start\n", __func__);
@@ -696,6 +792,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	if (status == __constant_htonl(NFS4ERR_RESOURCE))
 	if (status == __constant_htonl(NFS4ERR_RESOURCE))
 		return rpc_garbage_args;
 		return rpc_garbage_args;
 
 
+	if (hdr_arg.minorversion == 0) {
+		cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
+		if (!cps.clp)
+			return rpc_drop_reply;
+	} else
+		cps.svc_sid = bc_xprt_sid(rqstp);
+
 	hdr_res.taglen = hdr_arg.taglen;
 	hdr_res.taglen = hdr_arg.taglen;
 	hdr_res.tag = hdr_arg.tag;
 	hdr_res.tag = hdr_arg.tag;
 	if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
 	if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
@@ -703,7 +806,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 
 	while (status == 0 && nops != hdr_arg.nops) {
 	while (status == 0 && nops != hdr_arg.nops) {
 		status = process_op(hdr_arg.minorversion, nops, rqstp,
 		status = process_op(hdr_arg.minorversion, nops, rqstp,
-				    &xdr_in, argp, &xdr_out, resp, &drc_status);
+				    &xdr_in, argp, &xdr_out, resp, &cps);
 		nops++;
 		nops++;
 	}
 	}
 
 
@@ -716,6 +819,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 
 	*hdr_res.status = status;
 	*hdr_res.status = status;
 	*hdr_res.nops = htonl(nops);
 	*hdr_res.nops = htonl(nops);
+	nfs4_cb_free_slot(cps.clp);
+	nfs_put_client(cps.clp);
 	dprintk("%s: done, status = %u\n", __func__, ntohl(status));
 	dprintk("%s: done, status = %u\n", __func__, ntohl(status));
 	return rpc_success;
 	return rpc_success;
 }
 }
@@ -739,6 +844,12 @@ static struct callback_op callback_ops[] = {
 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
 	},
 	},
 #if defined(CONFIG_NFS_V4_1)
 #if defined(CONFIG_NFS_V4_1)
+	[OP_CB_LAYOUTRECALL] = {
+		.process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
+		.decode_args =
+			(callback_decode_arg_t)decode_layoutrecall_args,
+		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
+	},
 	[OP_CB_SEQUENCE] = {
 	[OP_CB_SEQUENCE] = {
 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,

+ 210 - 92
fs/nfs/client.c

@@ -56,6 +56,30 @@ static DEFINE_SPINLOCK(nfs_client_lock);
 static LIST_HEAD(nfs_client_list);
 static LIST_HEAD(nfs_client_list);
 static LIST_HEAD(nfs_volume_list);
 static LIST_HEAD(nfs_volume_list);
 static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+#ifdef CONFIG_NFS_V4
+static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
+
+/*
+ * Get a unique NFSv4.0 callback identifier which will be used
+ * by the V4.0 callback service to lookup the nfs_client struct
+ *
+ * Returns 0 on success (also when clp is not an NFSv4.0 client, in which
+ * case nothing is allocated), -ENOMEM if idr_pre_get() fails, or the error
+ * returned by idr_get_new().
+ */
+static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
+{
+	int ret = 0;
+
+	if (clp->rpc_ops->version != 4 || minorversion != 0)
+		return ret;
+retry:
+	/* Preallocate with GFP_KERNEL before taking the spinlock. */
+	if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL))
+		return -ENOMEM;
+	spin_lock(&nfs_client_lock);
+	ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident);
+	spin_unlock(&nfs_client_lock);
+	/* On -EAGAIN, preallocate again and retry. */
+	if (ret == -EAGAIN)
+		goto retry;
+	return ret;
+}
+#endif /* CONFIG_NFS_V4 */
 
 
 /*
 /*
  * RPC cruft for NFS
  * RPC cruft for NFS
@@ -144,7 +168,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_proto = cl_init->proto;
 	clp->cl_proto = cl_init->proto;
 
 
 #ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4
-	INIT_LIST_HEAD(&clp->cl_delegations);
+	err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
+	if (err)
+		goto error_cleanup;
+
 	spin_lock_init(&clp->cl_lock);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -170,21 +197,17 @@ error_0:
 }
 }
 
 
 #ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4
-/*
- * Clears/puts all minor version specific parts from an nfs_client struct
- * reverting it to minorversion 0.
- */
-static void nfs4_clear_client_minor_version(struct nfs_client *clp)
-{
 #ifdef CONFIG_NFS_V4_1
 #ifdef CONFIG_NFS_V4_1
-	if (nfs4_has_session(clp)) {
+static void nfs4_shutdown_session(struct nfs_client *clp)
+{
+	if (nfs4_has_session(clp))
 		nfs4_destroy_session(clp->cl_session);
 		nfs4_destroy_session(clp->cl_session);
-		clp->cl_session = NULL;
-	}
-
-	clp->cl_mvops = nfs_v4_minor_ops[0];
-#endif /* CONFIG_NFS_V4_1 */
 }
 }
+#else /* CONFIG_NFS_V4_1 */
+static void nfs4_shutdown_session(struct nfs_client *clp)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
 
 
 /*
 /*
  * Destroy the NFS4 callback service
  * Destroy the NFS4 callback service
@@ -199,17 +222,49 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 {
 {
 	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
 	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
 		nfs4_kill_renewd(clp);
 		nfs4_kill_renewd(clp);
-	nfs4_clear_client_minor_version(clp);
+	nfs4_shutdown_session(clp);
 	nfs4_destroy_callback(clp);
 	nfs4_destroy_callback(clp);
 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
 		nfs_idmap_delete(clp);
 		nfs_idmap_delete(clp);
 
 
 	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
 	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
 }
 }
+
+/* idr_remove_all is not needed as all ids are removed by nfs_put_client */
+void nfs_cleanup_cb_ident_idr(void)
+{
+	idr_destroy(&cb_ident_idr);
+}
+
+/* nfs_client_lock held */
+static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
+{
+	if (clp->cl_cb_ident)
+		idr_remove(&cb_ident_idr, clp->cl_cb_ident);
+}
+
+static void pnfs_init_server(struct nfs_server *server)
+{
+	rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
+}
+
 #else
 #else
 static void nfs4_shutdown_client(struct nfs_client *clp)
 static void nfs4_shutdown_client(struct nfs_client *clp)
 {
 {
 }
 }
+
+void nfs_cleanup_cb_ident_idr(void)
+{
+}
+
+static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
+{
+}
+
+static void pnfs_init_server(struct nfs_server *server)
+{
+}
+
 #endif /* CONFIG_NFS_V4 */
 #endif /* CONFIG_NFS_V4 */
 
 
 /*
 /*
@@ -248,6 +303,7 @@ void nfs_put_client(struct nfs_client *clp)
 
 
 	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
 	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
 		list_del(&clp->cl_share_link);
 		list_del(&clp->cl_share_link);
+		nfs_cb_idr_remove_locked(clp);
 		spin_unlock(&nfs_client_lock);
 		spin_unlock(&nfs_client_lock);
 
 
 		BUG_ON(!list_empty(&clp->cl_superblocks));
 		BUG_ON(!list_empty(&clp->cl_superblocks));
@@ -363,70 +419,28 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
 	return 0;
 	return 0;
 }
 }
 
 
-/*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
- */
-struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
-{
-	struct nfs_client *clp;
-
-	spin_lock(&nfs_client_lock);
-	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
-		struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
-
-		/* Don't match clients that failed to initialise properly */
-		if (!(clp->cl_cons_state == NFS_CS_READY ||
-		      clp->cl_cons_state == NFS_CS_SESSION_INITING))
-			continue;
-
-		/* Different NFS versions cannot share the same nfs_client */
-		if (clp->rpc_ops->version != nfsversion)
-			continue;
-
-		/* Match only the IP address, not the port number */
-		if (!nfs_sockaddr_match_ipaddr(addr, clap))
-			continue;
-
-		atomic_inc(&clp->cl_count);
-		spin_unlock(&nfs_client_lock);
-		return clp;
-	}
-	spin_unlock(&nfs_client_lock);
-	return NULL;
-}
-
-/*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
- */
-struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
+/* Common match routine for v4.0 and v4.1 callback services */
+bool
+nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
+		     u32 minorversion)
 {
 {
-	struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr;
-	u32 nfsvers = clp->rpc_ops->version;
+	struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 
 
-	spin_lock(&nfs_client_lock);
-	list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) {
-		struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+	/* Don't match clients that failed to initialise */
+	if (!(clp->cl_cons_state == NFS_CS_READY ||
+	    clp->cl_cons_state == NFS_CS_SESSION_INITING))
+		return false;
 
 
-		/* Don't match clients that failed to initialise properly */
-		if (clp->cl_cons_state != NFS_CS_READY)
-			continue;
+	/* Match the version and minorversion */
+	if (clp->rpc_ops->version != 4 ||
+	    clp->cl_minorversion != minorversion)
+		return false;
 
 
-		/* Different NFS versions cannot share the same nfs_client */
-		if (clp->rpc_ops->version != nfsvers)
-			continue;
+	/* Match only the IP address, not the port number */
+	if (!nfs_sockaddr_match_ipaddr(addr, clap))
+		return false;
 
 
-		/* Match only the IP address, not the port number */
-		if (!nfs_sockaddr_match_ipaddr(sap, clap))
-			continue;
-
-		atomic_inc(&clp->cl_count);
-		spin_unlock(&nfs_client_lock);
-		return clp;
-	}
-	spin_unlock(&nfs_client_lock);
-	return NULL;
+	return true;
 }
 }
 
 
 /*
 /*
@@ -988,6 +1002,27 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
 	target->options = source->options;
 	target->options = source->options;
 }
 }
 
 
+static void nfs_server_insert_lists(struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+}
+
+static void nfs_server_remove_lists(struct nfs_server *server)
+{
+	spin_lock(&nfs_client_lock);
+	list_del_rcu(&server->client_link);
+	list_del(&server->master_link);
+	spin_unlock(&nfs_client_lock);
+
+	synchronize_rcu();
+}
+
 /*
 /*
  * Allocate and initialise a server record
  * Allocate and initialise a server record
  */
  */
@@ -1004,6 +1039,7 @@ static struct nfs_server *nfs_alloc_server(void)
 	/* Zero out the NFS state stuff */
 	/* Zero out the NFS state stuff */
 	INIT_LIST_HEAD(&server->client_link);
 	INIT_LIST_HEAD(&server->client_link);
 	INIT_LIST_HEAD(&server->master_link);
 	INIT_LIST_HEAD(&server->master_link);
+	INIT_LIST_HEAD(&server->delegations);
 
 
 	atomic_set(&server->active, 0);
 	atomic_set(&server->active, 0);
 
 
@@ -1019,6 +1055,8 @@ static struct nfs_server *nfs_alloc_server(void)
 		return NULL;
 		return NULL;
 	}
 	}
 
 
+	pnfs_init_server(server);
+
 	return server;
 	return server;
 }
 }
 
 
@@ -1029,11 +1067,8 @@ void nfs_free_server(struct nfs_server *server)
 {
 {
 	dprintk("--> nfs_free_server()\n");
 	dprintk("--> nfs_free_server()\n");
 
 
+	nfs_server_remove_lists(server);
 	unset_pnfs_layoutdriver(server);
 	unset_pnfs_layoutdriver(server);
-	spin_lock(&nfs_client_lock);
-	list_del(&server->client_link);
-	list_del(&server->master_link);
-	spin_unlock(&nfs_client_lock);
 
 
 	if (server->destroy != NULL)
 	if (server->destroy != NULL)
 		server->destroy(server);
 		server->destroy(server);
@@ -1108,11 +1143,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
 		(unsigned long long) server->fsid.major,
 		(unsigned long long) server->fsid.major,
 		(unsigned long long) server->fsid.minor);
 		(unsigned long long) server->fsid.minor);
 
 
-	spin_lock(&nfs_client_lock);
-	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
-	list_add_tail(&server->master_link, &nfs_volume_list);
-	spin_unlock(&nfs_client_lock);
-
+	nfs_server_insert_lists(server);
 	server->mount_time = jiffies;
 	server->mount_time = jiffies;
 	nfs_free_fattr(fattr);
 	nfs_free_fattr(fattr);
 	return server;
 	return server;
@@ -1124,6 +1155,101 @@ error:
 }
 }
 
 
 #ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4
+/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by IP address, protocol version, and minorversion
+ *
+ * Called from the pg_authenticate method. The callback identifier
+ * is not used as it has not been decoded.
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_no_ident(const struct sockaddr *addr)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		if (nfs4_cb_match_client(addr, clp, 0) == false)
+			continue;
+		atomic_inc(&clp->cl_count);
+		spin_unlock(&nfs_client_lock);
+		return clp;
+	}
+	spin_unlock(&nfs_client_lock);
+	return NULL;
+}
+
+/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by callback identifier
+ */
+struct nfs_client *
+nfs4_find_client_ident(int cb_ident)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	clp = idr_find(&cb_ident_idr, cb_ident);
+	if (clp)
+		atomic_inc(&clp->cl_count);
+	spin_unlock(&nfs_client_lock);
+	return clp;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * NFSv4.1 callback thread helper
+ * For CB_COMPOUND calls, find a client by IP address, protocol version,
+ * minorversion, and sessionID
+ *
+ * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
+ * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
+ * can arrive before the callback sessionid is set. For CB_NULL calls,
+ * find a client by IP address, protocol version, and minorversion.
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *addr,
+			   struct nfs4_sessionid *sid, int is_cb_compound)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		if (nfs4_cb_match_client(addr, clp, 1) == false)
+			continue;
+
+		if (!nfs4_has_session(clp))
+			continue;
+
+		/* Match sessionid unless cb_null call */
+		if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data,
+		    sid->data, NFS4_MAX_SESSIONID_LEN) != 0))
+			continue;
+
+		atomic_inc(&clp->cl_count);
+		spin_unlock(&nfs_client_lock);
+		return clp;
+	}
+	spin_unlock(&nfs_client_lock);
+	return NULL;
+}
+
+#else /* CONFIG_NFS_V4_1 */
+
+struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *addr,
+			   struct nfs4_sessionid *sid, int is_cb_compound)
+{
+	return NULL;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 /*
 /*
  * Initialize the NFS4 callback service
  * Initialize the NFS4 callback service
  */
  */
@@ -1342,11 +1468,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
 	if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
 	if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
 		server->namelen = NFS4_MAXNAMLEN;
 		server->namelen = NFS4_MAXNAMLEN;
 
 
-	spin_lock(&nfs_client_lock);
-	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
-	list_add_tail(&server->master_link, &nfs_volume_list);
-	spin_unlock(&nfs_client_lock);
-
+	nfs_server_insert_lists(server);
 	server->mount_time = jiffies;
 	server->mount_time = jiffies;
 out:
 out:
 	nfs_free_fattr(fattr);
 	nfs_free_fattr(fattr);
@@ -1551,11 +1673,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	if (error < 0)
 	if (error < 0)
 		goto out_free_server;
 		goto out_free_server;
 
 
-	spin_lock(&nfs_client_lock);
-	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
-	list_add_tail(&server->master_link, &nfs_volume_list);
-	spin_unlock(&nfs_client_lock);
-
+	nfs_server_insert_lists(server);
 	server->mount_time = jiffies;
 	server->mount_time = jiffies;
 
 
 	nfs_free_fattr(fattr_fsinfo);
 	nfs_free_fattr(fattr_fsinfo);

+ 259 - 103
fs/nfs/delegation.c

@@ -40,11 +40,23 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
 	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
 	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
 }
 }
 
 
+/**
+ * nfs_mark_delegation_referenced - set delegation's REFERENCED flag
+ * @delegation: delegation to process
+ *
+ */
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
 {
 {
 	set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
 	set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
 }
 }
 
 
+/**
+ * nfs_have_delegation - check if inode has a delegation
+ * @inode: inode to check
+ * @flags: delegation types to check for
+ *
+ * Returns one if inode has the indicated delegation, otherwise zero.
+ */
 int nfs_have_delegation(struct inode *inode, fmode_t flags)
 int nfs_have_delegation(struct inode *inode, fmode_t flags)
 {
 {
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
@@ -119,10 +131,15 @@ again:
 	return 0;
 	return 0;
 }
 }
 
 
-/*
- * Set up a delegation on an inode
+/**
+ * nfs_inode_reclaim_delegation - process a delegation reclaim request
+ * @inode: inode to process
+ * @cred: credential to use for request
+ * @res: new delegation state from server
+ *
  */
  */
-void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
+				  struct nfs_openres *res)
 {
 {
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
 	struct rpc_cred *oldcred = NULL;
 	struct rpc_cred *oldcred = NULL;
@@ -175,38 +192,52 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
 	return inode;
 	return inode;
 }
 }
 
 
-static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
-							   const nfs4_stateid *stateid,
-							   struct nfs_client *clp)
+static struct nfs_delegation *
+nfs_detach_delegation_locked(struct nfs_inode *nfsi,
+			     struct nfs_server *server)
 {
 {
 	struct nfs_delegation *delegation =
 	struct nfs_delegation *delegation =
 		rcu_dereference_protected(nfsi->delegation,
 		rcu_dereference_protected(nfsi->delegation,
-					  lockdep_is_held(&clp->cl_lock));
+				lockdep_is_held(&server->nfs_client->cl_lock));
 
 
 	if (delegation == NULL)
 	if (delegation == NULL)
 		goto nomatch;
 		goto nomatch;
+
 	spin_lock(&delegation->lock);
 	spin_lock(&delegation->lock);
-	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
-				sizeof(delegation->stateid.data)) != 0)
-		goto nomatch_unlock;
 	list_del_rcu(&delegation->super_list);
 	list_del_rcu(&delegation->super_list);
 	delegation->inode = NULL;
 	delegation->inode = NULL;
 	nfsi->delegation_state = 0;
 	nfsi->delegation_state = 0;
 	rcu_assign_pointer(nfsi->delegation, NULL);
 	rcu_assign_pointer(nfsi->delegation, NULL);
 	spin_unlock(&delegation->lock);
 	spin_unlock(&delegation->lock);
 	return delegation;
 	return delegation;
-nomatch_unlock:
-	spin_unlock(&delegation->lock);
 nomatch:
 nomatch:
 	return NULL;
 	return NULL;
 }
 }
 
 
-/*
- * Set up a delegation on an inode
+static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
+						    struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs_delegation *delegation;
+
+	spin_lock(&clp->cl_lock);
+	delegation = nfs_detach_delegation_locked(nfsi, server);
+	spin_unlock(&clp->cl_lock);
+	return delegation;
+}
+
+/**
+ * nfs_inode_set_delegation - set up a delegation on an inode
+ * @inode: inode to which delegation applies
+ * @cred: cred to use for subsequent delegation processing
+ * @res: new delegation state from server
+ *
+ * Returns zero on success, or a negative errno value.
  */
  */
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation, *old_delegation;
 	struct nfs_delegation *delegation, *old_delegation;
 	struct nfs_delegation *freeme = NULL;
 	struct nfs_delegation *freeme = NULL;
@@ -227,7 +258,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 
 
 	spin_lock(&clp->cl_lock);
 	spin_lock(&clp->cl_lock);
 	old_delegation = rcu_dereference_protected(nfsi->delegation,
 	old_delegation = rcu_dereference_protected(nfsi->delegation,
-						   lockdep_is_held(&clp->cl_lock));
+					lockdep_is_held(&clp->cl_lock));
 	if (old_delegation != NULL) {
 	if (old_delegation != NULL) {
 		if (memcmp(&delegation->stateid, &old_delegation->stateid,
 		if (memcmp(&delegation->stateid, &old_delegation->stateid,
 					sizeof(old_delegation->stateid)) == 0 &&
 					sizeof(old_delegation->stateid)) == 0 &&
@@ -246,9 +277,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 			delegation = NULL;
 			delegation = NULL;
 			goto out;
 			goto out;
 		}
 		}
-		freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
+		freeme = nfs_detach_delegation_locked(nfsi, server);
 	}
 	}
-	list_add_rcu(&delegation->super_list, &clp->cl_delegations);
+	list_add_rcu(&delegation->super_list, &server->delegations);
 	nfsi->delegation_state = delegation->type;
 	nfsi->delegation_state = delegation->type;
 	rcu_assign_pointer(nfsi->delegation, delegation);
 	rcu_assign_pointer(nfsi->delegation, delegation);
 	delegation = NULL;
 	delegation = NULL;
@@ -290,73 +321,85 @@ out:
 	return err;
 	return err;
 }
 }
 
 
-/*
- * Return all delegations that have been marked for return
+/**
+ * nfs_client_return_marked_delegations - return previously marked delegations
+ * @clp: nfs_client to process
+ *
+ * Returns zero on success, or a negative errno value.
  */
  */
 int nfs_client_return_marked_delegations(struct nfs_client *clp)
 int nfs_client_return_marked_delegations(struct nfs_client *clp)
 {
 {
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
+	struct nfs_server *server;
 	struct inode *inode;
 	struct inode *inode;
 	int err = 0;
 	int err = 0;
 
 
 restart:
 restart:
 	rcu_read_lock();
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
-		if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
-			continue;
-		inode = nfs_delegation_grab_inode(delegation);
-		if (inode == NULL)
-			continue;
-		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
-		spin_unlock(&clp->cl_lock);
-		rcu_read_unlock();
-		if (delegation != NULL) {
-			filemap_flush(inode->i_mapping);
-			err = __nfs_inode_return_delegation(inode, delegation, 0);
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		list_for_each_entry_rcu(delegation, &server->delegations,
+								super_list) {
+			if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
+							&delegation->flags))
+				continue;
+			inode = nfs_delegation_grab_inode(delegation);
+			if (inode == NULL)
+				continue;
+			delegation = nfs_detach_delegation(NFS_I(inode),
+								server);
+			rcu_read_unlock();
+
+			if (delegation != NULL) {
+				filemap_flush(inode->i_mapping);
+				err = __nfs_inode_return_delegation(inode,
+								delegation, 0);
+			}
+			iput(inode);
+			if (!err)
+				goto restart;
+			set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+			return err;
 		}
 		}
-		iput(inode);
-		if (!err)
-			goto restart;
-		set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
-		return err;
 	}
 	}
 	rcu_read_unlock();
 	rcu_read_unlock();
 	return 0;
 	return 0;
 }
 }
 
 
-/*
- * This function returns the delegation without reclaiming opens
- * or protecting against delegation reclaims.
- * It is therefore really only safe to be called from
- * nfs4_clear_inode()
+/**
+ * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens
+ * @inode: inode to process
+ *
+ * Does not protect against delegation reclaims, therefore really only safe
+ * to be called from nfs4_clear_inode().
  */
  */
 void nfs_inode_return_delegation_noreclaim(struct inode *inode)
 void nfs_inode_return_delegation_noreclaim(struct inode *inode)
 {
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
 
 
 	if (rcu_access_pointer(nfsi->delegation) != NULL) {
 	if (rcu_access_pointer(nfsi->delegation) != NULL) {
-		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
-		spin_unlock(&clp->cl_lock);
+		delegation = nfs_detach_delegation(nfsi, server);
 		if (delegation != NULL)
 		if (delegation != NULL)
 			nfs_do_return_delegation(inode, delegation, 0);
 			nfs_do_return_delegation(inode, delegation, 0);
 	}
 	}
 }
 }
 
 
+/**
+ * nfs_inode_return_delegation - synchronously return a delegation
+ * @inode: inode to process
+ *
+ * Returns zero on success, or a negative errno value.
+ */
 int nfs_inode_return_delegation(struct inode *inode)
 int nfs_inode_return_delegation(struct inode *inode)
 {
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
 	int err = 0;
 	int err = 0;
 
 
 	if (rcu_access_pointer(nfsi->delegation) != NULL) {
 	if (rcu_access_pointer(nfsi->delegation) != NULL) {
-		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
-		spin_unlock(&clp->cl_lock);
+		delegation = nfs_detach_delegation(nfsi, server);
 		if (delegation != NULL) {
 		if (delegation != NULL) {
 			nfs_wb_all(inode);
 			nfs_wb_all(inode);
 			err = __nfs_inode_return_delegation(inode, delegation, 1);
 			err = __nfs_inode_return_delegation(inode, delegation, 1);
@@ -365,46 +408,61 @@ int nfs_inode_return_delegation(struct inode *inode)
 	return err;
 	return err;
 }
 }
 
 
-static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
 {
 {
+	struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
+
 	set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
 	set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
 	set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
 	set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
 }
 }
 
 
-/*
- * Return all delegations associated to a super block
+/**
+ * nfs_super_return_all_delegations - return delegations for one superblock
+ * @sb: sb to process
+ *
  */
  */
 void nfs_super_return_all_delegations(struct super_block *sb)
 void nfs_super_return_all_delegations(struct super_block *sb)
 {
 {
-	struct nfs_client *clp = NFS_SB(sb)->nfs_client;
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
 
 
 	if (clp == NULL)
 	if (clp == NULL)
 		return;
 		return;
+
 	rcu_read_lock();
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		spin_lock(&delegation->lock);
 		spin_lock(&delegation->lock);
-		if (delegation->inode != NULL && delegation->inode->i_sb == sb)
-			set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+		set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
 		spin_unlock(&delegation->lock);
 		spin_unlock(&delegation->lock);
 	}
 	}
 	rcu_read_unlock();
 	rcu_read_unlock();
+
 	if (nfs_client_return_marked_delegations(clp) != 0)
 	if (nfs_client_return_marked_delegations(clp) != 0)
 		nfs4_schedule_state_manager(clp);
 		nfs4_schedule_state_manager(clp);
 }
 }
 
 
-static
-void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags)
+static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
+						 fmode_t flags)
 {
 {
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
 
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
 		if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
 			continue;
 			continue;
 		if (delegation->type & flags)
 		if (delegation->type & flags)
-			nfs_mark_return_delegation(clp, delegation);
+			nfs_mark_return_delegation(delegation);
 	}
 	}
+}
+
+static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
+							fmode_t flags)
+{
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs_mark_return_all_delegation_types(server, flags);
 	rcu_read_unlock();
 	rcu_read_unlock();
 }
 }
 
 
@@ -419,19 +477,32 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp)
 		nfs4_schedule_state_manager(clp);
 		nfs4_schedule_state_manager(clp);
 }
 }
 
 
+/**
+ * nfs_expire_all_delegation_types - mark delegations of given types for return
+ * @clp: client to process
+ * @flags: delegation types to expire
+ *
+ */
 void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
 void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
 {
 {
 	nfs_client_mark_return_all_delegation_types(clp, flags);
 	nfs_client_mark_return_all_delegation_types(clp, flags);
 	nfs_delegation_run_state_manager(clp);
 	nfs_delegation_run_state_manager(clp);
 }
 }
 
 
+/**
+ * nfs_expire_all_delegations - mark read and write delegations for return
+ * @clp: client to process
+ *
+ */
 void nfs_expire_all_delegations(struct nfs_client *clp)
 void nfs_expire_all_delegations(struct nfs_client *clp)
 {
 {
 	nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
 	nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
 }
 }
 
 
-/*
- * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
+/**
+ * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
+ * @clp: client to process
+ *
  */
  */
 void nfs_handle_cb_pathdown(struct nfs_client *clp)
 void nfs_handle_cb_pathdown(struct nfs_client *clp)
 {
 {
@@ -440,29 +511,43 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
 	nfs_client_mark_return_all_delegations(clp);
 	nfs_client_mark_return_all_delegations(clp);
 }
 }
 
 
-static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp)
+static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
 {
 {
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
 
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
 		if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
 			continue;
 			continue;
-		nfs_mark_return_delegation(clp, delegation);
+		nfs_mark_return_delegation(delegation);
 	}
 	}
-	rcu_read_unlock();
 }
 }
 
 
+/**
+ * nfs_expire_unreferenced_delegations - Eliminate unused delegations
+ * @clp: nfs_client to process
+ *
+ */
 void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
 void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
 {
 {
-	nfs_client_mark_return_unreferenced_delegations(clp);
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs_mark_return_unreferenced_delegations(server);
+	rcu_read_unlock();
+
 	nfs_delegation_run_state_manager(clp);
 	nfs_delegation_run_state_manager(clp);
 }
 }
 
 
-/*
- * Asynchronous delegation recall!
+/**
+ * nfs_async_inode_return_delegation - asynchronously return a delegation
+ * @inode: inode to process
+ * @stateid: state ID information from CB_RECALL arguments
+ *
+ * Returns zero on success, or a negative errno value.
  */
  */
-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+int nfs_async_inode_return_delegation(struct inode *inode,
+				      const nfs4_stateid *stateid)
 {
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
@@ -474,22 +559,21 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
 		rcu_read_unlock();
 		rcu_read_unlock();
 		return -ENOENT;
 		return -ENOENT;
 	}
 	}
-
-	nfs_mark_return_delegation(clp, delegation);
+	nfs_mark_return_delegation(delegation);
 	rcu_read_unlock();
 	rcu_read_unlock();
+
 	nfs_delegation_run_state_manager(clp);
 	nfs_delegation_run_state_manager(clp);
 	return 0;
 	return 0;
 }
 }
 
 
-/*
- * Retrieve the inode associated with a delegation
- */
-struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
+static struct inode *
+nfs_delegation_find_inode_server(struct nfs_server *server,
+				 const struct nfs_fh *fhandle)
 {
 {
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
 	struct inode *res = NULL;
-	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		spin_lock(&delegation->lock);
 		spin_lock(&delegation->lock);
 		if (delegation->inode != NULL &&
 		if (delegation->inode != NULL &&
 		    nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
 		    nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
@@ -499,49 +583,121 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 		if (res != NULL)
 		if (res != NULL)
 			break;
 			break;
 	}
 	}
+	return res;
+}
+
+/**
+ * nfs_delegation_find_inode - retrieve the inode associated with a delegation
+ * @clp: client state handle
+ * @fhandle: filehandle from a delegation recall
+ *
+ * Returns pointer to inode matching "fhandle," or NULL if a matching inode
+ * cannot be found.
+ */
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
+					const struct nfs_fh *fhandle)
+{
+	struct nfs_server *server;
+	struct inode *res = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		res = nfs_delegation_find_inode_server(server, fhandle);
+		if (res != NULL)
+			break;
+	}
 	rcu_read_unlock();
 	rcu_read_unlock();
 	return res;
 	return res;
 }
 }
 
 
-/*
- * Mark all delegations as needing to be reclaimed
+static void nfs_delegation_mark_reclaim_server(struct nfs_server *server)
+{
+	struct nfs_delegation *delegation;
+
+	list_for_each_entry_rcu(delegation, &server->delegations, super_list)
+		set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+}
+
+/**
+ * nfs_delegation_mark_reclaim - mark all delegations as needing to be reclaimed
+ * @clp: nfs_client to process
+ *
  */
  */
 void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
 {
-	struct nfs_delegation *delegation;
+	struct nfs_server *server;
+
 	rcu_read_lock();
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
-		set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs_delegation_mark_reclaim_server(server);
 	rcu_read_unlock();
 	rcu_read_unlock();
 }
 }
 
 
-/*
- * Reap all unclaimed delegations after reboot recovery is done
+/**
+ * nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
+ * @clp: nfs_client to process
+ *
  */
  */
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
 {
 	struct nfs_delegation *delegation;
 	struct nfs_delegation *delegation;
+	struct nfs_server *server;
 	struct inode *inode;
 	struct inode *inode;
+
 restart:
 restart:
 	rcu_read_lock();
 	rcu_read_lock();
-	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
-		if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0)
-			continue;
-		inode = nfs_delegation_grab_inode(delegation);
-		if (inode == NULL)
-			continue;
-		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
-		spin_unlock(&clp->cl_lock);
-		rcu_read_unlock();
-		if (delegation != NULL)
-			nfs_free_delegation(delegation);
-		iput(inode);
-		goto restart;
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		list_for_each_entry_rcu(delegation, &server->delegations,
+								super_list) {
+			if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
+						&delegation->flags) == 0)
+				continue;
+			inode = nfs_delegation_grab_inode(delegation);
+			if (inode == NULL)
+				continue;
+			delegation = nfs_detach_delegation(NFS_I(inode),
+								server);
+			rcu_read_unlock();
+
+			if (delegation != NULL)
+				nfs_free_delegation(delegation);
+			iput(inode);
+			goto restart;
+		}
 	}
 	}
 	rcu_read_unlock();
 	rcu_read_unlock();
 }
 }
 
 
+/**
+ * nfs_delegations_present - check for existence of delegations
+ * @clp: client state handle
+ *
+ * Returns one if there are any nfs_delegation structures attached
+ * to this nfs_client.
+ */
+int nfs_delegations_present(struct nfs_client *clp)
+{
+	struct nfs_server *server;
+	int ret = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		if (!list_empty(&server->delegations)) {
+			ret = 1;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * nfs4_copy_delegation_stateid - Copy inode's state ID information
+ * @dst: stateid data structure to fill in
+ * @inode: inode to check
+ *
+ * Returns one and fills in "dst->data" if inode had a delegation,
+ * otherwise zero is returned.
+ */
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);

+ 1 - 0
fs/nfs/delegation.h

@@ -44,6 +44,7 @@ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
 void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
 void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
 void nfs_handle_cb_pathdown(struct nfs_client *clp);
 void nfs_handle_cb_pathdown(struct nfs_client *clp);
 int nfs_client_return_marked_delegations(struct nfs_client *clp);
 int nfs_client_return_marked_delegations(struct nfs_client *clp);
+int nfs_delegations_present(struct nfs_client *clp);
 
 
 void nfs_delegation_mark_reclaim(struct nfs_client *clp);
 void nfs_delegation_mark_reclaim(struct nfs_client *clp);
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp);

+ 36 - 36
fs/nfs/dir.c

@@ -33,8 +33,8 @@
 #include <linux/namei.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/sched.h>
-#include <linux/vmalloc.h>
 #include <linux/kmemleak.h>
 #include <linux/kmemleak.h>
+#include <linux/xattr.h>
 
 
 #include "delegation.h"
 #include "delegation.h"
 #include "iostat.h"
 #include "iostat.h"
@@ -125,9 +125,10 @@ const struct inode_operations nfs4_dir_inode_operations = {
 	.permission	= nfs_permission,
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
 	.setattr	= nfs_setattr,
-	.getxattr       = nfs4_getxattr,
-	.setxattr       = nfs4_setxattr,
-	.listxattr      = nfs4_listxattr,
+	.getxattr	= generic_getxattr,
+	.setxattr	= generic_setxattr,
+	.listxattr	= generic_listxattr,
+	.removexattr	= generic_removexattr,
 };
 };
 
 
 #endif /* CONFIG_NFS_V4 */
 #endif /* CONFIG_NFS_V4 */
@@ -172,7 +173,7 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[0];
 	struct nfs_cache_array_entry array[0];
 };
 };
 
 
-typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
 typedef struct {
 typedef struct {
 	struct file	*file;
 	struct file	*file;
 	struct page	*page;
 	struct page	*page;
@@ -378,14 +379,14 @@ error:
 	return error;
 	return error;
 }
 }
 
 
-/* Fill in an entry based on the xdr code stored in desc->page */
-static
-int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
+static int xdr_decode(nfs_readdir_descriptor_t *desc,
+		      struct nfs_entry *entry, struct xdr_stream *xdr)
 {
 {
-	__be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
-	if (IS_ERR(p))
-		return PTR_ERR(p);
+	int error;
 
 
+	error = desc->decode(xdr, entry, desc->plus);
+	if (error)
+		return error;
 	entry->fattr->time_start = desc->timestamp;
 	entry->fattr->time_start = desc->timestamp;
 	entry->fattr->gencount = desc->gencount;
 	entry->fattr->gencount = desc->gencount;
 	return 0;
 	return 0;
@@ -459,25 +460,26 @@ out:
 /* Perform conversion from xdr to cache array */
 /* Perform conversion from xdr to cache array */
 static
 static
 int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
 int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
-				void *xdr_page, struct page *page, unsigned int buflen)
+				struct page **xdr_pages, struct page *page, unsigned int buflen)
 {
 {
 	struct xdr_stream stream;
 	struct xdr_stream stream;
-	struct xdr_buf buf;
-	__be32 *ptr = xdr_page;
+	struct xdr_buf buf = {
+		.pages = xdr_pages,
+		.page_len = buflen,
+		.buflen = buflen,
+		.len = buflen,
+	};
+	struct page *scratch;
 	struct nfs_cache_array *array;
 	struct nfs_cache_array *array;
 	unsigned int count = 0;
 	unsigned int count = 0;
 	int status;
 	int status;
 
 
-	buf.head->iov_base = xdr_page;
-	buf.head->iov_len = buflen;
-	buf.tail->iov_len = 0;
-	buf.page_base = 0;
-	buf.page_len = 0;
-	buf.buflen = buf.head->iov_len;
-	buf.len = buf.head->iov_len;
-
-	xdr_init_decode(&stream, &buf, ptr);
+	scratch = alloc_page(GFP_KERNEL);
+	if (scratch == NULL)
+		return -ENOMEM;
 
 
+	xdr_init_decode(&stream, &buf, NULL);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 
 	do {
 	do {
 		status = xdr_decode(desc, entry, &stream);
 		status = xdr_decode(desc, entry, &stream);
@@ -506,6 +508,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 		} else
 		} else
 			status = PTR_ERR(array);
 			status = PTR_ERR(array);
 	}
 	}
+
+	put_page(scratch);
 	return status;
 	return status;
 }
 }
 
 
@@ -521,7 +525,6 @@ static
 void nfs_readdir_free_large_page(void *ptr, struct page **pages,
 void nfs_readdir_free_large_page(void *ptr, struct page **pages,
 		unsigned int npages)
 		unsigned int npages)
 {
 {
-	vm_unmap_ram(ptr, npages);
 	nfs_readdir_free_pagearray(pages, npages);
 	nfs_readdir_free_pagearray(pages, npages);
 }
 }
 
 
@@ -530,9 +533,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
  * to nfs_readdir_free_large_page
  * to nfs_readdir_free_large_page
  */
  */
 static
 static
-void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
+int nfs_readdir_large_page(struct page **pages, unsigned int npages)
 {
 {
-	void *ptr;
 	unsigned int i;
 	unsigned int i;
 
 
 	for (i = 0; i < npages; i++) {
 	for (i = 0; i < npages; i++) {
@@ -541,13 +543,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
 			goto out_freepages;
 			goto out_freepages;
 		pages[i] = page;
 		pages[i] = page;
 	}
 	}
+	return 0;
 
 
-	ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
-	if (!IS_ERR_OR_NULL(ptr))
-		return ptr;
 out_freepages:
 out_freepages:
 	nfs_readdir_free_pagearray(pages, i);
 	nfs_readdir_free_pagearray(pages, i);
-	return NULL;
+	return -ENOMEM;
 }
 }
 
 
 static
 static
@@ -566,6 +566,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	entry.eof = 0;
 	entry.eof = 0;
 	entry.fh = nfs_alloc_fhandle();
 	entry.fh = nfs_alloc_fhandle();
 	entry.fattr = nfs_alloc_fattr();
 	entry.fattr = nfs_alloc_fattr();
+	entry.server = NFS_SERVER(inode);
 	if (entry.fh == NULL || entry.fattr == NULL)
 	if (entry.fh == NULL || entry.fattr == NULL)
 		goto out;
 		goto out;
 
 
@@ -577,8 +578,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	memset(array, 0, sizeof(struct nfs_cache_array));
 	memset(array, 0, sizeof(struct nfs_cache_array));
 	array->eof_index = -1;
 	array->eof_index = -1;
 
 
-	pages_ptr = nfs_readdir_large_page(pages, array_size);
-	if (!pages_ptr)
+	status = nfs_readdir_large_page(pages, array_size);
+	if (status < 0)
 		goto out_release_array;
 		goto out_release_array;
 	do {
 	do {
 		unsigned int pglen;
 		unsigned int pglen;
@@ -587,7 +588,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		if (status < 0)
 		if (status < 0)
 			break;
 			break;
 		pglen = status;
 		pglen = status;
-		status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
 		if (status < 0) {
 		if (status < 0) {
 			if (status == -ENOSPC)
 			if (status == -ENOSPC)
 				status = 0;
 				status = 0;
@@ -1221,7 +1222,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		goto out_unblock_sillyrename;
 		goto out_unblock_sillyrename;
 	}
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
-	res = (struct dentry *)inode;
+	res = ERR_CAST(inode);
 	if (IS_ERR(res))
 	if (IS_ERR(res))
 		goto out_unblock_sillyrename;
 		goto out_unblock_sillyrename;
 
 
@@ -1355,8 +1356,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	if (nd->flags & LOOKUP_CREATE) {
 	if (nd->flags & LOOKUP_CREATE) {
 		attr.ia_mode = nd->intent.open.create_mode;
 		attr.ia_mode = nd->intent.open.create_mode;
 		attr.ia_valid = ATTR_MODE;
 		attr.ia_valid = ATTR_MODE;
-		if (!IS_POSIXACL(dir))
-			attr.ia_mode &= ~current_umask();
+		attr.ia_mode &= ~current_umask();
 	} else {
 	} else {
 		open_flags &= ~(O_EXCL | O_CREAT);
 		open_flags &= ~(O_EXCL | O_CREAT);
 		attr.ia_valid = 0;
 		attr.ia_valid = 0;

+ 1 - 1
fs/nfs/idmap.c

@@ -238,7 +238,7 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
 	return nfs_idmap_lookup_name(gid, "group", buf, buflen);
 	return nfs_idmap_lookup_name(gid, "group", buf, buflen);
 }
 }
 
 
-#else  /* CONFIG_NFS_USE_IDMAPPER not defined */
+#else  /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
 
 
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/mutex.h>

+ 2 - 1
fs/nfs/inode.c

@@ -1410,9 +1410,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  */
  */
 void nfs4_evict_inode(struct inode *inode)
 void nfs4_evict_inode(struct inode *inode)
 {
 {
+	pnfs_destroy_layout(NFS_I(inode));
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
 	end_writeback(inode);
 	end_writeback(inode);
-	pnfs_destroy_layout(NFS_I(inode));
 	/* If we are holding a delegation, return it! */
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation_noreclaim(inode);
 	nfs_inode_return_delegation_noreclaim(inode);
 	/* First call standard NFS clear_inode() code */
 	/* First call standard NFS clear_inode() code */
@@ -1619,6 +1619,7 @@ static void __exit exit_nfs_fs(void)
 #ifdef CONFIG_PROC_FS
 #ifdef CONFIG_PROC_FS
 	rpc_proc_unregister("nfs");
 	rpc_proc_unregister("nfs");
 #endif
 #endif
+	nfs_cleanup_cb_ident_idr();
 	unregister_nfs_fs();
 	unregister_nfs_fs();
 	nfs_fs_proc_exit();
 	nfs_fs_proc_exit();
 	nfsiod_stop();
 	nfsiod_stop();

+ 13 - 6
fs/nfs/internal.h

@@ -128,9 +128,13 @@ extern void nfs_umount(const struct nfs_mount_request *info);
 /* client.c */
 /* client.c */
 extern struct rpc_program nfs_program;
 extern struct rpc_program nfs_program;
 
 
+extern void nfs_cleanup_cb_ident_idr(void);
 extern void nfs_put_client(struct nfs_client *);
 extern void nfs_put_client(struct nfs_client *);
-extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32);
-extern struct nfs_client *nfs_find_client_next(struct nfs_client *);
+extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
+extern struct nfs_client *nfs4_find_client_ident(int);
+extern struct nfs_client *
+nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *,
+			   int);
 extern struct nfs_server *nfs_create_server(
 extern struct nfs_server *nfs_create_server(
 					const struct nfs_parsed_mount_data *,
 					const struct nfs_parsed_mount_data *,
 					struct nfs_fh *);
 					struct nfs_fh *);
@@ -185,17 +189,20 @@ extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
 extern void nfs_destroy_directcache(void);
 
 
 /* nfs2xdr.c */
 /* nfs2xdr.c */
-extern int nfs_stat_to_errno(int);
+extern int nfs_stat_to_errno(enum nfs_stat);
 extern struct rpc_procinfo nfs_procedures[];
 extern struct rpc_procinfo nfs_procedures[];
-extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs2_decode_dirent(struct xdr_stream *,
+				struct nfs_entry *, int);
 
 
 /* nfs3xdr.c */
 /* nfs3xdr.c */
 extern struct rpc_procinfo nfs3_procedures[];
 extern struct rpc_procinfo nfs3_procedures[];
-extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs3_decode_dirent(struct xdr_stream *,
+				struct nfs_entry *, int);
 
 
 /* nfs4xdr.c */
 /* nfs4xdr.c */
 #ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
+extern int nfs4_decode_dirent(struct xdr_stream *,
+				struct nfs_entry *, int);
 #endif
 #endif
 #ifdef CONFIG_NFS_V4_1
 #ifdef CONFIG_NFS_V4_1
 extern const u32 nfs41_maxread_overhead;
 extern const u32 nfs41_maxread_overhead;

+ 34 - 49
fs/nfs/mount_clnt.c

@@ -236,10 +236,8 @@ void nfs_umount(const struct nfs_mount_request *info)
 		.authflavor	= RPC_AUTH_UNIX,
 		.authflavor	= RPC_AUTH_UNIX,
 		.flags		= RPC_CLNT_CREATE_NOPING,
 		.flags		= RPC_CLNT_CREATE_NOPING,
 	};
 	};
-	struct mountres	result;
 	struct rpc_message msg	= {
 	struct rpc_message msg	= {
 		.rpc_argp	= info->dirpath,
 		.rpc_argp	= info->dirpath,
-		.rpc_resp	= &result,
 	};
 	};
 	struct rpc_clnt *clnt;
 	struct rpc_clnt *clnt;
 	int status;
 	int status;
@@ -248,7 +246,7 @@ void nfs_umount(const struct nfs_mount_request *info)
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
 
 	clnt = rpc_create(&args);
 	clnt = rpc_create(&args);
-	if (unlikely(IS_ERR(clnt)))
+	if (IS_ERR(clnt))
 		goto out_clnt_err;
 		goto out_clnt_err;
 
 
 	dprintk("NFS: sending UMNT request for %s:%s\n",
 	dprintk("NFS: sending UMNT request for %s:%s\n",
@@ -280,29 +278,20 @@ out_call_err:
  * XDR encode/decode functions for MOUNT
  * XDR encode/decode functions for MOUNT
  */
  */
 
 
-static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
+static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
 {
 {
 	const u32 pathname_len = strlen(pathname);
 	const u32 pathname_len = strlen(pathname);
 	__be32 *p;
 	__be32 *p;
 
 
-	if (unlikely(pathname_len > MNTPATHLEN))
-		return -EIO;
-
-	p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len);
-	if (unlikely(p == NULL))
-		return -EIO;
+	BUG_ON(pathname_len > MNTPATHLEN);
+	p = xdr_reserve_space(xdr, 4 + pathname_len);
 	xdr_encode_opaque(p, pathname, pathname_len);
 	xdr_encode_opaque(p, pathname, pathname_len);
-
-	return 0;
 }
 }
 
 
-static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p,
-			   const char *dirpath)
+static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const char *dirpath)
 {
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	return encode_mntdirpath(&xdr, dirpath);
+	encode_mntdirpath(xdr, dirpath);
 }
 }
 
 
 /*
 /*
@@ -320,10 +309,10 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
 	u32 status;
 	u32 status;
 	__be32 *p;
 	__be32 *p;
 
 
-	p = xdr_inline_decode(xdr, sizeof(status));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
-	status = ntohl(*p);
+	status = be32_to_cpup(p);
 
 
 	for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
 	for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
 		if (mnt_errtbl[i].status == status) {
 		if (mnt_errtbl[i].status == status) {
@@ -351,18 +340,16 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
 	return 0;
 	return 0;
 }
 }
 
 
-static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p,
-			    struct mountres *res)
+static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				struct mountres *res)
 {
 {
-	struct xdr_stream xdr;
 	int status;
 	int status;
 
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	status = decode_status(&xdr, res);
+	status = decode_status(xdr, res);
 	if (unlikely(status != 0 || res->errno != 0))
 	if (unlikely(status != 0 || res->errno != 0))
 		return status;
 		return status;
-	return decode_fhandle(&xdr, res);
+	return decode_fhandle(xdr, res);
 }
 }
 
 
 static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
 static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
@@ -371,10 +358,10 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
 	u32 status;
 	u32 status;
 	__be32 *p;
 	__be32 *p;
 
 
-	p = xdr_inline_decode(xdr, sizeof(status));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
-	status = ntohl(*p);
+	status = be32_to_cpup(p);
 
 
 	for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
 	for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
 		if (mnt3_errtbl[i].status == status) {
 		if (mnt3_errtbl[i].status == status) {
@@ -394,11 +381,11 @@ static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res)
 	u32 size;
 	u32 size;
 	__be32 *p;
 	__be32 *p;
 
 
-	p = xdr_inline_decode(xdr, sizeof(size));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
 
 
-	size = ntohl(*p++);
+	size = be32_to_cpup(p);
 	if (size > NFS3_FHSIZE || size == 0)
 	if (size > NFS3_FHSIZE || size == 0)
 		return -EIO;
 		return -EIO;
 
 
@@ -421,15 +408,15 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 	if (*count == 0)
 	if (*count == 0)
 		return 0;
 		return 0;
 
 
-	p = xdr_inline_decode(xdr, sizeof(entries));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
-	entries = ntohl(*p);
+	entries = be32_to_cpup(p);
 	dprintk("NFS: received %u auth flavors\n", entries);
 	dprintk("NFS: received %u auth flavors\n", entries);
 	if (entries > NFS_MAX_SECFLAVORS)
 	if (entries > NFS_MAX_SECFLAVORS)
 		entries = NFS_MAX_SECFLAVORS;
 		entries = NFS_MAX_SECFLAVORS;
 
 
-	p = xdr_inline_decode(xdr, sizeof(u32) * entries);
+	p = xdr_inline_decode(xdr, 4 * entries);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
 
 
@@ -437,7 +424,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 		entries = *count;
 		entries = *count;
 
 
 	for (i = 0; i < entries; i++) {
 	for (i = 0; i < entries; i++) {
-		flavors[i] = ntohl(*p++);
+		flavors[i] = be32_to_cpup(p++);
 		dprintk("NFS:   auth flavor[%u]: %d\n", i, flavors[i]);
 		dprintk("NFS:   auth flavor[%u]: %d\n", i, flavors[i]);
 	}
 	}
 	*count = i;
 	*count = i;
@@ -445,30 +432,28 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 	return 0;
 	return 0;
 }
 }
 
 
-static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
-			     struct mountres *res)
+static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 struct mountres *res)
 {
 {
-	struct xdr_stream xdr;
 	int status;
 	int status;
 
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	status = decode_fhs_status(&xdr, res);
+	status = decode_fhs_status(xdr, res);
 	if (unlikely(status != 0 || res->errno != 0))
 	if (unlikely(status != 0 || res->errno != 0))
 		return status;
 		return status;
-	status = decode_fhandle3(&xdr, res);
+	status = decode_fhandle3(xdr, res);
 	if (unlikely(status != 0)) {
 	if (unlikely(status != 0)) {
 		res->errno = -EBADHANDLE;
 		res->errno = -EBADHANDLE;
 		return 0;
 		return 0;
 	}
 	}
-	return decode_auth_flavors(&xdr, res);
+	return decode_auth_flavors(xdr, res);
 }
 }
 
 
 static struct rpc_procinfo mnt_procedures[] = {
 static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_proc		= MOUNTPROC_MNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
-		.p_decode	= (kxdrproc_t)mnt_dec_mountres,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
 		.p_replen	= MNT_dec_mountres_sz,
 		.p_statidx	= MOUNTPROC_MNT,
 		.p_statidx	= MOUNTPROC_MNT,
@@ -476,7 +461,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 	},
 	[MOUNTPROC_UMNT] = {
 	[MOUNTPROC_UMNT] = {
 		.p_proc		= MOUNTPROC_UMNT,
 		.p_proc		= MOUNTPROC_UMNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC_UMNT,
 		.p_statidx	= MOUNTPROC_UMNT,
 		.p_name		= "UMOUNT",
 		.p_name		= "UMOUNT",
@@ -486,8 +471,8 @@ static struct rpc_procinfo mnt_procedures[] = {
 static struct rpc_procinfo mnt3_procedures[] = {
 static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_proc		= MOUNTPROC3_MNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
-		.p_decode	= (kxdrproc_t)mnt_dec_mountres3,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
 		.p_statidx	= MOUNTPROC3_MNT,
 		.p_statidx	= MOUNTPROC3_MNT,
@@ -495,7 +480,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 	},
 	[MOUNTPROC3_UMNT] = {
 	[MOUNTPROC3_UMNT] = {
 		.p_proc		= MOUNTPROC3_UMNT,
 		.p_proc		= MOUNTPROC3_UMNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC3_UMNT,
 		.p_statidx	= MOUNTPROC3_UMNT,
 		.p_name		= "UMOUNT",
 		.p_name		= "UMOUNT",

File diff suppressed because it is too large
+ 834 - 410
fs/nfs/nfs2xdr.c


File diff suppressed because it is too large
+ 2082 - 799
fs/nfs/nfs3xdr.c


+ 4 - 9
fs/nfs/nfs4_fs.h

@@ -44,6 +44,7 @@ enum nfs4_client_state {
 	NFS4CLNT_RECLAIM_REBOOT,
 	NFS4CLNT_RECLAIM_REBOOT,
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
 	NFS4CLNT_DELEGRETURN,
+	NFS4CLNT_LAYOUTRECALL,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_RECALL_SLOT,
 	NFS4CLNT_RECALL_SLOT,
 };
 };
@@ -109,7 +110,7 @@ struct nfs_unique_id {
 struct nfs4_state_owner {
 struct nfs4_state_owner {
 	struct nfs_unique_id so_owner_id;
 	struct nfs_unique_id so_owner_id;
 	struct nfs_server    *so_server;
 	struct nfs_server    *so_server;
-	struct rb_node	     so_client_node;
+	struct rb_node	     so_server_node;
 
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
 
 
@@ -227,12 +228,6 @@ struct nfs4_state_maintenance_ops {
 extern const struct dentry_operations nfs4_dentry_operations;
 extern const struct dentry_operations nfs4_dentry_operations;
 extern const struct inode_operations nfs4_dir_inode_operations;
 extern const struct inode_operations nfs4_dir_inode_operations;
 
 
-/* inode.c */
-extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
-extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
-extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
-
-
 /* nfs4proc.c */
 /* nfs4proc.c */
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
@@ -241,11 +236,12 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 		struct nfs4_fs_locations *fs_locations, struct page *page);
 		struct nfs4_fs_locations *fs_locations, struct page *page);
 extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
 extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+extern const struct xattr_handler *nfs4_xattr_handlers[];
 
 
 #if defined(CONFIG_NFS_V4_1)
 #if defined(CONFIG_NFS_V4_1)
 static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
 static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -331,7 +327,6 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
 extern const nfs4_stateid zero_stateid;
 extern const nfs4_stateid zero_stateid;
 
 
 /* nfs4xdr.c */
 /* nfs4xdr.c */
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 extern struct rpc_procinfo nfs4_procedures[];
 extern struct rpc_procinfo nfs4_procedures[];
 
 
 struct nfs4_mount_data;
 struct nfs4_mount_data;

+ 3 - 3
fs/nfs/nfs4filelayout.c

@@ -82,7 +82,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 {
 {
 	struct nfs4_file_layout_dsaddr *dsaddr;
 	struct nfs4_file_layout_dsaddr *dsaddr;
 	int status = -EINVAL;
 	int status = -EINVAL;
-	struct nfs_server *nfss = NFS_SERVER(lo->inode);
+	struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
 
 
 	dprintk("--> %s\n", __func__);
 	dprintk("--> %s\n", __func__);
 
 
@@ -101,7 +101,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 	/* find and reference the deviceid */
 	/* find and reference the deviceid */
 	dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
 	dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
 	if (dsaddr == NULL) {
 	if (dsaddr == NULL) {
-		dsaddr = get_device_info(lo->inode, id);
+		dsaddr = get_device_info(lo->plh_inode, id);
 		if (dsaddr == NULL)
 		if (dsaddr == NULL)
 			goto out;
 			goto out;
 	}
 	}
@@ -243,7 +243,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 static void
 static void
 filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 {
 {
-	struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
+	struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 
 
 	dprintk("--> %s\n", __func__);
 	dprintk("--> %s\n", __func__);

+ 128 - 60
fs/nfs/nfs4proc.c

@@ -49,6 +49,7 @@
 #include <linux/mount.h>
 #include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/sunrpc/bc_xprt.h>
+#include <linux/xattr.h>
 
 
 #include "nfs4_fs.h"
 #include "nfs4_fs.h"
 #include "delegation.h"
 #include "delegation.h"
@@ -355,9 +356,9 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
 }
 }
 
 
 /*
 /*
- * Signal state manager thread if session is drained
+ * Signal state manager thread if session fore channel is drained
  */
  */
-static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
+static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
 {
 {
 	struct rpc_task *task;
 	struct rpc_task *task;
 
 
@@ -371,8 +372,20 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
 	if (ses->fc_slot_table.highest_used_slotid != -1)
 	if (ses->fc_slot_table.highest_used_slotid != -1)
 		return;
 		return;
 
 
-	dprintk("%s COMPLETE: Session Drained\n", __func__);
-	complete(&ses->complete);
+	dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
+	complete(&ses->fc_slot_table.complete);
+}
+
+/*
+ * Signal state manager thread if session back channel is drained
+ */
+void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
+{
+	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
+	    ses->bc_slot_table.highest_used_slotid != -1)
+		return;
+	dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
+	complete(&ses->bc_slot_table.complete);
 }
 }
 
 
 static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
@@ -389,7 +402,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 
 
 	spin_lock(&tbl->slot_tbl_lock);
 	spin_lock(&tbl->slot_tbl_lock);
 	nfs4_free_slot(tbl, res->sr_slot);
 	nfs4_free_slot(tbl, res->sr_slot);
-	nfs41_check_drain_session_complete(res->sr_session);
+	nfs4_check_drain_fc_complete(res->sr_session);
 	spin_unlock(&tbl->slot_tbl_lock);
 	spin_unlock(&tbl->slot_tbl_lock);
 	res->sr_slot = NULL;
 	res->sr_slot = NULL;
 }
 }
@@ -1826,6 +1839,8 @@ struct nfs4_closedata {
 	struct nfs_closeres res;
 	struct nfs_closeres res;
 	struct nfs_fattr fattr;
 	struct nfs_fattr fattr;
 	unsigned long timestamp;
 	unsigned long timestamp;
+	bool roc;
+	u32 roc_barrier;
 };
 };
 
 
 static void nfs4_free_closedata(void *data)
 static void nfs4_free_closedata(void *data)
@@ -1833,6 +1848,8 @@ static void nfs4_free_closedata(void *data)
 	struct nfs4_closedata *calldata = data;
 	struct nfs4_closedata *calldata = data;
 	struct nfs4_state_owner *sp = calldata->state->owner;
 	struct nfs4_state_owner *sp = calldata->state->owner;
 
 
+	if (calldata->roc)
+		pnfs_roc_release(calldata->state->inode);
 	nfs4_put_open_state(calldata->state);
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_state_owner(sp);
 	nfs4_put_state_owner(sp);
@@ -1865,6 +1882,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	 */
 	 */
 	switch (task->tk_status) {
 	switch (task->tk_status) {
 		case 0:
 		case 0:
+			if (calldata->roc)
+				pnfs_roc_set_barrier(state->inode,
+						     calldata->roc_barrier);
 			nfs_set_open_stateid(state, &calldata->res.stateid, 0);
 			nfs_set_open_stateid(state, &calldata->res.stateid, 0);
 			renew_lease(server, calldata->timestamp);
 			renew_lease(server, calldata->timestamp);
 			nfs4_close_clear_stateid_flags(state,
 			nfs4_close_clear_stateid_flags(state,
@@ -1917,8 +1937,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		return;
 		return;
 	}
 	}
 
 
-	if (calldata->arg.fmode == 0)
+	if (calldata->arg.fmode == 0) {
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
+		if (calldata->roc &&
+		    pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
+			rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
+				     task, NULL);
+			return;
+		}
+	}
 
 
 	nfs_fattr_init(calldata->res.fattr);
 	nfs_fattr_init(calldata->res.fattr);
 	calldata->timestamp = jiffies;
 	calldata->timestamp = jiffies;
@@ -1946,7 +1973,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
  *
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
  */
-int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
+int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
 {
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_closedata *calldata;
 	struct nfs4_closedata *calldata;
@@ -1981,11 +2008,12 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.server = server;
 	calldata->res.server = server;
+	calldata->roc = roc;
 	path_get(path);
 	path_get(path);
 	calldata->path = *path;
 	calldata->path = *path;
 
 
-	msg.rpc_argp = &calldata->arg,
-	msg.rpc_resp = &calldata->res,
+	msg.rpc_argp = &calldata->arg;
+	msg.rpc_resp = &calldata->res;
 	task_setup_data.callback_data = calldata;
 	task_setup_data.callback_data = calldata;
 	task = rpc_run_task(&task_setup_data);
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 	if (IS_ERR(task))
@@ -1998,6 +2026,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
 out_free_calldata:
 out_free_calldata:
 	kfree(calldata);
 	kfree(calldata);
 out:
 out:
+	if (roc)
+		pnfs_roc_release(state->inode);
 	nfs4_put_open_state(state);
 	nfs4_put_open_state(state);
 	nfs4_put_state_owner(sp);
 	nfs4_put_state_owner(sp);
 	return status;
 	return status;
@@ -2486,6 +2516,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		path = &ctx->path;
 		path = &ctx->path;
 		fmode = ctx->mode;
 		fmode = ctx->mode;
 	}
 	}
+	sattr->ia_mode &= ~current_umask();
 	state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
 	state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
 	d_drop(dentry);
 	d_drop(dentry);
 	if (IS_ERR(state)) {
 	if (IS_ERR(state)) {
@@ -2816,6 +2847,8 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 {
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_exception exception = { };
 	int err;
 	int err;
+
+	sattr->ia_mode &= ~current_umask();
 	do {
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
 		err = nfs4_handle_exception(NFS_SERVER(dir),
 				_nfs4_proc_mkdir(dir, dentry, sattr),
 				_nfs4_proc_mkdir(dir, dentry, sattr),
@@ -2916,6 +2949,8 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 {
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_exception exception = { };
 	int err;
 	int err;
+
+	sattr->ia_mode &= ~current_umask();
 	do {
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
 		err = nfs4_handle_exception(NFS_SERVER(dir),
 				_nfs4_proc_mknod(dir, dentry, sattr, rdev),
 				_nfs4_proc_mknod(dir, dentry, sattr, rdev),
@@ -3478,6 +3513,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 	struct nfs4_setclientid setclientid = {
 	struct nfs4_setclientid setclientid = {
 		.sc_verifier = &sc_verifier,
 		.sc_verifier = &sc_verifier,
 		.sc_prog = program,
 		.sc_prog = program,
+		.sc_cb_ident = clp->cl_cb_ident,
 	};
 	};
 	struct rpc_message msg = {
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
@@ -3517,7 +3553,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 		if (signalled())
 		if (signalled())
 			break;
 			break;
 		if (loop++ & 1)
 		if (loop++ & 1)
-			ssleep(clp->cl_lease_time + 1);
+			ssleep(clp->cl_lease_time / HZ + 1);
 		else
 		else
 			if (++clp->cl_id_uniquifier == 0)
 			if (++clp->cl_id_uniquifier == 0)
 				break;
 				break;
@@ -3663,8 +3699,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	data->rpc_status = 0;
 	data->rpc_status = 0;
 
 
 	task_setup_data.callback_data = data;
 	task_setup_data.callback_data = data;
-	msg.rpc_argp = &data->args,
-	msg.rpc_resp = &data->res,
+	msg.rpc_argp = &data->args;
+	msg.rpc_resp = &data->res;
 	task = rpc_run_task(&task_setup_data);
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 		return PTR_ERR(task);
@@ -3743,6 +3779,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		goto out;
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
 	lsp = request->fl_u.nfs4_fl.owner;
 	arg.lock_owner.id = lsp->ls_id.id;
 	arg.lock_owner.id = lsp->ls_id.id;
+	arg.lock_owner.s_dev = server->s_dev;
 	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	switch (status) {
 	switch (status) {
 		case 0:
 		case 0:
@@ -3908,8 +3945,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		return ERR_PTR(-ENOMEM);
 		return ERR_PTR(-ENOMEM);
 	}
 	}
 
 
-	msg.rpc_argp = &data->arg,
-	msg.rpc_resp = &data->res,
+	msg.rpc_argp = &data->arg;
+	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
 	task_setup_data.callback_data = data;
 	return rpc_run_task(&task_setup_data);
 	return rpc_run_task(&task_setup_data);
 }
 }
@@ -3988,6 +4025,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	p->arg.lock_stateid = &lsp->ls_stateid;
 	p->arg.lock_stateid = &lsp->ls_stateid;
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id.id;
 	p->arg.lock_owner.id = lsp->ls_id.id;
+	p->arg.lock_owner.s_dev = server->s_dev;
 	p->res.lock_seqid = p->arg.lock_seqid;
 	p->res.lock_seqid = p->arg.lock_seqid;
 	p->lsp = lsp;
 	p->lsp = lsp;
 	p->server = server;
 	p->server = server;
@@ -4145,8 +4183,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 			data->arg.reclaim = NFS_LOCK_RECLAIM;
 			data->arg.reclaim = NFS_LOCK_RECLAIM;
 		task_setup_data.callback_ops = &nfs4_recover_lock_ops;
 		task_setup_data.callback_ops = &nfs4_recover_lock_ops;
 	}
 	}
-	msg.rpc_argp = &data->arg,
-	msg.rpc_resp = &data->res,
+	msg.rpc_argp = &data->arg;
+	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
 	task_setup_data.callback_data = data;
 	task = rpc_run_task(&task_setup_data);
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 	if (IS_ERR(task))
@@ -4392,48 +4430,43 @@ void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
 		return;
 		return;
 	args->lock_owner.clientid = server->nfs_client->cl_clientid;
 	args->lock_owner.clientid = server->nfs_client->cl_clientid;
 	args->lock_owner.id = lsp->ls_id.id;
 	args->lock_owner.id = lsp->ls_id.id;
+	args->lock_owner.s_dev = server->s_dev;
 	msg.rpc_argp = args;
 	msg.rpc_argp = args;
 	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
 	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
 }
 }
 
 
 #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
 #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
 
 
-int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
-		size_t buflen, int flags)
+static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
+				   const void *buf, size_t buflen,
+				   int flags, int type)
 {
 {
-	struct inode *inode = dentry->d_inode;
-
-	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
-		return -EOPNOTSUPP;
+	if (strcmp(key, "") != 0)
+		return -EINVAL;
 
 
-	return nfs4_proc_set_acl(inode, buf, buflen);
+	return nfs4_proc_set_acl(dentry->d_inode, buf, buflen);
 }
 }
 
 
-/* The getxattr man page suggests returning -ENODATA for unknown attributes,
- * and that's what we'll do for e.g. user attributes that haven't been set.
- * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
- * attributes in kernel-managed attribute namespaces. */
-ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
-		size_t buflen)
+static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
+				   void *buf, size_t buflen, int type)
 {
 {
-	struct inode *inode = dentry->d_inode;
-
-	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
-		return -EOPNOTSUPP;
+	if (strcmp(key, "") != 0)
+		return -EINVAL;
 
 
-	return nfs4_proc_get_acl(inode, buf, buflen);
+	return nfs4_proc_get_acl(dentry->d_inode, buf, buflen);
 }
 }
 
 
-ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
+static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
+				       size_t list_len, const char *name,
+				       size_t name_len, int type)
 {
 {
-	size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+	size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
 
 
 	if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
 	if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
 		return 0;
 		return 0;
-	if (buf && buflen < len)
-		return -ERANGE;
-	if (buf)
-		memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
+
+	if (list && len <= list_len)
+		memcpy(list, XATTR_NAME_NFSV4_ACL, len);
 	return len;
 	return len;
 }
 }
 
 
@@ -4485,6 +4518,25 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 }
 }
 
 
 #ifdef CONFIG_NFS_V4_1
 #ifdef CONFIG_NFS_V4_1
+/*
+ * Check the exchange flags returned by the server for invalid flags, having
+ * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
+ * DS flags set.
+ */
+static int nfs4_check_cl_exchange_flags(u32 flags)
+{
+	if (flags & ~EXCHGID4_FLAG_MASK_R)
+		goto out_inval;
+	if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
+	    (flags & EXCHGID4_FLAG_USE_NON_PNFS))
+		goto out_inval;
+	if (!(flags & (EXCHGID4_FLAG_MASK_PNFS)))
+		goto out_inval;
+	return NFS_OK;
+out_inval:
+	return -NFS4ERR_INVAL;
+}
+
 /*
 /*
  * nfs4_proc_exchange_id()
  * nfs4_proc_exchange_id()
  *
  *
@@ -4498,7 +4550,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	nfs4_verifier verifier;
 	nfs4_verifier verifier;
 	struct nfs41_exchange_id_args args = {
 	struct nfs41_exchange_id_args args = {
 		.client = clp,
 		.client = clp,
-		.flags = clp->cl_exchange_flags,
+		.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
 	};
 	};
 	struct nfs41_exchange_id_res res = {
 	struct nfs41_exchange_id_res res = {
 		.client = clp,
 		.client = clp,
@@ -4515,9 +4567,6 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	dprintk("--> %s\n", __func__);
 	dprintk("--> %s\n", __func__);
 	BUG_ON(clp == NULL);
 	BUG_ON(clp == NULL);
 
 
-	/* Remove server-only flags */
-	args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R;
-
 	p = (u32 *)verifier.data;
 	p = (u32 *)verifier.data;
 	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
 	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
 	*p = htonl((u32)clp->cl_boot_time.tv_nsec);
 	*p = htonl((u32)clp->cl_boot_time.tv_nsec);
@@ -4543,6 +4592,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 			break;
 			break;
 	}
 	}
 
 
+	status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
 	dprintk("<-- %s status= %d\n", __func__, status);
 	dprintk("<-- %s status= %d\n", __func__, status);
 	return status;
 	return status;
 }
 }
@@ -4776,17 +4826,17 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 	if (!session)
 	if (!session)
 		return NULL;
 		return NULL;
 
 
-	init_completion(&session->complete);
-
 	tbl = &session->fc_slot_table;
 	tbl = &session->fc_slot_table;
 	tbl->highest_used_slotid = -1;
 	tbl->highest_used_slotid = -1;
 	spin_lock_init(&tbl->slot_tbl_lock);
 	spin_lock_init(&tbl->slot_tbl_lock);
 	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
 	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+	init_completion(&tbl->complete);
 
 
 	tbl = &session->bc_slot_table;
 	tbl = &session->bc_slot_table;
 	tbl->highest_used_slotid = -1;
 	tbl->highest_used_slotid = -1;
 	spin_lock_init(&tbl->slot_tbl_lock);
 	spin_lock_init(&tbl->slot_tbl_lock);
 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+	init_completion(&tbl->complete);
 
 
 	session->session_state = 1<<NFS4_SESSION_INITING;
 	session->session_state = 1<<NFS4_SESSION_INITING;
 
 
@@ -5280,13 +5330,23 @@ static void
 nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
 nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
 {
 {
 	struct nfs4_layoutget *lgp = calldata;
 	struct nfs4_layoutget *lgp = calldata;
-	struct inode *ino = lgp->args.inode;
-	struct nfs_server *server = NFS_SERVER(ino);
+	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
 
 
 	dprintk("--> %s\n", __func__);
 	dprintk("--> %s\n", __func__);
+	/* Note there is a race here, where a CB_LAYOUTRECALL can come in
+	 * right now covering the LAYOUTGET we are about to send.
+	 * However, that is not so catastrophic, and there seems
+	 * to be no way to prevent it completely.
+	 */
 	if (nfs4_setup_sequence(server, &lgp->args.seq_args,
 	if (nfs4_setup_sequence(server, &lgp->args.seq_args,
 				&lgp->res.seq_res, 0, task))
 				&lgp->res.seq_res, 0, task))
 		return;
 		return;
+	if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+					  NFS_I(lgp->args.inode)->layout,
+					  lgp->args.ctx->state)) {
+		rpc_exit(task, NFS4_OK);
+		return;
+	}
 	rpc_call_start(task);
 	rpc_call_start(task);
 }
 }
 
 
@@ -5313,7 +5373,6 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 			return;
 			return;
 		}
 		}
 	}
 	}
-	lgp->status = task->tk_status;
 	dprintk("<-- %s\n", __func__);
 	dprintk("<-- %s\n", __func__);
 }
 }
 
 
@@ -5322,7 +5381,6 @@ static void nfs4_layoutget_release(void *calldata)
 	struct nfs4_layoutget *lgp = calldata;
 	struct nfs4_layoutget *lgp = calldata;
 
 
 	dprintk("--> %s\n", __func__);
 	dprintk("--> %s\n", __func__);
-	put_layout_hdr(lgp->args.inode);
 	if (lgp->res.layout.buf != NULL)
 	if (lgp->res.layout.buf != NULL)
 		free_page((unsigned long) lgp->res.layout.buf);
 		free_page((unsigned long) lgp->res.layout.buf);
 	put_nfs_open_context(lgp->args.ctx);
 	put_nfs_open_context(lgp->args.ctx);
@@ -5367,13 +5425,10 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
 	if (IS_ERR(task))
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 		return PTR_ERR(task);
 	status = nfs4_wait_for_completion_rpc_task(task);
 	status = nfs4_wait_for_completion_rpc_task(task);
-	if (status != 0)
-		goto out;
-	status = lgp->status;
-	if (status != 0)
-		goto out;
-	status = pnfs_layout_process(lgp);
-out:
+	if (status == 0)
+		status = task->tk_status;
+	if (status == 0)
+		status = pnfs_layout_process(lgp);
 	rpc_put_task(task);
 	rpc_put_task(task);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
 	return status;
@@ -5504,9 +5559,10 @@ static const struct inode_operations nfs4_file_inode_operations = {
 	.permission	= nfs_permission,
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
 	.setattr	= nfs_setattr,
-	.getxattr	= nfs4_getxattr,
-	.setxattr	= nfs4_setxattr,
-	.listxattr	= nfs4_listxattr,
+	.getxattr	= generic_getxattr,
+	.setxattr	= generic_setxattr,
+	.listxattr	= generic_listxattr,
+	.removexattr	= generic_removexattr,
 };
 };
 
 
 const struct nfs_rpc_ops nfs_v4_clientops = {
 const struct nfs_rpc_ops nfs_v4_clientops = {
@@ -5551,6 +5607,18 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.open_context	= nfs4_atomic_open,
 	.open_context	= nfs4_atomic_open,
 };
 };
 
 
+static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
+	.prefix	= XATTR_NAME_NFSV4_ACL,
+	.list	= nfs4_xattr_list_nfs4_acl,
+	.get	= nfs4_xattr_get_nfs4_acl,
+	.set	= nfs4_xattr_set_nfs4_acl,
+};
+
+const struct xattr_handler *nfs4_xattr_handlers[] = {
+	&nfs4_xattr_nfs4_acl_handler,
+	NULL
+};
+
 /*
 /*
  * Local variables:
  * Local variables:
  *  c-basic-offset: 8
  *  c-basic-offset: 8

+ 8 - 3
fs/nfs/nfs4renewd.c

@@ -63,9 +63,14 @@ nfs4_renew_state(struct work_struct *work)
 
 
 	ops = clp->cl_mvops->state_renewal_ops;
 	ops = clp->cl_mvops->state_renewal_ops;
 	dprintk("%s: start\n", __func__);
 	dprintk("%s: start\n", __func__);
-	/* Are there any active superblocks? */
-	if (list_empty(&clp->cl_superblocks))
+
+	rcu_read_lock();
+	if (list_empty(&clp->cl_superblocks)) {
+		rcu_read_unlock();
 		goto out;
 		goto out;
+	}
+	rcu_read_unlock();
+
 	spin_lock(&clp->cl_lock);
 	spin_lock(&clp->cl_lock);
 	lease = clp->cl_lease_time;
 	lease = clp->cl_lease_time;
 	last = clp->cl_last_renewal;
 	last = clp->cl_last_renewal;
@@ -75,7 +80,7 @@ nfs4_renew_state(struct work_struct *work)
 		cred = ops->get_state_renewal_cred_locked(clp);
 		cred = ops->get_state_renewal_cred_locked(clp);
 		spin_unlock(&clp->cl_lock);
 		spin_unlock(&clp->cl_lock);
 		if (cred == NULL) {
 		if (cred == NULL) {
-			if (list_empty(&clp->cl_delegations)) {
+			if (!nfs_delegations_present(clp)) {
 				set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 				set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 				goto out;
 				goto out;
 			}
 			}

+ 213 - 80
fs/nfs/nfs4state.c

@@ -105,14 +105,17 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp)
 		put_rpccred(cred);
 		put_rpccred(cred);
 }
 }
 
 
-struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_renew_cred_server_locked(struct nfs_server *server)
 {
 {
+	struct rpc_cred *cred = NULL;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
 	struct rb_node *pos;
-	struct rpc_cred *cred = NULL;
 
 
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		if (list_empty(&sp->so_states))
 		if (list_empty(&sp->so_states))
 			continue;
 			continue;
 		cred = get_rpccred(sp->so_cred);
 		cred = get_rpccred(sp->so_cred);
@@ -121,6 +124,28 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
 	return cred;
 	return cred;
 }
 }
 
 
+/**
+ * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ * Caller must hold clp->cl_lock.
+ */
+struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
+{
+	struct rpc_cred *cred = NULL;
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		cred = nfs4_get_renew_cred_server_locked(server);
+		if (cred != NULL)
+			break;
+	}
+	rcu_read_unlock();
+	return cred;
+}
+
 #if defined(CONFIG_NFS_V4_1)
 #if defined(CONFIG_NFS_V4_1)
 
 
 static int nfs41_setup_state_renewal(struct nfs_client *clp)
 static int nfs41_setup_state_renewal(struct nfs_client *clp)
@@ -142,6 +167,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
 	return status;
 	return status;
 }
 }
 
 
+/*
+ * Back channel returns NFS4ERR_DELAY for new requests when
+ * NFS4_SESSION_DRAINING is set so there is no work to be done when draining
+ * is ended.
+ */
 static void nfs4_end_drain_session(struct nfs_client *clp)
 static void nfs4_end_drain_session(struct nfs_client *clp)
 {
 {
 	struct nfs4_session *ses = clp->cl_session;
 	struct nfs4_session *ses = clp->cl_session;
@@ -165,22 +195,32 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
 	}
 	}
 }
 }
 
 
-static int nfs4_begin_drain_session(struct nfs_client *clp)
+static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
 {
 {
-	struct nfs4_session *ses = clp->cl_session;
-	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
-
 	spin_lock(&tbl->slot_tbl_lock);
 	spin_lock(&tbl->slot_tbl_lock);
-	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
 	if (tbl->highest_used_slotid != -1) {
 	if (tbl->highest_used_slotid != -1) {
-		INIT_COMPLETION(ses->complete);
+		INIT_COMPLETION(tbl->complete);
 		spin_unlock(&tbl->slot_tbl_lock);
 		spin_unlock(&tbl->slot_tbl_lock);
-		return wait_for_completion_interruptible(&ses->complete);
+		return wait_for_completion_interruptible(&tbl->complete);
 	}
 	}
 	spin_unlock(&tbl->slot_tbl_lock);
 	spin_unlock(&tbl->slot_tbl_lock);
 	return 0;
 	return 0;
 }
 }
 
 
+static int nfs4_begin_drain_session(struct nfs_client *clp)
+{
+	struct nfs4_session *ses = clp->cl_session;
+	int ret = 0;
+
+	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+	/* back channel */
+	ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table);
+	if (ret)
+		return ret;
+	/* fore channel */
+	return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
+}
+
 int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
 {
 	int status;
 	int status;
@@ -192,6 +232,12 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 	status = nfs4_proc_create_session(clp);
 	status = nfs4_proc_create_session(clp);
 	if (status != 0)
 	if (status != 0)
 		goto out;
 		goto out;
+	status = nfs4_set_callback_sessionid(clp);
+	if (status != 0) {
+		printk(KERN_WARNING "Sessionid not set. No callback service\n");
+		nfs_callback_down(1);
+		status = 0;
+	}
 	nfs41_setup_state_renewal(clp);
 	nfs41_setup_state_renewal(clp);
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 out:
 out:
@@ -210,28 +256,56 @@ struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
 
 
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* CONFIG_NFS_V4_1 */
 
 
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+static struct rpc_cred *
+nfs4_get_setclientid_cred_server(struct nfs_server *server)
 {
 {
+	struct nfs_client *clp = server->nfs_client;
+	struct rpc_cred *cred = NULL;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
 	struct rb_node *pos;
+
+	spin_lock(&clp->cl_lock);
+	pos = rb_first(&server->state_owners);
+	if (pos != NULL) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
+		cred = get_rpccred(sp->so_cred);
+	}
+	spin_unlock(&clp->cl_lock);
+	return cred;
+}
+
+/**
+ * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation
+ * @clp: client state handle
+ *
+ * Returns an rpc_cred with reference count bumped, or NULL.
+ */
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+{
+	struct nfs_server *server;
 	struct rpc_cred *cred;
 	struct rpc_cred *cred;
 
 
 	spin_lock(&clp->cl_lock);
 	spin_lock(&clp->cl_lock);
 	cred = nfs4_get_machine_cred_locked(clp);
 	cred = nfs4_get_machine_cred_locked(clp);
+	spin_unlock(&clp->cl_lock);
 	if (cred != NULL)
 	if (cred != NULL)
 		goto out;
 		goto out;
-	pos = rb_first(&clp->cl_state_owners);
-	if (pos != NULL) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
-		cred = get_rpccred(sp->so_cred);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		cred = nfs4_get_setclientid_cred_server(server);
+		if (cred != NULL)
+			break;
 	}
 	}
+	rcu_read_unlock();
+
 out:
 out:
-	spin_unlock(&clp->cl_lock);
 	return cred;
 	return cred;
 }
 }
 
 
-static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
-		__u64 minval, int maxbits)
+static void nfs_alloc_unique_id_locked(struct rb_root *root,
+				       struct nfs_unique_id *new,
+				       __u64 minval, int maxbits)
 {
 {
 	struct rb_node **p, *parent;
 	struct rb_node **p, *parent;
 	struct nfs_unique_id *pos;
 	struct nfs_unique_id *pos;
@@ -286,16 +360,15 @@ static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
 }
 }
 
 
 static struct nfs4_state_owner *
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 {
 {
-	struct nfs_client *clp = server->nfs_client;
-	struct rb_node **p = &clp->cl_state_owners.rb_node,
+	struct rb_node **p = &server->state_owners.rb_node,
 		       *parent = NULL;
 		       *parent = NULL;
 	struct nfs4_state_owner *sp, *res = NULL;
 	struct nfs4_state_owner *sp, *res = NULL;
 
 
 	while (*p != NULL) {
 	while (*p != NULL) {
 		parent = *p;
 		parent = *p;
-		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
 
 
 		if (server < sp->so_server) {
 		if (server < sp->so_server) {
 			p = &parent->rb_left;
 			p = &parent->rb_left;
@@ -319,24 +392,17 @@ nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 }
 }
 
 
 static struct nfs4_state_owner *
 static struct nfs4_state_owner *
-nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 {
 {
-	struct rb_node **p = &clp->cl_state_owners.rb_node,
+	struct nfs_server *server = new->so_server;
+	struct rb_node **p = &server->state_owners.rb_node,
 		       *parent = NULL;
 		       *parent = NULL;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_owner *sp;
 
 
 	while (*p != NULL) {
 	while (*p != NULL) {
 		parent = *p;
 		parent = *p;
-		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
 
 
-		if (new->so_server < sp->so_server) {
-			p = &parent->rb_left;
-			continue;
-		}
-		if (new->so_server > sp->so_server) {
-			p = &parent->rb_right;
-			continue;
-		}
 		if (new->so_cred < sp->so_cred)
 		if (new->so_cred < sp->so_cred)
 			p = &parent->rb_left;
 			p = &parent->rb_left;
 		else if (new->so_cred > sp->so_cred)
 		else if (new->so_cred > sp->so_cred)
@@ -346,18 +412,21 @@ nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
 			return sp;
 			return sp;
 		}
 		}
 	}
 	}
-	nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
-	rb_link_node(&new->so_client_node, parent, p);
-	rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+	nfs_alloc_unique_id_locked(&server->openowner_id,
+					&new->so_owner_id, 1, 64);
+	rb_link_node(&new->so_server_node, parent, p);
+	rb_insert_color(&new->so_server_node, &server->state_owners);
 	return new;
 	return new;
 }
 }
 
 
 static void
 static void
-nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
 {
 {
-	if (!RB_EMPTY_NODE(&sp->so_client_node))
-		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
-	nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+	struct nfs_server *server = sp->so_server;
+
+	if (!RB_EMPTY_NODE(&sp->so_server_node))
+		rb_erase(&sp->so_server_node, &server->state_owners);
+	nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id);
 }
 }
 
 
 /*
 /*
@@ -386,23 +455,32 @@ nfs4_alloc_state_owner(void)
 static void
 static void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
 {
-	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
-		struct nfs_client *clp = sp->so_server->nfs_client;
+	if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+		struct nfs_server *server = sp->so_server;
+		struct nfs_client *clp = server->nfs_client;
 
 
 		spin_lock(&clp->cl_lock);
 		spin_lock(&clp->cl_lock);
-		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
-		RB_CLEAR_NODE(&sp->so_client_node);
+		rb_erase(&sp->so_server_node, &server->state_owners);
+		RB_CLEAR_NODE(&sp->so_server_node);
 		spin_unlock(&clp->cl_lock);
 		spin_unlock(&clp->cl_lock);
 	}
 	}
 }
 }
 
 
-struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+/**
+ * nfs4_get_state_owner - Look up a state owner given a credential
+ * @server: nfs_server to search
+ * @cred: RPC credential to match
+ *
+ * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
+ */
+struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
+					      struct rpc_cred *cred)
 {
 {
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 	struct nfs4_state_owner *sp, *new;
 
 
 	spin_lock(&clp->cl_lock);
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_find_state_owner(server, cred);
+	sp = nfs4_find_state_owner_locked(server, cred);
 	spin_unlock(&clp->cl_lock);
 	spin_unlock(&clp->cl_lock);
 	if (sp != NULL)
 	if (sp != NULL)
 		return sp;
 		return sp;
@@ -412,7 +490,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	new->so_server = server;
 	new->so_server = server;
 	new->so_cred = cred;
 	new->so_cred = cred;
 	spin_lock(&clp->cl_lock);
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_insert_state_owner(clp, new);
+	sp = nfs4_insert_state_owner_locked(new);
 	spin_unlock(&clp->cl_lock);
 	spin_unlock(&clp->cl_lock);
 	if (sp == new)
 	if (sp == new)
 		get_rpccred(cred);
 		get_rpccred(cred);
@@ -423,6 +501,11 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	return sp;
 	return sp;
 }
 }
 
 
+/**
+ * nfs4_put_state_owner - Release an nfs4_state_owner
+ * @sp: state owner data to release
+ *
+ */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
 {
 	struct nfs_client *clp = sp->so_server->nfs_client;
 	struct nfs_client *clp = sp->so_server->nfs_client;
@@ -430,7 +513,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
 		return;
-	nfs4_remove_state_owner(clp, sp);
+	nfs4_remove_state_owner_locked(sp);
 	spin_unlock(&clp->cl_lock);
 	spin_unlock(&clp->cl_lock);
 	rpc_destroy_wait_queue(&sp->so_sequence.wait);
 	rpc_destroy_wait_queue(&sp->so_sequence.wait);
 	put_rpccred(cred);
 	put_rpccred(cred);
@@ -585,8 +668,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
 	if (!call_close) {
 	if (!call_close) {
 		nfs4_put_open_state(state);
 		nfs4_put_open_state(state);
 		nfs4_put_state_owner(owner);
 		nfs4_put_state_owner(owner);
-	} else
-		nfs4_do_close(path, state, gfp_mask, wait);
+	} else {
+		bool roc = pnfs_roc(state->inode);
+
+		nfs4_do_close(path, state, gfp_mask, wait, roc);
+	}
 }
 }
 
 
 void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
 void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
@@ -633,7 +719,8 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
 {
 {
 	struct nfs4_lock_state *lsp;
 	struct nfs4_lock_state *lsp;
-	struct nfs_client *clp = state->owner->so_server->nfs_client;
+	struct nfs_server *server = state->owner->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 
 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
 	if (lsp == NULL)
 	if (lsp == NULL)
@@ -657,7 +744,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 		return NULL;
 		return NULL;
 	}
 	}
 	spin_lock(&clp->cl_lock);
 	spin_lock(&clp->cl_lock);
-	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+	nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64);
 	spin_unlock(&clp->cl_lock);
 	spin_unlock(&clp->cl_lock);
 	INIT_LIST_HEAD(&lsp->ls_locks);
 	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
 	return lsp;
@@ -665,10 +752,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 
 
 static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
 static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
 {
 {
-	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+	struct nfs_server *server = lsp->ls_state->owner->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 
 	spin_lock(&clp->cl_lock);
 	spin_lock(&clp->cl_lock);
-	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+	nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
 	spin_unlock(&clp->cl_lock);
 	spin_unlock(&clp->cl_lock);
 	rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
 	rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
 	kfree(lsp);
 	kfree(lsp);
@@ -1114,15 +1202,19 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
 	}
 	}
 }
 }
 
 
-static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+static void nfs4_reset_seqids(struct nfs_server *server,
+	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
 {
 {
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
 	struct rb_node *pos;
 	struct nfs4_state *state;
 	struct nfs4_state *state;
 
 
-	/* Reset all sequence ids to zero */
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	spin_lock(&clp->cl_lock);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		sp->so_seqid.flags = 0;
 		sp->so_seqid.flags = 0;
 		spin_lock(&sp->so_lock);
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
 		list_for_each_entry(state, &sp->so_states, open_states) {
@@ -1131,6 +1223,18 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_re
 		}
 		}
 		spin_unlock(&sp->so_lock);
 		spin_unlock(&sp->so_lock);
 	}
 	}
+	spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
+	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
+{
+	struct nfs_server *server;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs4_reset_seqids(server, mark_reclaim);
+	rcu_read_unlock();
 }
 }
 
 
 static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
 static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
@@ -1148,25 +1252,41 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
 		(void)ops->reclaim_complete(clp);
 		(void)ops->reclaim_complete(clp);
 }
 }
 
 
-static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+static void nfs4_clear_reclaim_server(struct nfs_server *server)
 {
 {
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
 	struct rb_node *pos;
 	struct nfs4_state *state;
 	struct nfs4_state *state;
 
 
-	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
-		return 0;
-
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+	spin_lock(&clp->cl_lock);
+	for (pos = rb_first(&server->state_owners);
+	     pos != NULL;
+	     pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
 		spin_lock(&sp->so_lock);
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
 		list_for_each_entry(state, &sp->so_states, open_states) {
-			if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags))
+			if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
+						&state->flags))
 				continue;
 				continue;
 			nfs4_state_mark_reclaim_nograce(clp, state);
 			nfs4_state_mark_reclaim_nograce(clp, state);
 		}
 		}
 		spin_unlock(&sp->so_lock);
 		spin_unlock(&sp->so_lock);
 	}
 	}
+	spin_unlock(&clp->cl_lock);
+}
+
+static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
+{
+	struct nfs_server *server;
+
+	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+		return 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+		nfs4_clear_reclaim_server(server);
+	rcu_read_unlock();
 
 
 	nfs_delegation_reap_unclaimed(clp);
 	nfs_delegation_reap_unclaimed(clp);
 	return 1;
 	return 1;
@@ -1238,27 +1358,40 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
 
 
 static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
 static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
 {
 {
+	struct nfs4_state_owner *sp;
+	struct nfs_server *server;
 	struct rb_node *pos;
 	struct rb_node *pos;
 	int status = 0;
 	int status = 0;
 
 
 restart:
 restart:
-	spin_lock(&clp->cl_lock);
-	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
-		struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
-		if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags))
-			continue;
-		atomic_inc(&sp->so_count);
-		spin_unlock(&clp->cl_lock);
-		status = nfs4_reclaim_open_state(sp, ops);
-		if (status < 0) {
-			set_bit(ops->owner_flag_bit, &sp->so_flags);
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		spin_lock(&clp->cl_lock);
+		for (pos = rb_first(&server->state_owners);
+		     pos != NULL;
+		     pos = rb_next(pos)) {
+			sp = rb_entry(pos,
+				struct nfs4_state_owner, so_server_node);
+			if (!test_and_clear_bit(ops->owner_flag_bit,
+							&sp->so_flags))
+				continue;
+			atomic_inc(&sp->so_count);
+			spin_unlock(&clp->cl_lock);
+			rcu_read_unlock();
+
+			status = nfs4_reclaim_open_state(sp, ops);
+			if (status < 0) {
+				set_bit(ops->owner_flag_bit, &sp->so_flags);
+				nfs4_put_state_owner(sp);
+				return nfs4_recovery_handle_error(clp, status);
+			}
+
 			nfs4_put_state_owner(sp);
 			nfs4_put_state_owner(sp);
-			return nfs4_recovery_handle_error(clp, status);
+			goto restart;
 		}
 		}
-		nfs4_put_state_owner(sp);
-		goto restart;
+		spin_unlock(&clp->cl_lock);
 	}
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 	return status;
 	return status;
 }
 }
 
 

File diff suppressed because it is too large
+ 286 - 361
fs/nfs/nfs4xdr.c


+ 2 - 5
fs/nfs/pagelist.c

@@ -26,12 +26,9 @@ static struct kmem_cache *nfs_page_cachep;
 static inline struct nfs_page *
 static inline struct nfs_page *
 nfs_page_alloc(void)
 nfs_page_alloc(void)
 {
 {
-	struct nfs_page	*p;
-	p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL);
-	if (p) {
-		memset(p, 0, sizeof(*p));
+	struct nfs_page	*p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
+	if (p)
 		INIT_LIST_HEAD(&p->wb_list);
 		INIT_LIST_HEAD(&p->wb_list);
-	}
 	return p;
 	return p;
 }
 }
 
 

+ 353 - 171
fs/nfs/pnfs.c

@@ -177,105 +177,149 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  * pNFS client layout cache
  * pNFS client layout cache
  */
  */
 
 
+/* Need to hold i_lock if caller does not already hold reference */
+void
+get_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+	atomic_inc(&lo->plh_refcount);
+}
+
 static void
 static void
-get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
+destroy_layout_hdr(struct pnfs_layout_hdr *lo)
 {
 {
-	assert_spin_locked(&lo->inode->i_lock);
-	lo->refcount++;
+	dprintk("%s: freeing layout cache %p\n", __func__, lo);
+	BUG_ON(!list_empty(&lo->plh_layouts));
+	NFS_I(lo->plh_inode)->layout = NULL;
+	kfree(lo);
 }
 }
 
 
 static void
 static void
 put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 {
 {
-	assert_spin_locked(&lo->inode->i_lock);
-	BUG_ON(lo->refcount == 0);
-
-	lo->refcount--;
-	if (!lo->refcount) {
-		dprintk("%s: freeing layout cache %p\n", __func__, lo);
-		BUG_ON(!list_empty(&lo->layouts));
-		NFS_I(lo->inode)->layout = NULL;
-		kfree(lo);
-	}
+	if (atomic_dec_and_test(&lo->plh_refcount))
+		destroy_layout_hdr(lo);
 }
 }
 
 
 void
 void
-put_layout_hdr(struct inode *inode)
+put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
 {
-	spin_lock(&inode->i_lock);
-	put_layout_hdr_locked(NFS_I(inode)->layout);
-	spin_unlock(&inode->i_lock);
+	struct inode *inode = lo->plh_inode;
+
+	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+		destroy_layout_hdr(lo);
+		spin_unlock(&inode->i_lock);
+	}
 }
 }
 
 
 static void
 static void
 init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 {
 {
-	INIT_LIST_HEAD(&lseg->fi_list);
-	kref_init(&lseg->kref);
-	lseg->layout = lo;
+	INIT_LIST_HEAD(&lseg->pls_list);
+	atomic_set(&lseg->pls_refcount, 1);
+	smp_mb();
+	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
+	lseg->pls_layout = lo;
 }
 }
 
 
-/* Called without i_lock held, as the free_lseg call may sleep */
-static void
-destroy_lseg(struct kref *kref)
+static void free_lseg(struct pnfs_layout_segment *lseg)
 {
 {
-	struct pnfs_layout_segment *lseg =
-		container_of(kref, struct pnfs_layout_segment, kref);
-	struct inode *ino = lseg->layout->inode;
+	struct inode *ino = lseg->pls_layout->plh_inode;
 
 
-	dprintk("--> %s\n", __func__);
 	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
 	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
-	/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
-	put_layout_hdr(ino);
+	/* Matched by get_layout_hdr in pnfs_insert_layout */
+	put_layout_hdr(NFS_I(ino)->layout);
 }
 }
 
 
-static void
-put_lseg(struct pnfs_layout_segment *lseg)
+/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
+ * could sleep, so must be called outside of the lock.
+ * Returns 1 if object was removed, otherwise return 0.
+ */
+static int
+put_lseg_locked(struct pnfs_layout_segment *lseg,
+		struct list_head *tmp_list)
+{
+	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
+		atomic_read(&lseg->pls_refcount),
+		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+	if (atomic_dec_and_test(&lseg->pls_refcount)) {
+		struct inode *ino = lseg->pls_layout->plh_inode;
+
+		BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+		list_del(&lseg->pls_list);
+		if (list_empty(&lseg->pls_layout->plh_segs)) {
+			struct nfs_client *clp;
+
+			clp = NFS_SERVER(ino)->nfs_client;
+			spin_lock(&clp->cl_lock);
+			/* List does not take a reference, so no need for put here */
+			list_del_init(&lseg->pls_layout->plh_layouts);
+			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
+		}
+		rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
+		list_add(&lseg->pls_list, tmp_list);
+		return 1;
+	}
+	return 0;
+}
+
+static bool
+should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
 {
 {
-	if (!lseg)
-		return;
+	return (recall_iomode == IOMODE_ANY ||
+		lseg_iomode == recall_iomode);
+}
 
 
-	dprintk("%s: lseg %p ref %d\n", __func__, lseg,
-		atomic_read(&lseg->kref.refcount));
-	kref_put(&lseg->kref, destroy_lseg);
+/* Returns 1 if lseg is removed from list, 0 otherwise */
+static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
+			     struct list_head *tmp_list)
+{
+	int rv = 0;
+
+	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+		/* Remove the reference keeping the lseg in the
+		 * list.  It will now be removed when all
+		 * outstanding io is finished.
+		 */
+		rv = put_lseg_locked(lseg, tmp_list);
+	}
+	return rv;
 }
 }
 
 
-static void
-pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
+/* Returns the number of matching invalid lsegs remaining in the list
+ * after the call.
+ */
+int
+mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+			    struct list_head *tmp_list,
+			    u32 iomode)
 {
 {
 	struct pnfs_layout_segment *lseg, *next;
 	struct pnfs_layout_segment *lseg, *next;
-	struct nfs_client *clp;
+	int invalid = 0, removed = 0;
 
 
 	dprintk("%s:Begin lo %p\n", __func__, lo);
 	dprintk("%s:Begin lo %p\n", __func__, lo);
 
 
-	assert_spin_locked(&lo->inode->i_lock);
-	list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
-		dprintk("%s: freeing lseg %p\n", __func__, lseg);
-		list_move(&lseg->fi_list, tmp_list);
-	}
-	clp = NFS_SERVER(lo->inode)->nfs_client;
-	spin_lock(&clp->cl_lock);
-	/* List does not take a reference, so no need for put here */
-	list_del_init(&lo->layouts);
-	spin_unlock(&clp->cl_lock);
-	write_seqlock(&lo->seqlock);
-	clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
-	write_sequnlock(&lo->seqlock);
-
-	dprintk("%s:Return\n", __func__);
+	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
+		if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
+			dprintk("%s: freeing lseg %p iomode %d "
+				"offset %llu length %llu\n", __func__,
+				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
+				lseg->pls_range.length);
+			invalid++;
+			removed += mark_lseg_invalid(lseg, tmp_list);
+		}
+	dprintk("%s:Return %i\n", __func__, invalid - removed);
+	return invalid - removed;
 }
 }
 
 
-static void
-pnfs_free_lseg_list(struct list_head *tmp_list)
+void
+pnfs_free_lseg_list(struct list_head *free_me)
 {
 {
-	struct pnfs_layout_segment *lseg;
+	struct pnfs_layout_segment *lseg, *tmp;
 
 
-	while (!list_empty(tmp_list)) {
-		lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
-				fi_list);
-		dprintk("%s calling put_lseg on %p\n", __func__, lseg);
-		list_del(&lseg->fi_list);
-		put_lseg(lseg);
+	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
+		list_del(&lseg->pls_list);
+		free_lseg(lseg);
 	}
 	}
 }
 }
 
 
@@ -288,7 +332,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	lo = nfsi->layout;
 	lo = nfsi->layout;
 	if (lo) {
 	if (lo) {
-		pnfs_clear_lseg_list(lo, &tmp_list);
+		set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
+		mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
 		/* Matched by refcount set to 1 in alloc_init_layout_hdr */
 		/* Matched by refcount set to 1 in alloc_init_layout_hdr */
 		put_layout_hdr_locked(lo);
 		put_layout_hdr_locked(lo);
 	}
 	}
@@ -312,76 +357,80 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 
 
 	while (!list_empty(&tmp_list)) {
 	while (!list_empty(&tmp_list)) {
 		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
 		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
-				layouts);
+				plh_layouts);
 		dprintk("%s freeing layout for inode %lu\n", __func__,
 		dprintk("%s freeing layout for inode %lu\n", __func__,
-			lo->inode->i_ino);
-		pnfs_destroy_layout(NFS_I(lo->inode));
+			lo->plh_inode->i_ino);
+		pnfs_destroy_layout(NFS_I(lo->plh_inode));
 	}
 	}
 }
 }
 
 
-/* update lo->stateid with new if is more recent
- *
- * lo->stateid could be the open stateid, in which case we just use what given.
- */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
-			const nfs4_stateid *new)
-{
-	nfs4_stateid *old = &lo->stateid;
-	bool overwrite = false;
-
-	write_seqlock(&lo->seqlock);
-	if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
-	    memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
-		overwrite = true;
-	else {
-		u32 oldseq, newseq;
-
-		oldseq = be32_to_cpu(old->stateid.seqid);
-		newseq = be32_to_cpu(new->stateid.seqid);
-		if ((int)(newseq - oldseq) > 0)
-			overwrite = true;
+/* update lo->plh_stateid with new if it is more recent */
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+			bool update_barrier)
+{
+	u32 oldseq, newseq;
+
+	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+	newseq = be32_to_cpu(new->stateid.seqid);
+	if ((int)(newseq - oldseq) > 0) {
+		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+		if (update_barrier) {
+			u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+			if ((int)(new_barrier - lo->plh_barrier))
+				lo->plh_barrier = new_barrier;
+		} else {
+			/* Because of wraparound, we want to keep the barrier
+			 * "close" to the current seqids.  It needs to be
+			 * within 2**31 to count as "behind", so if it
+			 * gets too near that limit, give us a little leeway
+			 * and bring it to within 2**30.
+			 * NOTE - and yes, this is all unsigned arithmetic.
+			 */
+			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+				lo->plh_barrier = newseq - (1 << 30);
+		}
 	}
 	}
-	if (overwrite)
-		memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
-	write_sequnlock(&lo->seqlock);
 }
 }
 
 
-static void
-pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
-			      struct nfs4_state *state)
+/* lget is set to 1 if called from inside send_layoutget call chain */
+static bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+			int lget)
 {
 {
-	int seq;
-
-	dprintk("--> %s\n", __func__);
-	write_seqlock(&lo->seqlock);
-	do {
-		seq = read_seqbegin(&state->seqlock);
-		memcpy(lo->stateid.data, state->stateid.data,
-		       sizeof(state->stateid.data));
-	} while (read_seqretry(&state->seqlock, seq));
-	set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
-	write_sequnlock(&lo->seqlock);
-	dprintk("<-- %s\n", __func__);
+	if ((stateid) &&
+	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+		return true;
+	return lo->plh_block_lgets ||
+		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+		(list_empty(&lo->plh_segs) &&
+		 (atomic_read(&lo->plh_outstanding) > lget));
 }
 }
 
 
-void
-pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
-			struct nfs4_state *open_state)
+int
+pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+			      struct nfs4_state *open_state)
 {
 {
-	int seq;
+	int status = 0;
 
 
 	dprintk("--> %s\n", __func__);
 	dprintk("--> %s\n", __func__);
-	do {
-		seq = read_seqbegin(&lo->seqlock);
-		if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
-			/* This will trigger retry of the read */
-			pnfs_layout_from_open_stateid(lo, open_state);
-		} else
-			memcpy(dst->data, lo->stateid.data,
-			       sizeof(lo->stateid.data));
-	} while (read_seqretry(&lo->seqlock, seq));
+	spin_lock(&lo->plh_inode->i_lock);
+	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
+		status = -EAGAIN;
+	} else if (list_empty(&lo->plh_segs)) {
+		int seq;
+
+		do {
+			seq = read_seqbegin(&open_state->seqlock);
+			memcpy(dst->data, open_state->stateid.data,
+			       sizeof(open_state->stateid.data));
+		} while (read_seqretry(&open_state->seqlock, seq));
+	} else
+		memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
+	spin_unlock(&lo->plh_inode->i_lock);
 	dprintk("<-- %s\n", __func__);
 	dprintk("<-- %s\n", __func__);
+	return status;
 }
 }
 
 
 /*
 /*
@@ -395,7 +444,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	   struct nfs_open_context *ctx,
 	   struct nfs_open_context *ctx,
 	   u32 iomode)
 	   u32 iomode)
 {
 {
-	struct inode *ino = lo->inode;
+	struct inode *ino = lo->plh_inode;
 	struct nfs_server *server = NFS_SERVER(ino);
 	struct nfs_server *server = NFS_SERVER(ino);
 	struct nfs4_layoutget *lgp;
 	struct nfs4_layoutget *lgp;
 	struct pnfs_layout_segment *lseg = NULL;
 	struct pnfs_layout_segment *lseg = NULL;
@@ -404,10 +453,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 
 
 	BUG_ON(ctx == NULL);
 	BUG_ON(ctx == NULL);
 	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
 	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
-	if (lgp == NULL) {
-		put_layout_hdr(lo->inode);
+	if (lgp == NULL)
 		return NULL;
 		return NULL;
-	}
 	lgp->args.minlength = NFS4_MAX_UINT64;
 	lgp->args.minlength = NFS4_MAX_UINT64;
 	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
 	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
 	lgp->args.range.iomode = iomode;
 	lgp->args.range.iomode = iomode;
@@ -424,11 +471,88 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	nfs4_proc_layoutget(lgp);
 	nfs4_proc_layoutget(lgp);
 	if (!lseg) {
 	if (!lseg) {
 		/* remember that LAYOUTGET failed and suspend trying */
 		/* remember that LAYOUTGET failed and suspend trying */
-		set_bit(lo_fail_bit(iomode), &lo->state);
+		set_bit(lo_fail_bit(iomode), &lo->plh_flags);
 	}
 	}
 	return lseg;
 	return lseg;
 }
 }
 
 
+bool pnfs_roc(struct inode *ino)
+{
+	struct pnfs_layout_hdr *lo;
+	struct pnfs_layout_segment *lseg, *tmp;
+	LIST_HEAD(tmp_list);
+	bool found = false;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+		goto out_nolayout;
+	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
+		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+			mark_lseg_invalid(lseg, &tmp_list);
+			found = true;
+		}
+	if (!found)
+		goto out_nolayout;
+	lo->plh_block_lgets++;
+	get_layout_hdr(lo); /* matched in pnfs_roc_release */
+	spin_unlock(&ino->i_lock);
+	pnfs_free_lseg_list(&tmp_list);
+	return true;
+
+out_nolayout:
+	spin_unlock(&ino->i_lock);
+	return false;
+}
+
+void pnfs_roc_release(struct inode *ino)
+{
+	struct pnfs_layout_hdr *lo;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	lo->plh_block_lgets--;
+	put_layout_hdr_locked(lo);
+	spin_unlock(&ino->i_lock);
+}
+
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+	struct pnfs_layout_hdr *lo;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	if ((int)(barrier - lo->plh_barrier) > 0)
+		lo->plh_barrier = barrier;
+	spin_unlock(&ino->i_lock);
+}
+
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_segment *lseg;
+	bool found = false;
+
+	spin_lock(&ino->i_lock);
+	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
+		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+			found = true;
+			break;
+		}
+	if (!found) {
+		struct pnfs_layout_hdr *lo = nfsi->layout;
+		u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+
+		/* Since close does not return a layout stateid for use as
+		 * a barrier, we choose the worst-case barrier.
+		 */
+		*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
+	}
+	spin_unlock(&ino->i_lock);
+	return found;
+}
+
 /*
 /*
  * Compare two layout segments for sorting into layout cache.
  * Compare two layout segments for sorting into layout cache.
  * We want to preferentially return RW over RO layouts, so ensure those
  * We want to preferentially return RW over RO layouts, so ensure those
@@ -450,37 +574,29 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 
 
 	dprintk("%s:Begin\n", __func__);
 	dprintk("%s:Begin\n", __func__);
 
 
-	assert_spin_locked(&lo->inode->i_lock);
-	if (list_empty(&lo->segs)) {
-		struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
-
-		spin_lock(&clp->cl_lock);
-		BUG_ON(!list_empty(&lo->layouts));
-		list_add_tail(&lo->layouts, &clp->cl_layouts);
-		spin_unlock(&clp->cl_lock);
-	}
-	list_for_each_entry(lp, &lo->segs, fi_list) {
-		if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+	assert_spin_locked(&lo->plh_inode->i_lock);
+	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
+		if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
 			continue;
 			continue;
-		list_add_tail(&lseg->fi_list, &lp->fi_list);
+		list_add_tail(&lseg->pls_list, &lp->pls_list);
 		dprintk("%s: inserted lseg %p "
 		dprintk("%s: inserted lseg %p "
 			"iomode %d offset %llu length %llu before "
 			"iomode %d offset %llu length %llu before "
 			"lp %p iomode %d offset %llu length %llu\n",
 			"lp %p iomode %d offset %llu length %llu\n",
-			__func__, lseg, lseg->range.iomode,
-			lseg->range.offset, lseg->range.length,
-			lp, lp->range.iomode, lp->range.offset,
-			lp->range.length);
+			__func__, lseg, lseg->pls_range.iomode,
+			lseg->pls_range.offset, lseg->pls_range.length,
+			lp, lp->pls_range.iomode, lp->pls_range.offset,
+			lp->pls_range.length);
 		found = 1;
 		found = 1;
 		break;
 		break;
 	}
 	}
 	if (!found) {
 	if (!found) {
-		list_add_tail(&lseg->fi_list, &lo->segs);
+		list_add_tail(&lseg->pls_list, &lo->plh_segs);
 		dprintk("%s: inserted lseg %p "
 		dprintk("%s: inserted lseg %p "
 			"iomode %d offset %llu length %llu at tail\n",
 			"iomode %d offset %llu length %llu at tail\n",
-			__func__, lseg, lseg->range.iomode,
-			lseg->range.offset, lseg->range.length);
+			__func__, lseg, lseg->pls_range.iomode,
+			lseg->pls_range.offset, lseg->pls_range.length);
 	}
 	}
-	get_layout_hdr_locked(lo);
+	get_layout_hdr(lo);
 
 
 	dprintk("%s:Return\n", __func__);
 	dprintk("%s:Return\n", __func__);
 }
 }
@@ -493,11 +609,11 @@ alloc_init_layout_hdr(struct inode *ino)
 	lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
 	lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
 	if (!lo)
 	if (!lo)
 		return NULL;
 		return NULL;
-	lo->refcount = 1;
-	INIT_LIST_HEAD(&lo->layouts);
-	INIT_LIST_HEAD(&lo->segs);
-	seqlock_init(&lo->seqlock);
-	lo->inode = ino;
+	atomic_set(&lo->plh_refcount, 1);
+	INIT_LIST_HEAD(&lo->plh_layouts);
+	INIT_LIST_HEAD(&lo->plh_segs);
+	INIT_LIST_HEAD(&lo->plh_bulk_recall);
+	lo->plh_inode = ino;
 	return lo;
 	return lo;
 }
 }
 
 
@@ -510,9 +626,12 @@ pnfs_find_alloc_layout(struct inode *ino)
 	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
 	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
 
 
 	assert_spin_locked(&ino->i_lock);
 	assert_spin_locked(&ino->i_lock);
-	if (nfsi->layout)
-		return nfsi->layout;
-
+	if (nfsi->layout) {
+		if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
+			return NULL;
+		else
+			return nfsi->layout;
+	}
 	spin_unlock(&ino->i_lock);
 	spin_unlock(&ino->i_lock);
 	new = alloc_init_layout_hdr(ino);
 	new = alloc_init_layout_hdr(ino);
 	spin_lock(&ino->i_lock);
 	spin_lock(&ino->i_lock);
@@ -538,31 +657,32 @@ pnfs_find_alloc_layout(struct inode *ino)
 static int
 static int
 is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
 is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
 {
 {
-	return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+	return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
 }
 }
 
 
 /*
 /*
  * lookup range in layout
  * lookup range in layout
  */
  */
 static struct pnfs_layout_segment *
 static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 {
 {
 	struct pnfs_layout_segment *lseg, *ret = NULL;
 	struct pnfs_layout_segment *lseg, *ret = NULL;
 
 
 	dprintk("%s:Begin\n", __func__);
 	dprintk("%s:Begin\n", __func__);
 
 
-	assert_spin_locked(&lo->inode->i_lock);
-	list_for_each_entry(lseg, &lo->segs, fi_list) {
-		if (is_matching_lseg(lseg, iomode)) {
+	assert_spin_locked(&lo->plh_inode->i_lock);
+	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
+		    is_matching_lseg(lseg, iomode)) {
 			ret = lseg;
 			ret = lseg;
 			break;
 			break;
 		}
 		}
-		if (cmp_layout(iomode, lseg->range.iomode) > 0)
+		if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
 			break;
 			break;
 	}
 	}
 
 
 	dprintk("%s:Return lseg %p ref %d\n",
 	dprintk("%s:Return lseg %p ref %d\n",
-		__func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
 	return ret;
 	return ret;
 }
 }
 
 
@@ -576,6 +696,7 @@ pnfs_update_layout(struct inode *ino,
 		   enum pnfs_iomode iomode)
 		   enum pnfs_iomode iomode)
 {
 {
 	struct nfs_inode *nfsi = NFS_I(ino);
 	struct nfs_inode *nfsi = NFS_I(ino);
+	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_segment *lseg = NULL;
 	struct pnfs_layout_segment *lseg = NULL;
 
 
@@ -588,25 +709,53 @@ pnfs_update_layout(struct inode *ino,
 		goto out_unlock;
 		goto out_unlock;
 	}
 	}
 
 
-	/* Check to see if the layout for the given range already exists */
-	lseg = pnfs_has_layout(lo, iomode);
-	if (lseg) {
-		dprintk("%s: Using cached lseg %p for iomode %d)\n",
-			__func__, lseg, iomode);
+	/* Do we even need to bother with this? */
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s matches recall, use MDS\n", __func__);
 		goto out_unlock;
 		goto out_unlock;
 	}
 	}
+	/* Check to see if the layout for the given range already exists */
+	lseg = pnfs_find_lseg(lo, iomode);
+	if (lseg)
+		goto out_unlock;
 
 
 	/* if LAYOUTGET already failed once we don't try again */
 	/* if LAYOUTGET already failed once we don't try again */
-	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
+	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
+		goto out_unlock;
+
+	if (pnfs_layoutgets_blocked(lo, NULL, 0))
 		goto out_unlock;
 		goto out_unlock;
+	atomic_inc(&lo->plh_outstanding);
 
 
-	get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
+	get_layout_hdr(lo);
+	if (list_empty(&lo->plh_segs)) {
+		/* The lo must be on the clp list if there is any
+		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
+		 */
+		spin_lock(&clp->cl_lock);
+		BUG_ON(!list_empty(&lo->plh_layouts));
+		list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+		spin_unlock(&clp->cl_lock);
+	}
 	spin_unlock(&ino->i_lock);
 	spin_unlock(&ino->i_lock);
 
 
 	lseg = send_layoutget(lo, ctx, iomode);
 	lseg = send_layoutget(lo, ctx, iomode);
+	if (!lseg) {
+		spin_lock(&ino->i_lock);
+		if (list_empty(&lo->plh_segs)) {
+			spin_lock(&clp->cl_lock);
+			list_del_init(&lo->plh_layouts);
+			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+		}
+		spin_unlock(&ino->i_lock);
+	}
+	atomic_dec(&lo->plh_outstanding);
+	put_layout_hdr(lo);
 out:
 out:
 	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
 	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
-		nfsi->layout->state, lseg);
+		nfsi->layout->plh_flags, lseg);
 	return lseg;
 	return lseg;
 out_unlock:
 out_unlock:
 	spin_unlock(&ino->i_lock);
 	spin_unlock(&ino->i_lock);
@@ -619,9 +768,21 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
 	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
 	struct nfs4_layoutget_res *res = &lgp->res;
 	struct nfs4_layoutget_res *res = &lgp->res;
 	struct pnfs_layout_segment *lseg;
 	struct pnfs_layout_segment *lseg;
-	struct inode *ino = lo->inode;
+	struct inode *ino = lo->plh_inode;
+	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	int status = 0;
 	int status = 0;
 
 
+	/* Verify we got what we asked for.
+	 * Note that because the xdr parsing only accepts a single
+	 * element array, this can fail even if the server is behaving
+	 * correctly.
+	 */
+	if (lgp->args.range.iomode > res->range.iomode ||
+	    res->range.offset != 0 ||
+	    res->range.length != NFS4_MAX_UINT64) {
+		status = -EINVAL;
+		goto out;
+	}
 	/* Inject layout blob into I/O device driver */
 	/* Inject layout blob into I/O device driver */
 	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
 	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
 	if (!lseg || IS_ERR(lseg)) {
 	if (!lseg || IS_ERR(lseg)) {
@@ -635,16 +796,37 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	}
 	}
 
 
 	spin_lock(&ino->i_lock);
 	spin_lock(&ino->i_lock);
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s forget reply due to recall\n", __func__);
+		goto out_forget_reply;
+	}
+
+	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+		dprintk("%s forget reply due to state\n", __func__);
+		goto out_forget_reply;
+	}
 	init_lseg(lo, lseg);
 	init_lseg(lo, lseg);
-	lseg->range = res->range;
+	lseg->pls_range = res->range;
 	*lgp->lsegpp = lseg;
 	*lgp->lsegpp = lseg;
 	pnfs_insert_layout(lo, lseg);
 	pnfs_insert_layout(lo, lseg);
 
 
+	if (res->return_on_close) {
+		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
+		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
+	}
+
 	/* Done processing layoutget. Set the layout stateid */
 	/* Done processing layoutget. Set the layout stateid */
-	pnfs_set_layout_stateid(lo, &res->stateid);
+	pnfs_set_layout_stateid(lo, &res->stateid, false);
 	spin_unlock(&ino->i_lock);
 	spin_unlock(&ino->i_lock);
 out:
 out:
 	return status;
 	return status;
+
+out_forget_reply:
+	spin_unlock(&ino->i_lock);
+	lseg->pls_layout = lo;
+	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+	goto out;
 }
 }
 
 
 /*
 /*

+ 61 - 15
fs/nfs/pnfs.h

@@ -30,11 +30,17 @@
 #ifndef FS_NFS_PNFS_H
 #ifndef FS_NFS_PNFS_H
 #define FS_NFS_PNFS_H
 #define FS_NFS_PNFS_H
 
 
+enum {
+	NFS_LSEG_VALID = 0,	/* cleared when lseg is recalled/returned */
+	NFS_LSEG_ROC,		/* roc bit received from server */
+};
+
 struct pnfs_layout_segment {
 struct pnfs_layout_segment {
-	struct list_head fi_list;
-	struct pnfs_layout_range range;
-	struct kref kref;
-	struct pnfs_layout_hdr *layout;
+	struct list_head pls_list;
+	struct pnfs_layout_range pls_range;
+	atomic_t pls_refcount;
+	unsigned long pls_flags;
+	struct pnfs_layout_hdr *pls_layout;
 };
 };
 
 
 #ifdef CONFIG_NFS_V4_1
 #ifdef CONFIG_NFS_V4_1
@@ -44,7 +50,9 @@ struct pnfs_layout_segment {
 enum {
 enum {
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
-	NFS_LAYOUT_STATEID_SET,		/* have a valid layout stateid */
+	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */
+	NFS_LAYOUT_ROC,			/* some lseg had roc bit set */
+	NFS_LAYOUT_DESTROYED,		/* no new use of layout allowed */
 };
 };
 
 
 /* Per-layout driver specific registration structure */
 /* Per-layout driver specific registration structure */
@@ -60,13 +68,16 @@ struct pnfs_layoutdriver_type {
 };
 };
 
 
 struct pnfs_layout_hdr {
 struct pnfs_layout_hdr {
-	unsigned long		refcount;
-	struct list_head	layouts;   /* other client layouts */
-	struct list_head	segs;      /* layout segments list */
-	seqlock_t		seqlock;   /* Protects the stateid */
-	nfs4_stateid		stateid;
-	unsigned long		state;
-	struct inode		*inode;
+	atomic_t		plh_refcount;
+	struct list_head	plh_layouts;   /* other client layouts */
+	struct list_head	plh_bulk_recall; /* clnt list of bulk recalls */
+	struct list_head	plh_segs;      /* layout segments list */
+	nfs4_stateid		plh_stateid;
+	atomic_t		plh_outstanding; /* number of RPCs out */
+	unsigned long		plh_block_lgets; /* block LAYOUTGET if >0 */
+	u32			plh_barrier; /* ignore lower seqids */
+	unsigned long		plh_flags;
+	struct inode		*plh_inode;
 };
 };
 
 
 struct pnfs_device {
 struct pnfs_device {
@@ -134,17 +145,30 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
 extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
 extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
 
 
 /* pnfs.c */
 /* pnfs.c */
+void get_layout_hdr(struct pnfs_layout_hdr *lo);
 struct pnfs_layout_segment *
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 		   enum pnfs_iomode access_type);
 		   enum pnfs_iomode access_type);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
 void pnfs_destroy_layout(struct nfs_inode *);
 void pnfs_destroy_all_layouts(struct nfs_client *);
 void pnfs_destroy_all_layouts(struct nfs_client *);
-void put_layout_hdr(struct inode *inode);
-void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
-			     struct nfs4_state *open_state);
+void put_layout_hdr(struct pnfs_layout_hdr *lo);
+void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+			     const nfs4_stateid *new,
+			     bool update_barrier);
+int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
+				  struct pnfs_layout_hdr *lo,
+				  struct nfs4_state *open_state);
+int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+				struct list_head *tmp_list,
+				u32 iomode);
+bool pnfs_roc(struct inode *ino);
+void pnfs_roc_release(struct inode *ino);
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
 
 
 
 
 static inline int lo_fail_bit(u32 iomode)
 static inline int lo_fail_bit(u32 iomode)
@@ -176,6 +200,28 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
 	return NULL;
 	return NULL;
 }
 }
 
 
+static inline bool
+pnfs_roc(struct inode *ino)
+{
+	return false;
+}
+
+static inline void
+pnfs_roc_release(struct inode *ino)
+{
+}
+
+static inline void
+pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+}
+
+static inline bool
+pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+	return false;
+}
+
 static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
 static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
 {
 {
 }
 }

+ 3 - 2
fs/nfs/proc.c

@@ -458,7 +458,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
 	fattr = nfs_alloc_fattr();
 	fattr = nfs_alloc_fattr();
 	status = -ENOMEM;
 	status = -ENOMEM;
 	if (fh == NULL || fattr == NULL)
 	if (fh == NULL || fattr == NULL)
-		goto out;
+		goto out_free;
 
 
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	nfs_mark_for_revalidate(dir);
@@ -471,6 +471,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
 	if (status == 0)
 	if (status == 0)
 		status = nfs_instantiate(dentry, fh, fattr);
 		status = nfs_instantiate(dentry, fh, fattr);
 
 
+out_free:
 	nfs_free_fattr(fattr);
 	nfs_free_fattr(fattr);
 	nfs_free_fhandle(fh);
 	nfs_free_fhandle(fh);
 out:
 out:
@@ -731,7 +732,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.statfs		= nfs_proc_statfs,
 	.statfs		= nfs_proc_statfs,
 	.fsinfo		= nfs_proc_fsinfo,
 	.fsinfo		= nfs_proc_fsinfo,
 	.pathconf	= nfs_proc_pathconf,
 	.pathconf	= nfs_proc_pathconf,
-	.decode_dirent	= nfs_decode_dirent,
+	.decode_dirent	= nfs2_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
 	.read_setup	= nfs_proc_read_setup,
 	.read_done	= nfs_read_done,
 	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
 	.write_setup	= nfs_proc_write_setup,

+ 16 - 2
fs/nfs/super.c

@@ -598,7 +598,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
 
 
 	if (nfss->mountd_version || showdefaults)
 	if (nfss->mountd_version || showdefaults)
 		seq_printf(m, ",mountvers=%u", nfss->mountd_version);
 		seq_printf(m, ",mountvers=%u", nfss->mountd_version);
-	if (nfss->mountd_port || showdefaults)
+	if ((nfss->mountd_port &&
+		nfss->mountd_port != (unsigned short)NFS_UNSPEC_PORT) ||
+		showdefaults)
 		seq_printf(m, ",mountport=%u", nfss->mountd_port);
 		seq_printf(m, ",mountport=%u", nfss->mountd_port);
 
 
 	nfs_show_mountd_netid(m, nfss, showdefaults);
 	nfs_show_mountd_netid(m, nfss, showdefaults);
@@ -2494,7 +2496,13 @@ static void nfs4_clone_super(struct super_block *sb,
 	sb->s_maxbytes = old_sb->s_maxbytes;
 	sb->s_maxbytes = old_sb->s_maxbytes;
 	sb->s_time_gran = 1;
 	sb->s_time_gran = 1;
 	sb->s_op = old_sb->s_op;
 	sb->s_op = old_sb->s_op;
- 	nfs_initialise_sb(sb);
+	/*
+	 * The VFS shouldn't apply the umask to mode bits. We will do
+	 * so ourselves when necessary.
+	 */
+	sb->s_flags  |= MS_POSIXACL;
+	sb->s_xattr  = old_sb->s_xattr;
+	nfs_initialise_sb(sb);
 }
 }
 
 
 /*
 /*
@@ -2504,6 +2512,12 @@ static void nfs4_fill_super(struct super_block *sb)
 {
 {
 	sb->s_time_gran = 1;
 	sb->s_time_gran = 1;
 	sb->s_op = &nfs4_sops;
 	sb->s_op = &nfs4_sops;
+	/*
+	 * The VFS shouldn't apply the umask to mode bits. We will do
+	 * so ourselves when necessary.
+	 */
+	sb->s_flags  |= MS_POSIXACL;
+	sb->s_xattr = nfs4_xattr_handlers;
 	nfs_initialise_sb(sb);
 	nfs_initialise_sb(sb);
 }
 }
 
 

+ 1 - 1
fs/nfs/unlink.c

@@ -429,7 +429,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (data == NULL)
 	if (data == NULL)
 		return ERR_PTR(-ENOMEM);
 		return ERR_PTR(-ENOMEM);
-	task_setup_data.callback_data = data,
+	task_setup_data.callback_data = data;
 
 
 	data->cred = rpc_lookup_cred();
 	data->cred = rpc_lookup_cred();
 	if (IS_ERR(data->cred)) {
 	if (IS_ERR(data->cred)) {

+ 424 - 266
fs/nfsd/nfs4callback.c

@@ -50,11 +50,6 @@ enum {
 	NFSPROC4_CLNT_CB_SEQUENCE,
 	NFSPROC4_CLNT_CB_SEQUENCE,
 };
 };
 
 
-enum nfs_cb_opnum4 {
-	OP_CB_RECALL            = 4,
-	OP_CB_SEQUENCE          = 11,
-};
-
 #define NFS4_MAXTAGLEN		20
 #define NFS4_MAXTAGLEN		20
 
 
 #define NFS4_enc_cb_null_sz		0
 #define NFS4_enc_cb_null_sz		0
@@ -79,61 +74,6 @@ enum nfs_cb_opnum4 {
 					cb_sequence_dec_sz +            \
 					cb_sequence_dec_sz +            \
 					op_dec_sz)
 					op_dec_sz)
 
 
-/*
-* Generic encode routines from fs/nfs/nfs4xdr.c
-*/
-static inline __be32 *
-xdr_writemem(__be32 *p, const void *ptr, int nbytes)
-{
-	int tmp = XDR_QUADLEN(nbytes);
-	if (!tmp)
-		return p;
-	p[tmp-1] = 0;
-	memcpy(p, ptr, nbytes);
-	return p + tmp;
-}
-
-#define WRITE32(n)               *p++ = htonl(n)
-#define WRITEMEM(ptr,nbytes)     do {                           \
-	p = xdr_writemem(p, ptr, nbytes);                       \
-} while (0)
-#define RESERVE_SPACE(nbytes)   do {                            \
-	p = xdr_reserve_space(xdr, nbytes);                     \
-	if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
-	BUG_ON(!p);                                             \
-} while (0)
-
-/*
- * Generic decode routines from fs/nfs/nfs4xdr.c
- */
-#define DECODE_TAIL                             \
-	status = 0;                             \
-out:                                            \
-	return status;                          \
-xdr_error:                                      \
-	dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
-	status = -EIO;                          \
-	goto out
-
-#define READ32(x)         (x) = ntohl(*p++)
-#define READ64(x)         do {                  \
-	(x) = (u64)ntohl(*p++) << 32;           \
-	(x) |= ntohl(*p++);                     \
-} while (0)
-#define READTIME(x)       do {                  \
-	p++;                                    \
-	(x.tv_sec) = ntohl(*p++);               \
-	(x.tv_nsec) = ntohl(*p++);              \
-} while (0)
-#define READ_BUF(nbytes)  do { \
-	p = xdr_inline_decode(xdr, nbytes); \
-	if (!p) { \
-		dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
-			__func__, __LINE__); \
-		return -EIO; \
-	} \
-} while (0)
-
 struct nfs4_cb_compound_hdr {
 struct nfs4_cb_compound_hdr {
 	/* args */
 	/* args */
 	u32		ident;	/* minorversion 0 only */
 	u32		ident;	/* minorversion 0 only */
@@ -144,295 +84,513 @@ struct nfs4_cb_compound_hdr {
 	int		status;
 	int		status;
 };
 };
 
 
-static struct {
-int stat;
-int errno;
-} nfs_cb_errtbl[] = {
-	{ NFS4_OK,		0               },
-	{ NFS4ERR_PERM,		EPERM           },
-	{ NFS4ERR_NOENT,	ENOENT          },
-	{ NFS4ERR_IO,		EIO             },
-	{ NFS4ERR_NXIO,		ENXIO           },
-	{ NFS4ERR_ACCESS,	EACCES          },
-	{ NFS4ERR_EXIST,	EEXIST          },
-	{ NFS4ERR_XDEV,		EXDEV           },
-	{ NFS4ERR_NOTDIR,	ENOTDIR         },
-	{ NFS4ERR_ISDIR,	EISDIR          },
-	{ NFS4ERR_INVAL,	EINVAL          },
-	{ NFS4ERR_FBIG,		EFBIG           },
-	{ NFS4ERR_NOSPC,	ENOSPC          },
-	{ NFS4ERR_ROFS,		EROFS           },
-	{ NFS4ERR_MLINK,	EMLINK          },
-	{ NFS4ERR_NAMETOOLONG,	ENAMETOOLONG    },
-	{ NFS4ERR_NOTEMPTY,	ENOTEMPTY       },
-	{ NFS4ERR_DQUOT,	EDQUOT          },
-	{ NFS4ERR_STALE,	ESTALE          },
-	{ NFS4ERR_BADHANDLE,	EBADHANDLE      },
-	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE      },
-	{ NFS4ERR_NOTSUPP,	ENOTSUPP        },
-	{ NFS4ERR_TOOSMALL,	ETOOSMALL       },
-	{ NFS4ERR_SERVERFAULT,	ESERVERFAULT    },
-	{ NFS4ERR_BADTYPE,	EBADTYPE        },
-	{ NFS4ERR_LOCKED,	EAGAIN          },
-	{ NFS4ERR_RESOURCE,	EREMOTEIO       },
-	{ NFS4ERR_SYMLINK,	ELOOP           },
-	{ NFS4ERR_OP_ILLEGAL,	EOPNOTSUPP      },
-	{ NFS4ERR_DEADLOCK,	EDEADLK         },
-	{ -1,                   EIO             }
-};
+/*
+ * Handle decode buffer overflows out-of-line.
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+	dprintk("NFS: %s prematurely hit the end of our receive buffer. "
+		"Remaining buffer length is %tu words.\n",
+		func, xdr->end - xdr->p);
+}
 
 
-static int
-nfs_cb_stat_to_errno(int stat)
+static __be32 *xdr_encode_empty_array(__be32 *p)
 {
 {
-	int i;
-	for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
-		if (nfs_cb_errtbl[i].stat == stat)
-			return nfs_cb_errtbl[i].errno;
-	}
-	/* If we cannot translate the error, the recovery routines should
-	* handle it.
-	* Note: remaining NFSv4 error codes have values > 10000, so should
-	* not conflict with native Linux error codes.
-	*/
-	return stat;
+	*p++ = xdr_zero;
+	return p;
 }
 }
 
 
 /*
 /*
- * XDR encode
+ * Encode/decode NFSv4 CB basic data types
+ *
+ * Basic NFSv4 callback data types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section
+ * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version
+ * 1 Protocol"
+ */
+
+/*
+ *	nfs_cb_opnum4
+ *
+ *	enum nfs_cb_opnum4 {
+ *		OP_CB_GETATTR		= 3,
+ *		  ...
+ *	};
  */
  */
+enum nfs_cb_opnum4 {
+	OP_CB_GETATTR			= 3,
+	OP_CB_RECALL			= 4,
+	OP_CB_LAYOUTRECALL		= 5,
+	OP_CB_NOTIFY			= 6,
+	OP_CB_PUSH_DELEG		= 7,
+	OP_CB_RECALL_ANY		= 8,
+	OP_CB_RECALLABLE_OBJ_AVAIL	= 9,
+	OP_CB_RECALL_SLOT		= 10,
+	OP_CB_SEQUENCE			= 11,
+	OP_CB_WANTS_CANCELLED		= 12,
+	OP_CB_NOTIFY_LOCK		= 13,
+	OP_CB_NOTIFY_DEVICEID		= 14,
+	OP_CB_ILLEGAL			= 10044
+};
 
 
-static void
-encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
+static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
 {
 {
 	__be32 *p;
 	__be32 *p;
 
 
-	RESERVE_SPACE(sizeof(stateid_t));
-	WRITE32(sid->si_generation);
-	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
+	p = xdr_reserve_space(xdr, 4);
+	*p = cpu_to_be32(op);
 }
 }
 
 
-static void
-encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+/*
+ * nfs_fh4
+ *
+ *	typedef opaque nfs_fh4<NFS4_FHSIZE>;
+ */
+static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
 {
 {
-	__be32 * p;
+	u32 length = fh->fh_size;
+	__be32 *p;
 
 
-	RESERVE_SPACE(16);
-	WRITE32(0);            /* tag length is always 0 */
-	WRITE32(hdr->minorversion);
-	WRITE32(hdr->ident);
-	hdr->nops_p = p;
-	WRITE32(hdr->nops);
+	BUG_ON(length > NFS4_FHSIZE);
+	p = xdr_reserve_space(xdr, 4 + length);
+	xdr_encode_opaque(p, &fh->fh_base, length);
 }
 }
 
 
-static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
+/*
+ * stateid4
+ *
+ *	struct stateid4 {
+ *		uint32_t	seqid;
+ *		opaque		other[12];
+ *	};
+ */
+static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
 {
 {
-	*hdr->nops_p = htonl(hdr->nops);
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
+	*p++ = cpu_to_be32(sid->si_generation);
+	xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE);
 }
 }
 
 
-static void
-encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
-		struct nfs4_cb_compound_hdr *hdr)
+/*
+ * sessionid4
+ *
+ *	typedef opaque sessionid4[NFS4_SESSIONID_SIZE];
+ */
+static void encode_sessionid4(struct xdr_stream *xdr,
+			      const struct nfsd4_session *session)
 {
 {
 	__be32 *p;
 	__be32 *p;
-	int len = dp->dl_fh.fh_size;
-
-	RESERVE_SPACE(4);
-	WRITE32(OP_CB_RECALL);
-	encode_stateid(xdr, &dp->dl_stateid);
-	RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
-	WRITE32(0); /* truncate optimization not implemented */
-	WRITE32(len);
-	WRITEMEM(&dp->dl_fh.fh_base, len);
-	hdr->nops++;
+
+	p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
+	xdr_encode_opaque_fixed(p, session->se_sessionid.data,
+					NFS4_MAX_SESSIONID_LEN);
 }
 }
 
 
-static void
-encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
-		   struct nfs4_cb_compound_hdr *hdr)
-{
-	__be32 *p;
-	struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
+/*
+ * nfsstat4
+ */
+static const struct {
+	int stat;
+	int errno;
+} nfs_cb_errtbl[] = {
+	{ NFS4_OK,		0		},
+	{ NFS4ERR_PERM,		-EPERM		},
+	{ NFS4ERR_NOENT,	-ENOENT		},
+	{ NFS4ERR_IO,		-EIO		},
+	{ NFS4ERR_NXIO,		-ENXIO		},
+	{ NFS4ERR_ACCESS,	-EACCES		},
+	{ NFS4ERR_EXIST,	-EEXIST		},
+	{ NFS4ERR_XDEV,		-EXDEV		},
+	{ NFS4ERR_NOTDIR,	-ENOTDIR	},
+	{ NFS4ERR_ISDIR,	-EISDIR		},
+	{ NFS4ERR_INVAL,	-EINVAL		},
+	{ NFS4ERR_FBIG,		-EFBIG		},
+	{ NFS4ERR_NOSPC,	-ENOSPC		},
+	{ NFS4ERR_ROFS,		-EROFS		},
+	{ NFS4ERR_MLINK,	-EMLINK		},
+	{ NFS4ERR_NAMETOOLONG,	-ENAMETOOLONG	},
+	{ NFS4ERR_NOTEMPTY,	-ENOTEMPTY	},
+	{ NFS4ERR_DQUOT,	-EDQUOT		},
+	{ NFS4ERR_STALE,	-ESTALE		},
+	{ NFS4ERR_BADHANDLE,	-EBADHANDLE	},
+	{ NFS4ERR_BAD_COOKIE,	-EBADCOOKIE	},
+	{ NFS4ERR_NOTSUPP,	-ENOTSUPP	},
+	{ NFS4ERR_TOOSMALL,	-ETOOSMALL	},
+	{ NFS4ERR_SERVERFAULT,	-ESERVERFAULT	},
+	{ NFS4ERR_BADTYPE,	-EBADTYPE	},
+	{ NFS4ERR_LOCKED,	-EAGAIN		},
+	{ NFS4ERR_RESOURCE,	-EREMOTEIO	},
+	{ NFS4ERR_SYMLINK,	-ELOOP		},
+	{ NFS4ERR_OP_ILLEGAL,	-EOPNOTSUPP	},
+	{ NFS4ERR_DEADLOCK,	-EDEADLK	},
+	{ -1,			-EIO		}
+};
 
 
-	if (hdr->minorversion == 0)
-		return;
+/*
+ * If we cannot translate the error, the recovery routines should
+ * handle it.
+ *
+ * Note: remaining NFSv4 error codes have values > 10000, so should
+ * not conflict with native Linux error codes.
+ */
+static int nfs_cb_stat_to_errno(int status)
+{
+	int i;
 
 
-	RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
+	for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
+		if (nfs_cb_errtbl[i].stat == status)
+			return nfs_cb_errtbl[i].errno;
+	}
 
 
-	WRITE32(OP_CB_SEQUENCE);
-	WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN);
-	WRITE32(ses->se_cb_seq_nr);
-	WRITE32(0);		/* slotid, always 0 */
-	WRITE32(0);		/* highest slotid always 0 */
-	WRITE32(0);		/* cachethis always 0 */
-	WRITE32(0); /* FIXME: support referring_call_lists */
-	hdr->nops++;
+	dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status);
+	return -status;
 }
 }
 
 
-static int
-nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+			       enum nfsstat4 *status)
 {
 {
-	struct xdr_stream xdrs, *xdr = &xdrs;
+	__be32 *p;
+	u32 op;
 
 
-	xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
-        RESERVE_SPACE(0);
+	p = xdr_inline_decode(xdr, 4 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	op = be32_to_cpup(p++);
+	if (unlikely(op != expected))
+		goto out_unexpected;
+	*status = be32_to_cpup(p);
 	return 0;
 	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+out_unexpected:
+	dprintk("NFSD: Callback server returned operation %d but "
+		"we issued a request for %d\n", op, expected);
+	return -EIO;
 }
 }
 
 
-static int
-nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
-		struct nfsd4_callback *cb)
+/*
+ * CB_COMPOUND4args
+ *
+ *	struct CB_COMPOUND4args {
+ *		utf8str_cs	tag;
+ *		uint32_t	minorversion;
+ *		uint32_t	callback_ident;
+ *		nfs_cb_argop4	argarray<>;
+ *	};
+*/
+static void encode_cb_compound4args(struct xdr_stream *xdr,
+				    struct nfs4_cb_compound_hdr *hdr)
 {
 {
-	struct xdr_stream xdr;
-	struct nfs4_delegation *args = cb->cb_op;
-	struct nfs4_cb_compound_hdr hdr = {
-		.ident = cb->cb_clp->cl_cb_ident,
-		.minorversion = cb->cb_minorversion,
-	};
+	__be32 * p;
 
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cb_compound_hdr(&xdr, &hdr);
-	encode_cb_sequence(&xdr, cb, &hdr);
-	encode_cb_recall(&xdr, args, &hdr);
-	encode_cb_nops(&hdr);
+	p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
+	p = xdr_encode_empty_array(p);		/* empty tag */
+	*p++ = cpu_to_be32(hdr->minorversion);
+	*p++ = cpu_to_be32(hdr->ident);
+
+	hdr->nops_p = p;
+	*p = cpu_to_be32(hdr->nops);		/* argarray element count */
+}
+
+/*
+ * Update argarray element count
+ */
+static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
+{
+	BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS);
+	*hdr->nops_p = cpu_to_be32(hdr->nops);
+}
+
+/*
+ * CB_COMPOUND4res
+ *
+ *	struct CB_COMPOUND4res {
+ *		nfsstat4	status;
+ *		utf8str_cs	tag;
+ *		nfs_cb_resop4	resarray<>;
+ *	};
+ */
+/*
+ * Decode the CB_COMPOUND4res header: store the compound status and the
+ * resarray element count in @hdr and discard the tag.
+ *
+ * Returns 0 on success or -EIO if the receive buffer is too short.
+ */
+static int decode_cb_compound4res(struct xdr_stream *xdr,
+				  struct nfs4_cb_compound_hdr *hdr)
+{
+	u32 length;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	hdr->status = be32_to_cpup(p++);
+	/* Ignore the tag */
+	length = be32_to_cpup(p++);
+	p = xdr_inline_decode(xdr, length + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
+	/* p points at the tag bytes; step over them to reach the count */
+	p += XDR_QUADLEN(length);
+	hdr->nops = be32_to_cpup(p);
 	return 0;
 	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
 }
 }
 
 
+/*
+ * CB_RECALL4args
+ *
+ *	struct CB_RECALL4args {
+ *		stateid4	stateid;
+ *		bool		truncate;
+ *		nfs_fh4		fh;
+ *	};
+ */
+static void encode_cb_recall4args(struct xdr_stream *xdr,
+				  const struct nfs4_delegation *dp,
+				  struct nfs4_cb_compound_hdr *hdr)
+{
+	__be32 *p;
+
+	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
+	encode_stateid4(xdr, &dp->dl_stateid);
+
+	p = xdr_reserve_space(xdr, 4);
+	*p++ = xdr_zero;			/* truncate */
 
 
-static int
-decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
-        __be32 *p;
-	u32 taglen;
+	encode_nfs_fh4(xdr, &dp->dl_fh);
 
 
-        READ_BUF(8);
-        READ32(hdr->status);
-	/* We've got no use for the tag; ignore it: */
-        READ32(taglen);
-        READ_BUF(taglen + 4);
-        p += XDR_QUADLEN(taglen);
-        READ32(hdr->nops);
-        return 0;
+	hdr->nops++;
 }
 }
 
 
-static int
-decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+/*
+ * CB_SEQUENCE4args
+ *
+ *	struct CB_SEQUENCE4args {
+ *		sessionid4		csa_sessionid;
+ *		sequenceid4		csa_sequenceid;
+ *		slotid4			csa_slotid;
+ *		slotid4			csa_highest_slotid;
+ *		bool			csa_cachethis;
+ *		referring_call_list4	csa_referring_call_lists<>;
+ *	};
+ */
+static void encode_cb_sequence4args(struct xdr_stream *xdr,
+				    const struct nfsd4_callback *cb,
+				    struct nfs4_cb_compound_hdr *hdr)
 {
 {
+	struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
 	__be32 *p;
 	__be32 *p;
-	u32 op;
-	int32_t nfserr;
-
-	READ_BUF(8);
-	READ32(op);
-	if (op != expected) {
-		dprintk("NFSD: decode_cb_op_hdr: Callback server returned "
-		         " operation %d but we issued a request for %d\n",
-		         op, expected);
-		return -EIO;
-	}
-	READ32(nfserr);
-	if (nfserr != NFS_OK)
-		return -nfs_cb_stat_to_errno(nfserr);
-	return 0;
+
+	if (hdr->minorversion == 0)
+		return;
+
+	encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
+	encode_sessionid4(xdr, session);
+
+	p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
+	*p++ = cpu_to_be32(session->se_cb_seq_nr);	/* csa_sequenceid */
+	*p++ = xdr_zero;			/* csa_slotid */
+	*p++ = xdr_zero;			/* csa_highest_slotid */
+	*p++ = xdr_zero;			/* csa_cachethis */
+	xdr_encode_empty_array(p);		/* csa_referring_call_lists */
+
+	hdr->nops++;
 }
 }
 
 
 /*
 /*
+ * CB_SEQUENCE4resok
+ *
+ *	struct CB_SEQUENCE4resok {
+ *		sessionid4	csr_sessionid;
+ *		sequenceid4	csr_sequenceid;
+ *		slotid4		csr_slotid;
+ *		slotid4		csr_highest_slotid;
+ *		slotid4		csr_target_highest_slotid;
+ *	};
+ *
+ *	union CB_SEQUENCE4res switch (nfsstat4 csr_status) {
+ *	case NFS4_OK:
+ *		CB_SEQUENCE4resok	csr_resok4;
+ *	default:
+ *		void;
+ *	};
+ *
  * Our current back channel implementation supports a single backchannel
  * Our current back channel implementation supports a single backchannel
  * with a single slot.
  * with a single slot.
  */
  */
-static int
-decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
-		   struct rpc_rqst *rqstp)
+/*
+ * Decode and sanity-check the CB_SEQUENCE4resok body.
+ *
+ * Returns 0 if the session id, sequence id and slot id match what was
+ * sent, -ESERVERFAULT if any of them differ, or -EIO if the receive
+ * buffer is too short.
+ */
+static int decode_cb_sequence4resok(struct xdr_stream *xdr,
+				    struct nfsd4_callback *cb)
 {
 {
-	struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
+	struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
 	struct nfs4_sessionid id;
 	struct nfs4_sessionid id;
 	int status;
 	int status;
-	u32 dummy;
 	__be32 *p;
 	__be32 *p;
+	u32 dummy;
 
 
-	if (cb->cb_minorversion == 0)
-		return 0;
-
-	status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
-	if (status)
-		return status;
+	status = -ESERVERFAULT;
 
 
 	/*
 	/*
 	 * If the server returns different values for sessionID, slotID or
 	 * If the server returns different values for sessionID, slotID or
 	 * sequence number, the server is looney tunes.
 	 * sequence number, the server is looney tunes.
 	 */
 	 */
-	status = -ESERVERFAULT;
-
-	READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
+	/*
+	 * Consume the whole result body: the session id followed by
+	 * csr_sequenceid, csr_slotid, csr_highest_slotid and
+	 * csr_target_highest_slotid.  The last two are not examined yet,
+	 * but they must still be pulled off the stream so that the next
+	 * operation header is decoded from the right offset.
+	 */
+	p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
+	if (unlikely(p == NULL))
+		goto out_overflow;
 	memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
 	memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
-	p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
-	if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
-		dprintk("%s Invalid session id\n", __func__);
+	if (memcmp(id.data, session->se_sessionid.data,
+					NFS4_MAX_SESSIONID_LEN) != 0) {
+		dprintk("NFS: %s Invalid session id\n", __func__);
 		goto out;
 		goto out;
 	}
 	}
-	READ32(dummy);
-	if (dummy != ses->se_cb_seq_nr) {
-		dprintk("%s Invalid sequence number\n", __func__);
+	p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
+
+	dummy = be32_to_cpup(p++);
+	if (dummy != session->se_cb_seq_nr) {
+		dprintk("NFS: %s Invalid sequence number\n", __func__);
 		goto out;
 		goto out;
 	}
 	}
-	READ32(dummy); 	/* slotid must be 0 */
+
+	dummy = be32_to_cpup(p++);
 	if (dummy != 0) {
 	if (dummy != 0) {
-		dprintk("%s Invalid slotid\n", __func__);
+		dprintk("NFS: %s Invalid slotid\n", __func__);
 		goto out;
 		goto out;
 	}
 	}
-	/* FIXME: process highest slotid and target highest slotid */
+
+	/*
+	 * FIXME: process highest slotid and target highest slotid
+	 */
 	status = 0;
 	status = 0;
 out:
 out:
 	return status;
 	return status;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
 }
 }
 
 
+/*
+ * Decode the CB_SEQUENCE result that leads an NFSv4.1 callback reply.
+ *
+ * Nothing is decoded for minor version 0 and 0 is returned.  Otherwise
+ * returns the error from decode_cb_op_status() if the operation header
+ * cannot be decoded, the errno translation of a non-OK csr_status, or
+ * the result of decode_cb_sequence4resok().
+ */
+static int decode_cb_sequence4res(struct xdr_stream *xdr,
+				  struct nfsd4_callback *cb)
+{
+	enum nfsstat4 nfserr;
+	int status;
+
+	if (cb->cb_minorversion == 0)
+		return 0;
+
+	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr);
+	if (unlikely(status))
+		goto out;
+	if (unlikely(nfserr != NFS4_OK))
+		goto out_default;
+	status = decode_cb_sequence4resok(xdr, cb);
+out:
+	return status;
+out_default:
+	/* status is 0 on this path; the failure is carried in nfserr */
+	return nfs_cb_stat_to_errno(nfserr);
+}
 
 
-static int
-nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
+/*
+ * NFSv4.0 and NFSv4.1 XDR encode functions
+ *
+ * NFSv4.0 callback argument types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section 20
+ * of RFC 5661:  "Network File System (NFS) Version 4 Minor Version 1
+ * Protocol".
+ */
+
+/*
+ * NB: Without this zero space reservation, callbacks over krb5p fail
+ */
+static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 void *__unused)
+{
+	xdr_reserve_space(xdr, 0);
+}
+
+/*
+ * 20.2. Operation 4: CB_RECALL - Recall a Delegation
+ */
+static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
+				   const struct nfsd4_callback *cb)
+{
+	const struct nfs4_delegation *args = cb->cb_op;
+	struct nfs4_cb_compound_hdr hdr = {
+		.ident = cb->cb_clp->cl_cb_ident,
+		.minorversion = cb->cb_minorversion,
+	};
+
+	encode_cb_compound4args(xdr, &hdr);
+	encode_cb_sequence4args(xdr, cb, &hdr);
+	encode_cb_recall4args(xdr, args, &hdr);
+	encode_cb_nops(&hdr);
+}
+
+
+/*
+ * NFSv4.0 and NFSv4.1 XDR decode functions
+ *
+ * NFSv4.0 callback result types are defined in section 15 of RFC
+ * 3530: "Network File System (NFS) version 4 Protocol" and section 20
+ * of RFC 5661:  "Network File System (NFS) Version 4 Minor Version 1
+ * Protocol".
+ */
+
+static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+				void *__unused)
 {
 {
 	return 0;
 	return 0;
 }
 }
 
 
-static int
-nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
-		struct nfsd4_callback *cb)
+/*
+ * 20.2. Operation 4: CB_RECALL - Recall a Delegation
+ *
+ * Returns 0 on success, or a negative errno if the reply cannot be
+ * decoded or the CB_RECALL operation itself reported an error.
+ */
+static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
+				  struct xdr_stream *xdr,
+				  struct nfsd4_callback *cb)
 {
 {
-	struct xdr_stream xdr;
 	struct nfs4_cb_compound_hdr hdr;
 	struct nfs4_cb_compound_hdr hdr;
+	enum nfsstat4 nfserr;
 	int status;
 	int status;
 
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_cb_compound_hdr(&xdr, &hdr);
-	if (status)
+	status = decode_cb_compound4res(xdr, &hdr);
+	if (unlikely(status))
 		goto out;
 		goto out;
-	if (cb) {
-		status = decode_cb_sequence(&xdr, cb, rqstp);
-		if (status)
+
+	if (cb != NULL) {
+		status = decode_cb_sequence4res(xdr, cb);
+		if (unlikely(status))
 			goto out;
 			goto out;
 	}
 	}
-	status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
+
+	status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
+	if (unlikely(status))
+		goto out;
+	if (unlikely(nfserr != NFS4_OK))
+		goto out_default;
 out:
 out:
 	return status;
 	return status;
+out_default:
+	/* status is 0 on this path; the failure is carried in nfserr */
+	return nfs_cb_stat_to_errno(nfserr);
 }
 }
 
 
 /*
 /*
  * RPC procedure tables
  * RPC procedure tables
  */
  */
-#define PROC(proc, call, argtype, restype)                              \
-[NFSPROC4_CLNT_##proc] = {                                      	\
-        .p_proc   = NFSPROC4_CB_##call,					\
-        .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,                    \
-        .p_decode = (kxdrproc_t) nfs4_xdr_##restype,                    \
-        .p_arglen = NFS4_##argtype##_sz,                                \
-        .p_replen = NFS4_##restype##_sz,                                \
-        .p_statidx = NFSPROC4_CB_##call,				\
-	.p_name   = #proc,                                              \
-}
-
-static struct rpc_procinfo     nfs4_cb_procedures[] = {
-    PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
-    PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
+#define PROC(proc, call, argtype, restype)				\
+[NFSPROC4_CLNT_##proc] = {						\
+	.p_proc    = NFSPROC4_CB_##call,				\
+	.p_encode  = (kxdreproc_t)nfs4_xdr_enc_##argtype,		\
+	.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_##restype,		\
+	.p_arglen  = NFS4_enc_##argtype##_sz,				\
+	.p_replen  = NFS4_dec_##restype##_sz,				\
+	.p_statidx = NFSPROC4_CB_##call,				\
+	.p_name    = #proc,						\
+}
+
+static struct rpc_procinfo nfs4_cb_procedures[] = {
+	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
+	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
 };
 };
 
 
-static struct rpc_version       nfs_cb_version4 = {
+static struct rpc_version nfs_cb_version4 = {
 /*
 /*
  * Note on the callback rpc program version number: despite language in rfc
  * Note on the callback rpc program version number: despite language in rfc
  * 5661 section 18.36.3 requiring servers to use 4 in this field, the
  * 5661 section 18.36.3 requiring servers to use 4 in this field, the
@@ -440,29 +598,29 @@ static struct rpc_version       nfs_cb_version4 = {
  * in practice that appears to be what implementations use.  The section
  * in practice that appears to be what implementations use.  The section
  * 18.36.3 language is expected to be fixed in an erratum.
  * 18.36.3 language is expected to be fixed in an erratum.
  */
  */
-        .number                 = 1,
-        .nrprocs                = ARRAY_SIZE(nfs4_cb_procedures),
-        .procs                  = nfs4_cb_procedures
+	.number			= 1,
+	.nrprocs		= ARRAY_SIZE(nfs4_cb_procedures),
+	.procs			= nfs4_cb_procedures
 };
 };
 
 
-static struct rpc_version *	nfs_cb_version[] = {
+static struct rpc_version *nfs_cb_version[] = {
 	&nfs_cb_version4,
 	&nfs_cb_version4,
 };
 };
 
 
 static struct rpc_program cb_program;
 static struct rpc_program cb_program;
 
 
 static struct rpc_stat cb_stats = {
 static struct rpc_stat cb_stats = {
-		.program	= &cb_program
+	.program		= &cb_program
 };
 };
 
 
 #define NFS4_CALLBACK 0x40000000
 #define NFS4_CALLBACK 0x40000000
 static struct rpc_program cb_program = {
 static struct rpc_program cb_program = {
-		.name 		= "nfs4_cb",
-		.number		= NFS4_CALLBACK,
-		.nrvers		= ARRAY_SIZE(nfs_cb_version),
-		.version	= nfs_cb_version,
-		.stats		= &cb_stats,
-		.pipe_dir_name  = "/nfsd4_cb",
+	.name			= "nfs4_cb",
+	.number			= NFS4_CALLBACK,
+	.nrvers			= ARRAY_SIZE(nfs_cb_version),
+	.version		= nfs_cb_version,
+	.stats			= &cb_stats,
+	.pipe_dir_name		= "/nfsd4_cb",
 };
 };
 
 
 static int max_cb_time(void)
 static int max_cb_time(void)

+ 0 - 10
include/linux/lockd/debug.h

@@ -44,14 +44,4 @@
 #define NLMDBG_XDR		0x0100
 #define NLMDBG_XDR		0x0100
 #define NLMDBG_ALL		0x7fff
 #define NLMDBG_ALL		0x7fff
 
 
-
-/*
- * Support for printing NLM cookies in dprintk()
- */
-#ifdef RPC_DEBUG
-struct nlm_cookie;
-/* Call this function with the BKL held (it uses a static buffer) */
-extern const char *nlmdbg_cookie2a(const struct nlm_cookie *);
-#endif
-
 #endif /* LINUX_LOCKD_DEBUG_H */
 #endif /* LINUX_LOCKD_DEBUG_H */

+ 4 - 2
include/linux/lockd/lockd.h

@@ -202,9 +202,9 @@ extern u32			nsm_local_state;
  * Lockd client functions
  * Lockd client functions
  */
  */
 struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
 struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
-void		  nlm_release_call(struct nlm_rqst *);
 int		  nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 int		  nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 int		  nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 int		  nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+void		  nlmclnt_release_call(struct nlm_rqst *);
 struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
 struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
 void		  nlmclnt_finish_block(struct nlm_wait *block);
 void		  nlmclnt_finish_block(struct nlm_wait *block);
 int		  nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
 int		  nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
@@ -223,13 +223,14 @@ struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
 					const u32 version,
 					const u32 version,
 					const char *hostname,
 					const char *hostname,
 					int noresvport);
 					int noresvport);
+void		  nlmclnt_release_host(struct nlm_host *);
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 					const char *hostname,
 					const char *hostname,
 					const size_t hostname_len);
 					const size_t hostname_len);
+void		  nlmsvc_release_host(struct nlm_host *);
 struct rpc_clnt * nlm_bind_host(struct nlm_host *);
 struct rpc_clnt * nlm_bind_host(struct nlm_host *);
 void		  nlm_rebind_host(struct nlm_host *);
 void		  nlm_rebind_host(struct nlm_host *);
 struct nlm_host * nlm_get_host(struct nlm_host *);
 struct nlm_host * nlm_get_host(struct nlm_host *);
-void		  nlm_release_host(struct nlm_host *);
 void		  nlm_shutdown_hosts(void);
 void		  nlm_shutdown_hosts(void);
 void		  nlm_host_rebooted(const struct nlm_reboot *);
 void		  nlm_host_rebooted(const struct nlm_reboot *);
 
 
@@ -267,6 +268,7 @@ unsigned long	  nlmsvc_retry_blocked(void);
 void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
 void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
 					nlm_host_match_fn_t match);
 					nlm_host_match_fn_t match);
 void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
 void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
+void		  nlmsvc_release_call(struct nlm_rqst *);
 
 
 /*
 /*
  * File handling for the server personality
  * File handling for the server personality

+ 3 - 0
include/linux/nfs3.h

@@ -11,6 +11,9 @@
 #define NFS3_MAXGROUPS		16
 #define NFS3_MAXGROUPS		16
 #define NFS3_FHSIZE		64
 #define NFS3_FHSIZE		64
 #define NFS3_COOKIESIZE		4
 #define NFS3_COOKIESIZE		4
+#define NFS3_CREATEVERFSIZE	8
+#define NFS3_COOKIEVERFSIZE	8
+#define NFS3_WRITEVERFSIZE	8
 #define NFS3_FIFO_DEV		(-1)
 #define NFS3_FIFO_DEV		(-1)
 #define NFS3MODE_FMT		0170000
 #define NFS3MODE_FMT		0170000
 #define NFS3MODE_DIR		0040000
 #define NFS3MODE_DIR		0040000

+ 6 - 2
include/linux/nfs4.h

@@ -111,9 +111,13 @@
 
 
 #define EXCHGID4_FLAG_SUPP_MOVED_REFER		0x00000001
 #define EXCHGID4_FLAG_SUPP_MOVED_REFER		0x00000001
 #define EXCHGID4_FLAG_SUPP_MOVED_MIGR		0x00000002
 #define EXCHGID4_FLAG_SUPP_MOVED_MIGR		0x00000002
+#define EXCHGID4_FLAG_BIND_PRINC_STATEID	0x00000100
+
 #define EXCHGID4_FLAG_USE_NON_PNFS		0x00010000
 #define EXCHGID4_FLAG_USE_NON_PNFS		0x00010000
 #define EXCHGID4_FLAG_USE_PNFS_MDS		0x00020000
 #define EXCHGID4_FLAG_USE_PNFS_MDS		0x00020000
 #define EXCHGID4_FLAG_USE_PNFS_DS		0x00040000
 #define EXCHGID4_FLAG_USE_PNFS_DS		0x00040000
+#define EXCHGID4_FLAG_MASK_PNFS			0x00070000
+
 #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A	0x40000000
 #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A	0x40000000
 #define EXCHGID4_FLAG_CONFIRMED_R		0x80000000
 #define EXCHGID4_FLAG_CONFIRMED_R		0x80000000
 /*
 /*
@@ -121,8 +125,8 @@
  * they're set in the argument or response, have separate
  * they're set in the argument or response, have separate
  * invalid flag masks for arg (_A) and resp (_R).
  * invalid flag masks for arg (_A) and resp (_R).
  */
  */
-#define EXCHGID4_FLAG_MASK_A			0x40070003
-#define EXCHGID4_FLAG_MASK_R			0x80070003
+#define EXCHGID4_FLAG_MASK_A			0x40070103
+#define EXCHGID4_FLAG_MASK_R			0x80070103
 
 
 #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
 #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002

+ 9 - 6
include/linux/nfs_fs_sb.h

@@ -47,11 +47,6 @@ struct nfs_client {
 	u64			cl_clientid;	/* constant */
 	u64			cl_clientid;	/* constant */
 	unsigned long		cl_state;
 	unsigned long		cl_state;
 
 
-	struct rb_root		cl_openowner_id;
-	struct rb_root		cl_lockowner_id;
-
-	struct list_head	cl_delegations;
-	struct rb_root		cl_state_owners;
 	spinlock_t		cl_lock;
 	spinlock_t		cl_lock;
 
 
 	unsigned long		cl_lease_time;
 	unsigned long		cl_lease_time;
@@ -71,6 +66,7 @@ struct nfs_client {
 	 */
 	 */
 	char			cl_ipaddr[48];
 	char			cl_ipaddr[48];
 	unsigned char		cl_id_uniquifier;
 	unsigned char		cl_id_uniquifier;
+	u32			cl_cb_ident;	/* v4.0 callback identifier */
 	const struct nfs4_minor_version_ops *cl_mvops;
 	const struct nfs4_minor_version_ops *cl_mvops;
 #endif /* CONFIG_NFS_V4 */
 #endif /* CONFIG_NFS_V4 */
 
 
@@ -148,7 +144,14 @@ struct nfs_server {
 						   that are supported on this
 						   that are supported on this
 						   filesystem */
 						   filesystem */
 	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
 	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
+	struct rpc_wait_queue	roc_rpcwaitq;
+
+	/* the following fields are protected by nfs_client->cl_lock */
+	struct rb_root		state_owners;
+	struct rb_root		openowner_id;
+	struct rb_root		lockowner_id;
 #endif
 #endif
+	struct list_head	delegations;
 	void (*destroy)(struct nfs_server *);
 	void (*destroy)(struct nfs_server *);
 
 
 	atomic_t active; /* Keep trace of any activity to this server */
 	atomic_t active; /* Keep trace of any activity to this server */
@@ -196,6 +199,7 @@ struct nfs4_slot_table {
 						 * op for dynamic resizing */
 						 * op for dynamic resizing */
 	int		target_max_slots;	/* Set by CB_RECALL_SLOT as
 	int		target_max_slots;	/* Set by CB_RECALL_SLOT as
 						 * the new max_slots */
 						 * the new max_slots */
+	struct completion complete;
 };
 };
 
 
 static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
 static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
@@ -212,7 +216,6 @@ struct nfs4_session {
 	unsigned long			session_state;
 	unsigned long			session_state;
 	u32				hash_alg;
 	u32				hash_alg;
 	u32				ssv_len;
 	u32				ssv_len;
-	struct completion		complete;
 
 
 	/* The fore and back channel */
 	/* The fore and back channel */
 	struct nfs4_channel_attrs	fc_attrs;
 	struct nfs4_channel_attrs	fc_attrs;

+ 4 - 2
include/linux/nfs_xdr.h

@@ -208,6 +208,7 @@ struct nfs4_layoutget_args {
 	struct inode *inode;
 	struct inode *inode;
 	struct nfs_open_context *ctx;
 	struct nfs_open_context *ctx;
 	struct nfs4_sequence_args seq_args;
 	struct nfs4_sequence_args seq_args;
+	nfs4_stateid stateid;
 };
 };
 
 
 struct nfs4_layoutget_res {
 struct nfs4_layoutget_res {
@@ -223,7 +224,6 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_args args;
 	struct nfs4_layoutget_args args;
 	struct nfs4_layoutget_res res;
 	struct nfs4_layoutget_res res;
 	struct pnfs_layout_segment **lsegpp;
 	struct pnfs_layout_segment **lsegpp;
-	int status;
 };
 };
 
 
 struct nfs4_getdeviceinfo_args {
 struct nfs4_getdeviceinfo_args {
@@ -317,6 +317,7 @@ struct nfs_closeres {
 struct nfs_lowner {
 struct nfs_lowner {
 	__u64			clientid;
 	__u64			clientid;
 	__u64			id;
 	__u64			id;
+	dev_t			s_dev;
 };
 };
 
 
 struct nfs_lock_args {
 struct nfs_lock_args {
@@ -484,6 +485,7 @@ struct nfs_entry {
 	struct nfs_fh *		fh;
 	struct nfs_fh *		fh;
 	struct nfs_fattr *	fattr;
 	struct nfs_fattr *	fattr;
 	unsigned char		d_type;
 	unsigned char		d_type;
+	struct nfs_server *	server;
 };
 };
 
 
 /*
 /*
@@ -1089,7 +1091,7 @@ struct nfs_rpc_ops {
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 			     struct nfs_pathconf *);
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
-	__be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int plus);
+	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
 	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
 	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);

+ 4 - 4
include/linux/sunrpc/auth.h

@@ -110,9 +110,9 @@ struct rpc_credops {
 	__be32 *		(*crmarshal)(struct rpc_task *, __be32 *);
 	__be32 *		(*crmarshal)(struct rpc_task *, __be32 *);
 	int			(*crrefresh)(struct rpc_task *);
 	int			(*crrefresh)(struct rpc_task *);
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
-	int			(*crwrap_req)(struct rpc_task *, kxdrproc_t,
+	int			(*crwrap_req)(struct rpc_task *, kxdreproc_t,
 						void *, __be32 *, void *);
 						void *, __be32 *, void *);
-	int			(*crunwrap_resp)(struct rpc_task *, kxdrproc_t,
+	int			(*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
 						void *, __be32 *, void *);
 						void *, __be32 *, void *);
 };
 };
 
 
@@ -139,8 +139,8 @@ struct rpc_cred *	rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *
 void			put_rpccred(struct rpc_cred *);
 void			put_rpccred(struct rpc_cred *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
-int			rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj);
-int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_refreshcred(struct rpc_task *);
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);

+ 14 - 1
include/linux/sunrpc/bc_xprt.h

@@ -43,10 +43,18 @@ int bc_send(struct rpc_rqst *req);
  */
  */
 static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 {
 {
-	if (rqstp->rq_server->bc_xprt)
+	if (rqstp->rq_server->sv_bc_xprt)
 		return 1;
 		return 1;
 	return 0;
 	return 0;
 }
 }
+static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
+{
+	if (svc_is_backchannel(rqstp))
+		return (struct nfs4_sessionid *)
+			rqstp->rq_server->sv_bc_xprt->xpt_bc_sid;
+	return NULL;
+}
+
 #else /* CONFIG_NFS_V4_1 */
 #else /* CONFIG_NFS_V4_1 */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
 					 unsigned int min_reqs)
 					 unsigned int min_reqs)
@@ -59,6 +67,11 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 	return 0;
 	return 0;
 }
 }
 
 
+static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
+{
+	return NULL;
+}
+
 static inline void xprt_free_bc_request(struct rpc_rqst *req)
 static inline void xprt_free_bc_request(struct rpc_rqst *req)
 {
 {
 }
 }

+ 2 - 2
include/linux/sunrpc/clnt.h

@@ -89,8 +89,8 @@ struct rpc_version {
  */
  */
 struct rpc_procinfo {
 struct rpc_procinfo {
 	u32			p_proc;		/* RPC procedure number */
 	u32			p_proc;		/* RPC procedure number */
-	kxdrproc_t		p_encode;	/* XDR encode function */
-	kxdrproc_t		p_decode;	/* XDR decode function */
+	kxdreproc_t		p_encode;	/* XDR encode function */
+	kxdrdproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
 	unsigned int		p_count;	/* call count */
 	unsigned int		p_count;	/* call count */

+ 1 - 1
include/linux/sunrpc/svc.h

@@ -99,7 +99,7 @@ struct svc_serv {
 	spinlock_t		sv_cb_lock;	/* protects the svc_cb_list */
 	spinlock_t		sv_cb_lock;	/* protects the svc_cb_list */
 	wait_queue_head_t	sv_cb_waitq;	/* sleep here if there are no
 	wait_queue_head_t	sv_cb_waitq;	/* sleep here if there are no
 						 * entries in the svc_cb_list */
 						 * entries in the svc_cb_list */
-	struct svc_xprt		*bc_xprt;
+	struct svc_xprt		*sv_bc_xprt;	/* callback on fore channel */
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* CONFIG_NFS_V4_1 */
 };
 };
 
 

+ 1 - 0
include/linux/sunrpc/svc_xprt.h

@@ -78,6 +78,7 @@ struct svc_xprt {
 	size_t			xpt_remotelen;	/* length of address */
 	size_t			xpt_remotelen;	/* length of address */
 	struct rpc_wait_queue	xpt_bc_pending;	/* backchannel wait queue */
 	struct rpc_wait_queue	xpt_bc_pending;	/* backchannel wait queue */
 	struct list_head	xpt_users;	/* callbacks on free */
 	struct list_head	xpt_users;	/* callbacks on free */
+	void			*xpt_bc_sid;	/* back channel session ID */
 
 
 	struct net		*xpt_net;
 	struct net		*xpt_net;
 };
 };

+ 11 - 3
include/linux/sunrpc/xdr.h

@@ -33,8 +33,8 @@ struct xdr_netobj {
 };
 };
 
 
 /*
 /*
- * This is the generic XDR function. rqstp is either a rpc_rqst (client
- * side) or svc_rqst pointer (server side).
+ * This is the legacy generic XDR function. rqstp is either a rpc_rqst
+ * (client side) or svc_rqst pointer (server side).
  * Encode functions always assume there's enough room in the buffer.
  * Encode functions always assume there's enough room in the buffer.
  */
  */
 typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
 typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
@@ -201,14 +201,22 @@ struct xdr_stream {
 
 
 	__be32 *end;		/* end of available buffer space */
 	__be32 *end;		/* end of available buffer space */
 	struct kvec *iov;	/* pointer to the current kvec */
 	struct kvec *iov;	/* pointer to the current kvec */
+	struct kvec scratch;	/* Scratch buffer */
+	struct page **page_ptr;	/* pointer to the current page */
 };
 };
 
 
+/*
+ * These are the xdr_stream style generic XDR encode and decode functions.
+ */
+typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 		unsigned int base, unsigned int len);
 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);

+ 24 - 4
net/sunrpc/auth.c

@@ -563,8 +563,17 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p)
 	return cred->cr_ops->crvalidate(task, p);
 	return cred->cr_ops->crvalidate(task, p);
 }
 }
 
 
+static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+				   __be32 *data, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
+	encode(rqstp, &xdr, obj);
+}
+
 int
 int
-rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
+rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
 		__be32 *data, void *obj)
 		__be32 *data, void *obj)
 {
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -574,11 +583,22 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
 	if (cred->cr_ops->crwrap_req)
 	if (cred->cr_ops->crwrap_req)
 		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
 		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
 	/* By default, we encode the arguments normally. */
 	/* By default, we encode the arguments normally. */
-	return encode(rqstp, data, obj);
+	rpcauth_wrap_req_encode(encode, rqstp, data, obj);
+	return 0;
+}
+
+static int
+rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+			  __be32 *data, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
+	return decode(rqstp, &xdr, obj);
 }
 }
 
 
 int
 int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
+rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
 		__be32 *data, void *obj)
 		__be32 *data, void *obj)
 {
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -589,7 +609,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
 		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
 						   data, obj);
 						   data, obj);
 	/* By default, we decode the arguments normally. */
 	/* By default, we decode the arguments normally. */
-	return decode(rqstp, data, obj);
+	return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
 }
 }
 
 
 int
 int

+ 31 - 13
net/sunrpc/auth_gss/auth_gss.c

@@ -1231,9 +1231,19 @@ out_bad:
 	return NULL;
 	return NULL;
 }
 }
 
 
+static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+				__be32 *p, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
+	encode(rqstp, &xdr, obj);
+}
+
 static inline int
 static inline int
 gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+		   kxdreproc_t encode, struct rpc_rqst *rqstp,
+		   __be32 *p, void *obj)
 {
 {
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_buf	integ_buf;
 	struct xdr_buf	integ_buf;
@@ -1249,9 +1259,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 	*p++ = htonl(rqstp->rq_seqno);
 
 
-	status = encode(rqstp, p, obj);
-	if (status)
-		return status;
+	gss_wrap_req_encode(encode, rqstp, p, obj);
 
 
 	if (xdr_buf_subsegment(snd_buf, &integ_buf,
 	if (xdr_buf_subsegment(snd_buf, &integ_buf,
 				offset, snd_buf->len - offset))
 				offset, snd_buf->len - offset))
@@ -1325,7 +1333,8 @@ out:
 
 
 static inline int
 static inline int
 gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+		  kxdreproc_t encode, struct rpc_rqst *rqstp,
+		  __be32 *p, void *obj)
 {
 {
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	u32		offset;
 	u32		offset;
@@ -1342,9 +1351,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 	*p++ = htonl(rqstp->rq_seqno);
 
 
-	status = encode(rqstp, p, obj);
-	if (status)
-		return status;
+	gss_wrap_req_encode(encode, rqstp, p, obj);
 
 
 	status = alloc_enc_pages(rqstp);
 	status = alloc_enc_pages(rqstp);
 	if (status)
 	if (status)
@@ -1394,7 +1401,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 
 
 static int
 static int
 gss_wrap_req(struct rpc_task *task,
 gss_wrap_req(struct rpc_task *task,
-	     kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
+	     kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
 {
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
@@ -1407,12 +1414,14 @@ gss_wrap_req(struct rpc_task *task,
 		/* The spec seems a little ambiguous here, but I think that not
 		/* The spec seems a little ambiguous here, but I think that not
 		 * wrapping context destruction requests makes the most sense.
 		 * wrapping context destruction requests makes the most sense.
 		 */
 		 */
-		status = encode(rqstp, p, obj);
+		gss_wrap_req_encode(encode, rqstp, p, obj);
+		status = 0;
 		goto out;
 		goto out;
 	}
 	}
 	switch (gss_cred->gc_service) {
 	switch (gss_cred->gc_service) {
 		case RPC_GSS_SVC_NONE:
 		case RPC_GSS_SVC_NONE:
-			status = encode(rqstp, p, obj);
+			gss_wrap_req_encode(encode, rqstp, p, obj);
+			status = 0;
 			break;
 			break;
 		case RPC_GSS_SVC_INTEGRITY:
 		case RPC_GSS_SVC_INTEGRITY:
 			status = gss_wrap_req_integ(cred, ctx, encode,
 			status = gss_wrap_req_integ(cred, ctx, encode,
@@ -1494,10 +1503,19 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	return 0;
 	return 0;
 }
 }
 
 
+static int
+gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+		      __be32 *p, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	return decode(rqstp, &xdr, obj);
+}
 
 
 static int
 static int
 gss_unwrap_resp(struct rpc_task *task,
 gss_unwrap_resp(struct rpc_task *task,
-		kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
+		kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
 {
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
@@ -1528,7 +1546,7 @@ gss_unwrap_resp(struct rpc_task *task,
 	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
 	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
 						+ (savedlen - head->iov_len);
 						+ (savedlen - head->iov_len);
 out_decode:
 out_decode:
-	status = decode(rqstp, p, obj);
+	status = gss_unwrap_req_decode(decode, rqstp, p, obj);
 out:
 out:
 	gss_put_ctx(ctx);
 	gss_put_ctx(ctx);
 	dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
 	dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,

+ 1 - 1
net/sunrpc/bc_svc.c

@@ -59,8 +59,8 @@ int bc_send(struct rpc_rqst *req)
 		ret = task->tk_status;
 		ret = task->tk_status;
 		rpc_put_task(task);
 		rpc_put_task(task);
 	}
 	}
-	return ret;
 	dprintk("RPC:       bc_send ret= %d\n", ret);
 	dprintk("RPC:       bc_send ret= %d\n", ret);
+	return ret;
 }
 }
 
 
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* CONFIG_NFS_V4_1 */

+ 6 - 15
net/sunrpc/clnt.c

@@ -1095,7 +1095,7 @@ static void
 rpc_xdr_encode(struct rpc_task *task)
 rpc_xdr_encode(struct rpc_task *task)
 {
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdrproc_t	encode;
+	kxdreproc_t	encode;
 	__be32		*p;
 	__be32		*p;
 
 
 	dprint_status(task);
 	dprint_status(task);
@@ -1535,7 +1535,7 @@ call_decode(struct rpc_task *task)
 {
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdrproc_t	decode = task->tk_msg.rpc_proc->p_decode;
+	kxdrdproc_t	decode = task->tk_msg.rpc_proc->p_decode;
 	__be32		*p;
 	__be32		*p;
 
 
 	dprintk("RPC: %5u call_decode (status %d)\n",
 	dprintk("RPC: %5u call_decode (status %d)\n",
@@ -1776,12 +1776,11 @@ out_overflow:
 	goto out_garbage;
 	goto out_garbage;
 }
 }
 
 
-static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj)
+static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
 {
 {
-	return 0;
 }
 }
 
 
-static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj)
+static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
 {
 {
 	return 0;
 	return 0;
 }
 }
@@ -1830,23 +1829,15 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
 			  const struct rpc_task *task)
 			  const struct rpc_task *task)
 {
 {
 	const char *rpc_waitq = "none";
 	const char *rpc_waitq = "none";
-	char *p, action[KSYM_SYMBOL_LEN];
 
 
 	if (RPC_IS_QUEUED(task))
 	if (RPC_IS_QUEUED(task))
 		rpc_waitq = rpc_qname(task->tk_waitqueue);
 		rpc_waitq = rpc_qname(task->tk_waitqueue);
 
 
-	/* map tk_action pointer to a function name; then trim off
-	 * the "+0x0 [sunrpc]" */
-	sprint_symbol(action, (unsigned long)task->tk_action);
-	p = strchr(action, '+');
-	if (p)
-		*p = '\0';
-
-	printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n",
+	printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
 		task->tk_pid, task->tk_flags, task->tk_status,
 		task->tk_pid, task->tk_flags, task->tk_status,
 		clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
 		clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
 		clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
 		clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
-		action, rpc_waitq);
+		task->tk_action, rpc_waitq);
 }
 }
 
 
 void rpc_show_tasks(void)
 void rpc_show_tasks(void)

+ 1 - 1
net/sunrpc/rpc_pipe.c

@@ -474,7 +474,7 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
 {
 {
 	struct inode *inode;
 	struct inode *inode;
 
 
-	BUG_ON(!d_unhashed(dentry));
+	d_drop(dentry);
 	inode = rpc_get_inode(dir->i_sb, mode);
 	inode = rpc_get_inode(dir->i_sb, mode);
 	if (!inode)
 	if (!inode)
 		goto out_err;
 		goto out_err;

+ 56 - 91
net/sunrpc/rpcb_clnt.c

@@ -57,10 +57,6 @@ enum {
 	RPCBPROC_GETSTAT,
 	RPCBPROC_GETSTAT,
 };
 };
 
 
-#define RPCB_HIGHPROC_2		RPCBPROC_CALLIT
-#define RPCB_HIGHPROC_3		RPCBPROC_TADDR2UADDR
-#define RPCB_HIGHPROC_4		RPCBPROC_GETSTAT
-
 /*
 /*
  * r_owner
  * r_owner
  *
  *
@@ -693,46 +689,37 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
  * XDR functions for rpcbind
  * XDR functions for rpcbind
  */
  */
 
 
-static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p,
-			    const struct rpcbind_args *rpcb)
+static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     const struct rpcbind_args *rpcb)
 {
 {
 	struct rpc_task *task = req->rq_task;
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 
 
 	dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
 	dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
 			task->tk_pid, task->tk_msg.rpc_proc->p_name,
 			task->tk_pid, task->tk_msg.rpc_proc->p_name,
 			rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
 			rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
 
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-
-	p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz);
-	if (unlikely(p == NULL))
-		return -EIO;
-
-	*p++ = htonl(rpcb->r_prog);
-	*p++ = htonl(rpcb->r_vers);
-	*p++ = htonl(rpcb->r_prot);
-	*p   = htonl(rpcb->r_port);
-
-	return 0;
+	p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2);
+	*p++ = cpu_to_be32(rpcb->r_prog);
+	*p++ = cpu_to_be32(rpcb->r_vers);
+	*p++ = cpu_to_be32(rpcb->r_prot);
+	*p   = cpu_to_be32(rpcb->r_port);
 }
 }
 
 
-static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
 			    struct rpcbind_args *rpcb)
 			    struct rpcbind_args *rpcb)
 {
 {
 	struct rpc_task *task = req->rq_task;
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
 	unsigned long port;
 	unsigned long port;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	__be32 *p;
 
 
 	rpcb->r_port = 0;
 	rpcb->r_port = 0;
 
 
-	p = xdr_inline_decode(&xdr, sizeof(__be32));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
 
 
-	port = ntohl(*p);
+	port = be32_to_cpup(p);
 	dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
 	dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
 			task->tk_msg.rpc_proc->p_name, port);
 			task->tk_msg.rpc_proc->p_name, port);
 	if (unlikely(port > USHRT_MAX))
 	if (unlikely(port > USHRT_MAX))
@@ -742,20 +729,18 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
 	return 0;
 	return 0;
 }
 }
 
 
-static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
 			unsigned int *boolp)
 			unsigned int *boolp)
 {
 {
 	struct rpc_task *task = req->rq_task;
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	__be32 *p;
 
 
-	p = xdr_inline_decode(&xdr, sizeof(__be32));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		return -EIO;
 		return -EIO;
 
 
 	*boolp = 0;
 	*boolp = 0;
-	if (*p)
+	if (*p != xdr_zero)
 		*boolp = 1;
 		*boolp = 1;
 
 
 	dprintk("RPC: %5u RPCB_%s call %s\n",
 	dprintk("RPC: %5u RPCB_%s call %s\n",
@@ -764,73 +749,53 @@ static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
 	return 0;
 	return 0;
 }
 }
 
 
-static int encode_rpcb_string(struct xdr_stream *xdr, const char *string,
-				const u32 maxstrlen)
+static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
+			       const u32 maxstrlen)
 {
 {
-	u32 len;
 	__be32 *p;
 	__be32 *p;
+	u32 len;
 
 
-	if (unlikely(string == NULL))
-		return -EIO;
 	len = strlen(string);
 	len = strlen(string);
-	if (unlikely(len > maxstrlen))
-		return -EIO;
-
-	p = xdr_reserve_space(xdr, sizeof(__be32) + len);
-	if (unlikely(p == NULL))
-		return -EIO;
+	BUG_ON(len > maxstrlen);
+	p = xdr_reserve_space(xdr, 4 + len);
 	xdr_encode_opaque(p, string, len);
 	xdr_encode_opaque(p, string, len);
-
-	return 0;
 }
 }
 
 
-static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p,
-			    const struct rpcbind_args *rpcb)
+static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     const struct rpcbind_args *rpcb)
 {
 {
 	struct rpc_task *task = req->rq_task;
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 
 
 	dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
 	dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
 			task->tk_pid, task->tk_msg.rpc_proc->p_name,
 			task->tk_pid, task->tk_msg.rpc_proc->p_name,
 			rpcb->r_prog, rpcb->r_vers,
 			rpcb->r_prog, rpcb->r_vers,
 			rpcb->r_netid, rpcb->r_addr);
 			rpcb->r_netid, rpcb->r_addr);
 
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-
-	p = xdr_reserve_space(&xdr,
-			sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz));
-	if (unlikely(p == NULL))
-		return -EIO;
-	*p++ = htonl(rpcb->r_prog);
-	*p = htonl(rpcb->r_vers);
-
-	if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN))
-		return -EIO;
-	if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN))
-		return -EIO;
-	if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN))
-		return -EIO;
+	p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
+	*p++ = cpu_to_be32(rpcb->r_prog);
+	*p = cpu_to_be32(rpcb->r_vers);
 
 
-	return 0;
+	encode_rpcb_string(xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN);
+	encode_rpcb_string(xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN);
+	encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
 }
 }
 
 
-static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 			    struct rpcbind_args *rpcb)
 			    struct rpcbind_args *rpcb)
 {
 {
 	struct sockaddr_storage address;
 	struct sockaddr_storage address;
 	struct sockaddr *sap = (struct sockaddr *)&address;
 	struct sockaddr *sap = (struct sockaddr *)&address;
 	struct rpc_task *task = req->rq_task;
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 	u32 len;
 	u32 len;
 
 
 	rpcb->r_port = 0;
 	rpcb->r_port = 0;
 
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	p = xdr_inline_decode(&xdr, sizeof(__be32));
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		goto out_fail;
 		goto out_fail;
-	len = ntohl(*p);
+	len = be32_to_cpup(p);
 
 
 	/*
 	/*
 	 * If the returned universal address is a null string,
 	 * If the returned universal address is a null string,
@@ -845,7 +810,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
 	if (unlikely(len > RPCBIND_MAXUADDRLEN))
 	if (unlikely(len > RPCBIND_MAXUADDRLEN))
 		goto out_fail;
 		goto out_fail;
 
 
-	p = xdr_inline_decode(&xdr, len);
+	p = xdr_inline_decode(xdr, len);
 	if (unlikely(p == NULL))
 	if (unlikely(p == NULL))
 		goto out_fail;
 		goto out_fail;
 	dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid,
 	dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid,
@@ -871,8 +836,8 @@ out_fail:
 static struct rpc_procinfo rpcb_procedures2[] = {
 static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
 		.p_statidx	= RPCBPROC_SET,
@@ -881,8 +846,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	},
 	[RPCBPROC_UNSET] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -891,8 +856,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	},
 	[RPCBPROC_GETPORT] = {
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
 		.p_proc		= RPCBPROC_GETPORT,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getport,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
 		.p_replen	= RPCB_getportres_sz,
 		.p_statidx	= RPCBPROC_GETPORT,
 		.p_statidx	= RPCBPROC_GETPORT,
@@ -904,8 +869,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 static struct rpc_procinfo rpcb_procedures3[] = {
 static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
 		.p_statidx	= RPCBPROC_SET,
@@ -914,8 +879,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	},
 	[RPCBPROC_UNSET] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -924,8 +889,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	},
 	[RPCBPROC_GETADDR] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
 		.p_statidx	= RPCBPROC_GETADDR,
@@ -937,8 +902,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 static struct rpc_procinfo rpcb_procedures4[] = {
 static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
 		.p_statidx	= RPCBPROC_SET,
@@ -947,8 +912,8 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	},
 	[RPCBPROC_UNSET] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -957,8 +922,8 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	},
 	[RPCBPROC_GETADDR] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
 		.p_statidx	= RPCBPROC_GETADDR,
@@ -993,19 +958,19 @@ static struct rpcb_info rpcb_next_version6[] = {
 
 
 static struct rpc_version rpcb_version2 = {
 static struct rpc_version rpcb_version2 = {
 	.number		= RPCBVERS_2,
 	.number		= RPCBVERS_2,
-	.nrprocs	= RPCB_HIGHPROC_2,
+	.nrprocs	= ARRAY_SIZE(rpcb_procedures2),
 	.procs		= rpcb_procedures2
 	.procs		= rpcb_procedures2
 };
 };
 
 
 static struct rpc_version rpcb_version3 = {
 static struct rpc_version rpcb_version3 = {
 	.number		= RPCBVERS_3,
 	.number		= RPCBVERS_3,
-	.nrprocs	= RPCB_HIGHPROC_3,
+	.nrprocs	= ARRAY_SIZE(rpcb_procedures3),
 	.procs		= rpcb_procedures3
 	.procs		= rpcb_procedures3
 };
 };
 
 
 static struct rpc_version rpcb_version4 = {
 static struct rpc_version rpcb_version4 = {
 	.number		= RPCBVERS_4,
 	.number		= RPCBVERS_4,
-	.nrprocs	= RPCB_HIGHPROC_4,
+	.nrprocs	= ARRAY_SIZE(rpcb_procedures4),
 	.procs		= rpcb_procedures4
 	.procs		= rpcb_procedures4
 };
 };
 
 

+ 17 - 19
net/sunrpc/svc.c

@@ -488,10 +488,6 @@ svc_destroy(struct svc_serv *serv)
 	if (svc_serv_is_pooled(serv))
 	if (svc_serv_is_pooled(serv))
 		svc_pool_map_put();
 		svc_pool_map_put();
 
 
-#if defined(CONFIG_NFS_V4_1)
-	svc_sock_destroy(serv->bc_xprt);
-#endif /* CONFIG_NFS_V4_1 */
-
 	svc_unregister(serv);
 	svc_unregister(serv);
 	kfree(serv->sv_pools);
 	kfree(serv->sv_pools);
 	kfree(serv);
 	kfree(serv);
@@ -1147,7 +1143,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
  dropit:
  dropit:
 	svc_authorise(rqstp);	/* doesn't hurt to call this twice */
 	svc_authorise(rqstp);	/* doesn't hurt to call this twice */
 	dprintk("svc: svc_process dropit\n");
 	dprintk("svc: svc_process dropit\n");
-	svc_drop(rqstp);
 	return 0;
 	return 0;
 
 
 err_short_len:
 err_short_len:
@@ -1218,7 +1213,6 @@ svc_process(struct svc_rqst *rqstp)
 	struct kvec		*resv = &rqstp->rq_res.head[0];
 	struct kvec		*resv = &rqstp->rq_res.head[0];
 	struct svc_serv		*serv = rqstp->rq_server;
 	struct svc_serv		*serv = rqstp->rq_server;
 	u32			dir;
 	u32			dir;
-	int			error;
 
 
 	/*
 	/*
 	 * Setup response xdr_buf.
 	 * Setup response xdr_buf.
@@ -1246,11 +1240,13 @@ svc_process(struct svc_rqst *rqstp)
 		return 0;
 		return 0;
 	}
 	}
 
 
-	error = svc_process_common(rqstp, argv, resv);
-	if (error <= 0)
-		return error;
-
-	return svc_send(rqstp);
+	/* Returns 1 for send, 0 for drop */
+	if (svc_process_common(rqstp, argv, resv))
+		return svc_send(rqstp);
+	else {
+		svc_drop(rqstp);
+		return 0;
+	}
 }
 }
 
 
 #if defined(CONFIG_NFS_V4_1)
 #if defined(CONFIG_NFS_V4_1)
@@ -1264,10 +1260,9 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 {
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
-	int 		error;
 
 
 	/* Build the svc_rqst used by the common processing routine */
 	/* Build the svc_rqst used by the common processing routine */
-	rqstp->rq_xprt = serv->bc_xprt;
+	rqstp->rq_xprt = serv->sv_bc_xprt;
 	rqstp->rq_xid = req->rq_xid;
 	rqstp->rq_xid = req->rq_xid;
 	rqstp->rq_prot = req->rq_xprt->prot;
 	rqstp->rq_prot = req->rq_xprt->prot;
 	rqstp->rq_server = serv;
 	rqstp->rq_server = serv;
@@ -1292,12 +1287,15 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 	svc_getu32(argv);	/* XID */
 	svc_getu32(argv);	/* XID */
 	svc_getnl(argv);	/* CALLDIR */
 	svc_getnl(argv);	/* CALLDIR */
 
 
-	error = svc_process_common(rqstp, argv, resv);
-	if (error <= 0)
-		return error;
-
-	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
-	return bc_send(req);
+	/* Returns 1 for send, 0 for drop */
+	if (svc_process_common(rqstp, argv, resv)) {
+		memcpy(&req->rq_snd_buf, &rqstp->rq_res,
+						sizeof(req->rq_snd_buf));
+		return bc_send(req);
+	} else {
+		/* Nothing to do to drop request */
+		return 0;
+	}
 }
 }
 EXPORT_SYMBOL(bc_svc_process);
 EXPORT_SYMBOL(bc_svc_process);
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* CONFIG_NFS_V4_1 */

+ 85 - 21
net/sunrpc/svcsock.c

@@ -66,6 +66,13 @@ static void		svc_sock_free(struct svc_xprt *);
 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
 					  struct net *, struct sockaddr *,
 					  struct net *, struct sockaddr *,
 					  int, int);
 					  int, int);
+#if defined(CONFIG_NFS_V4_1)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
+					     struct net *, struct sockaddr *,
+					     int, int);
+static void svc_bc_sock_free(struct svc_xprt *xprt);
+#endif /* CONFIG_NFS_V4_1 */
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key svc_key[2];
 static struct lock_class_key svc_key[2];
 static struct lock_class_key svc_slock_key[2];
 static struct lock_class_key svc_slock_key[2];
@@ -1184,6 +1191,57 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
 	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
 	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
 }
 }
 
 
+#if defined(CONFIG_NFS_V4_1)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
+					     struct net *, struct sockaddr *,
+					     int, int);
+static void svc_bc_sock_free(struct svc_xprt *xprt);
+
+static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv,
+				       struct net *net,
+				       struct sockaddr *sa, int salen,
+				       int flags)
+{
+	return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
+}
+
+static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
+{
+}
+
+static struct svc_xprt_ops svc_tcp_bc_ops = {
+	.xpo_create = svc_bc_tcp_create,
+	.xpo_detach = svc_bc_tcp_sock_detach,
+	.xpo_free = svc_bc_sock_free,
+	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+};
+
+static struct svc_xprt_class svc_tcp_bc_class = {
+	.xcl_name = "tcp-bc",
+	.xcl_owner = THIS_MODULE,
+	.xcl_ops = &svc_tcp_bc_ops,
+	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+};
+
+static void svc_init_bc_xprt_sock(void)
+{
+	svc_reg_xprt_class(&svc_tcp_bc_class);
+}
+
+static void svc_cleanup_bc_xprt_sock(void)
+{
+	svc_unreg_xprt_class(&svc_tcp_bc_class);
+}
+#else /* CONFIG_NFS_V4_1 */
+static void svc_init_bc_xprt_sock(void)
+{
+}
+
+static void svc_cleanup_bc_xprt_sock(void)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 static struct svc_xprt_ops svc_tcp_ops = {
 static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_create = svc_tcp_create,
 	.xpo_create = svc_tcp_create,
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_recvfrom = svc_tcp_recvfrom,
@@ -1207,12 +1265,14 @@ void svc_init_xprt_sock(void)
 {
 {
 	svc_reg_xprt_class(&svc_tcp_class);
 	svc_reg_xprt_class(&svc_tcp_class);
 	svc_reg_xprt_class(&svc_udp_class);
 	svc_reg_xprt_class(&svc_udp_class);
+	svc_init_bc_xprt_sock();
 }
 }
 
 
 void svc_cleanup_xprt_sock(void)
 void svc_cleanup_xprt_sock(void)
 {
 {
 	svc_unreg_xprt_class(&svc_tcp_class);
 	svc_unreg_xprt_class(&svc_tcp_class);
 	svc_unreg_xprt_class(&svc_udp_class);
 	svc_unreg_xprt_class(&svc_udp_class);
+	svc_cleanup_bc_xprt_sock();
 }
 }
 
 
 static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -1509,41 +1569,45 @@ static void svc_sock_free(struct svc_xprt *xprt)
 	kfree(svsk);
 	kfree(svsk);
 }
 }
 
 
+#if defined(CONFIG_NFS_V4_1)
 /*
 /*
- * Create a svc_xprt.
- *
- * For internal use only (e.g. nfsv4.1 backchannel).
- * Callers should typically use the xpo_create() method.
+ * Create a back channel svc_xprt which shares the fore channel socket.
  */
  */
-struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
+					     int protocol,
+					     struct net *net,
+					     struct sockaddr *sin, int len,
+					     int flags)
 {
 {
 	struct svc_sock *svsk;
 	struct svc_sock *svsk;
-	struct svc_xprt *xprt = NULL;
+	struct svc_xprt *xprt;
+
+	if (protocol != IPPROTO_TCP) {
+		printk(KERN_WARNING "svc: only TCP sockets"
+			" supported on shared back channel\n");
+		return ERR_PTR(-EINVAL);
+	}
 
 
-	dprintk("svc: %s\n", __func__);
 	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
 	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
 	if (!svsk)
 	if (!svsk)
-		goto out;
+		return ERR_PTR(-ENOMEM);
 
 
 	xprt = &svsk->sk_xprt;
 	xprt = &svsk->sk_xprt;
-	if (prot == IPPROTO_TCP)
-		svc_xprt_init(&svc_tcp_class, xprt, serv);
-	else if (prot == IPPROTO_UDP)
-		svc_xprt_init(&svc_udp_class, xprt, serv);
-	else
-		BUG();
-out:
-	dprintk("svc: %s return %p\n", __func__, xprt);
+	svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+
+	serv->sv_bc_xprt = xprt;
+
 	return xprt;
 	return xprt;
 }
 }
-EXPORT_SYMBOL_GPL(svc_sock_create);
 
 
 /*
 /*
- * Destroy a svc_sock.
+ * Free a back channel svc_sock.
  */
  */
-void svc_sock_destroy(struct svc_xprt *xprt)
+static void svc_bc_sock_free(struct svc_xprt *xprt)
 {
 {
-	if (xprt)
+	if (xprt) {
+		kfree(xprt->xpt_bc_sid);
 		kfree(container_of(xprt, struct svc_sock, sk_xprt));
 		kfree(container_of(xprt, struct svc_sock, sk_xprt));
+	}
 }
 }
-EXPORT_SYMBOL_GPL(svc_sock_destroy);
+#endif /* CONFIG_NFS_V4_1 */

+ 124 - 31
net/sunrpc/xdr.c

@@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
 }
 }
 EXPORT_SYMBOL_GPL(xdr_write_pages);
 EXPORT_SYMBOL_GPL(xdr_write_pages);
 
 
+static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
+		__be32 *p, unsigned int len)
+{
+	if (len > iov->iov_len)
+		len = iov->iov_len;
+	if (p == NULL)
+		p = (__be32*)iov->iov_base;
+	xdr->p = p;
+	xdr->end = (__be32*)(iov->iov_base + len);
+	xdr->iov = iov;
+	xdr->page_ptr = NULL;
+}
+
+static int xdr_set_page_base(struct xdr_stream *xdr,
+		unsigned int base, unsigned int len)
+{
+	unsigned int pgnr;
+	unsigned int maxlen;
+	unsigned int pgoff;
+	unsigned int pgend;
+	void *kaddr;
+
+	maxlen = xdr->buf->page_len;
+	if (base >= maxlen)
+		return -EINVAL;
+	maxlen -= base;
+	if (len > maxlen)
+		len = maxlen;
+
+	base += xdr->buf->page_base;
+
+	pgnr = base >> PAGE_SHIFT;
+	xdr->page_ptr = &xdr->buf->pages[pgnr];
+	kaddr = page_address(*xdr->page_ptr);
+
+	pgoff = base & ~PAGE_MASK;
+	xdr->p = (__be32*)(kaddr + pgoff);
+
+	pgend = pgoff + len;
+	if (pgend > PAGE_SIZE)
+		pgend = PAGE_SIZE;
+	xdr->end = (__be32*)(kaddr + pgend);
+	xdr->iov = NULL;
+	return 0;
+}
+
+static void xdr_set_next_page(struct xdr_stream *xdr)
+{
+	unsigned int newbase;
+
+	newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
+	newbase -= xdr->buf->page_base;
+
+	if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
+		xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+}
+
+static bool xdr_set_next_buffer(struct xdr_stream *xdr)
+{
+	if (xdr->page_ptr != NULL)
+		xdr_set_next_page(xdr);
+	else if (xdr->iov == xdr->buf->head) {
+		if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
+			xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+	}
+	return xdr->p != xdr->end;
+}
+
 /**
 /**
  * xdr_init_decode - Initialize an xdr_stream for decoding data.
  * xdr_init_decode - Initialize an xdr_stream for decoding data.
  * @xdr: pointer to xdr_stream struct
  * @xdr: pointer to xdr_stream struct
@@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages);
  */
  */
 void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 {
 {
-	struct kvec *iov = buf->head;
-	unsigned int len = iov->iov_len;
-
-	if (len > buf->len)
-		len = buf->len;
 	xdr->buf = buf;
 	xdr->buf = buf;
-	xdr->iov = iov;
-	xdr->p = p;
-	xdr->end = (__be32 *)((char *)iov->iov_base + len);
+	xdr->scratch.iov_base = NULL;
+	xdr->scratch.iov_len = 0;
+	if (buf->head[0].iov_len != 0)
+		xdr_set_iov(xdr, buf->head, p, buf->len);
+	else if (buf->page_len != 0)
+		xdr_set_page_base(xdr, 0, buf->len);
 }
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
 
-/**
- * xdr_inline_peek - Allow read-ahead in the XDR data stream
- * @xdr: pointer to xdr_stream struct
- * @nbytes: number of bytes of data to decode
- *
- * Check if the input buffer is long enough to enable us to decode
- * 'nbytes' more bytes of data starting at the current position.
- * If so return the current pointer without updating the current
- * pointer position.
- */
-__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
+static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
 {
 	__be32 *p = xdr->p;
 	__be32 *p = xdr->p;
 	__be32 *q = p + XDR_QUADLEN(nbytes);
 	__be32 *q = p + XDR_QUADLEN(nbytes);
 
 
 	if (unlikely(q > xdr->end || q < p))
 	if (unlikely(q > xdr->end || q < p))
 		return NULL;
 		return NULL;
+	xdr->p = q;
 	return p;
 	return p;
 }
 }
-EXPORT_SYMBOL_GPL(xdr_inline_peek);
 
 
 /**
 /**
- * xdr_inline_decode - Retrieve non-page XDR data to decode
+ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to an empty buffer
+ * @buflen: size of 'buf'
+ *
+ * The scratch buffer is used when decoding from an array of pages.
+ * If an xdr_inline_decode() call spans across page boundaries, then
+ * we copy the data into the scratch buffer in order to allow linear
+ * access.
+ */
+void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
+{
+	xdr->scratch.iov_base = buf;
+	xdr->scratch.iov_len = buflen;
+}
+EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
+
+static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
+{
+	__be32 *p;
+	void *cpdest = xdr->scratch.iov_base;
+	size_t cplen = (char *)xdr->end - (char *)xdr->p;
+
+	if (nbytes > xdr->scratch.iov_len)
+		return NULL;
+	memcpy(cpdest, xdr->p, cplen);
+	cpdest += cplen;
+	nbytes -= cplen;
+	if (!xdr_set_next_buffer(xdr))
+		return NULL;
+	p = __xdr_inline_decode(xdr, nbytes);
+	if (p == NULL)
+		return NULL;
+	memcpy(cpdest, p, nbytes);
+	return xdr->scratch.iov_base;
+}
+
+/**
+ * xdr_inline_decode - Retrieve XDR data to decode
  * @xdr: pointer to xdr_stream struct
  * @xdr: pointer to xdr_stream struct
  * @nbytes: number of bytes of data to decode
  * @nbytes: number of bytes of data to decode
  *
  *
@@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek);
  */
  */
 __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
 {
-	__be32 *p = xdr->p;
-	__be32 *q = p + XDR_QUADLEN(nbytes);
+	__be32 *p;
 
 
-	if (unlikely(q > xdr->end || q < p))
+	if (nbytes == 0)
+		return xdr->p;
+	if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
 		return NULL;
 		return NULL;
-	xdr->p = q;
-	return p;
+	p = __xdr_inline_decode(xdr, nbytes);
+	if (p != NULL)
+		return p;
+	return xdr_copy_to_scratch(xdr, nbytes);
 }
 }
 EXPORT_SYMBOL_GPL(xdr_inline_decode);
 EXPORT_SYMBOL_GPL(xdr_inline_decode);
 
 
@@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
  */
  */
 void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
 void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
 {
 {
-	char * kaddr = page_address(xdr->buf->pages[0]);
 	xdr_read_pages(xdr, len);
 	xdr_read_pages(xdr, len);
 	/*
 	/*
 	 * Position current pointer at beginning of tail, and
 	 * Position current pointer at beginning of tail, and
 	 * set remaining message length.
 	 * set remaining message length.
 	 */
 	 */
-	if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
-		len = PAGE_CACHE_SIZE - xdr->buf->page_base;
-	xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
-	xdr->end = (__be32 *)((char *)xdr->p + len);
+	xdr_set_page_base(xdr, 0, len);
 }
 }
 EXPORT_SYMBOL_GPL(xdr_enter_page);
 EXPORT_SYMBOL_GPL(xdr_enter_page);
 
 

Some files were not shown because too many files changed in this diff