Browse Source

Merge rsync://client.linux-nfs.org/pub/linux/nfs-2.6

Linus Torvalds 20 years ago
parent
commit
9092131f7e
68 changed files with 4135 additions and 1004 deletions
  1. 35 0
      fs/Kconfig
  2. 1 0
      fs/Makefile
  3. 62 51
      fs/lockd/clntlock.c
  4. 32 8
      fs/lockd/clntproc.c
  5. 3 5
      fs/lockd/host.c
  6. 3 4
      fs/lockd/mon.c
  7. 6 0
      fs/locks.c
  8. 1 0
      fs/nfs/Makefile
  9. 1 0
      fs/nfs/callback.c
  10. 1 0
      fs/nfs/callback_proc.c
  11. 1 1
      fs/nfs/callback_xdr.c
  12. 1 0
      fs/nfs/delegation.c
  13. 130 30
      fs/nfs/dir.c
  14. 1 1
      fs/nfs/direct.c
  15. 41 7
      fs/nfs/file.c
  16. 1 0
      fs/nfs/idmap.c
  17. 255 172
      fs/nfs/inode.c
  18. 1 3
      fs/nfs/mount_clnt.c
  19. 403 0
      fs/nfs/nfs3acl.c
  20. 35 8
      fs/nfs/nfs3proc.c
  21. 147 0
      fs/nfs/nfs3xdr.c
  22. 253 0
      fs/nfs/nfs4_fs.h
  23. 339 90
      fs/nfs/nfs4proc.c
  24. 1 0
      fs/nfs/nfs4renewd.c
  25. 84 109
      fs/nfs/nfs4state.c
  26. 219 22
      fs/nfs/nfs4xdr.c
  27. 9 0
      fs/nfs/nfsroot.c
  28. 108 34
      fs/nfs/pagelist.c
  29. 1 0
      fs/nfs/proc.c
  30. 0 3
      fs/nfs/read.c
  31. 54 54
      fs/nfs/write.c
  32. 7 0
      fs/nfs_common/Makefile
  33. 257 0
      fs/nfs_common/nfsacl.c
  34. 2 0
      fs/nfsd/Makefile
  35. 336 0
      fs/nfsd/nfs2acl.c
  36. 267 0
      fs/nfsd/nfs3acl.c
  37. 13 0
      fs/nfsd/nfs3xdr.c
  38. 1 3
      fs/nfsd/nfs4callback.c
  39. 1 0
      fs/nfsd/nfsproc.c
  40. 28 0
      fs/nfsd/nfssvc.c
  41. 11 0
      fs/nfsd/nfsxdr.c
  42. 106 1
      fs/nfsd/vfs.c
  43. 1 0
      include/linux/fs.h
  44. 6 1
      include/linux/lockd/lockd.h
  45. 2 0
      include/linux/nfs4.h
  46. 59 247
      include/linux/nfs_fs.h
  47. 5 0
      include/linux/nfs_fs_i.h
  48. 1 0
      include/linux/nfs_fs_sb.h
  49. 1 0
      include/linux/nfs_mount.h
  50. 24 6
      include/linux/nfs_page.h
  51. 43 0
      include/linux/nfs_xdr.h
  52. 58 0
      include/linux/nfsacl.h
  53. 16 0
      include/linux/nfsd/nfsd.h
  54. 4 0
      include/linux/nfsd/xdr.h
  55. 26 0
      include/linux/nfsd/xdr3.h
  56. 6 0
      include/linux/sunrpc/clnt.h
  57. 0 1
      include/linux/sunrpc/sched.h
  58. 13 1
      include/linux/sunrpc/svc.h
  59. 19 2
      include/linux/sunrpc/xdr.h
  60. 3 3
      net/sunrpc/auth.c
  61. 11 7
      net/sunrpc/auth_gss/auth_gss.c
  62. 157 48
      net/sunrpc/clnt.c
  63. 5 4
      net/sunrpc/pmap_clnt.c
  64. 48 36
      net/sunrpc/sched.c
  65. 5 1
      net/sunrpc/sunrpc_syms.c
  66. 19 17
      net/sunrpc/svc.c
  67. 288 10
      net/sunrpc/xdr.c
  68. 57 14
      net/sunrpc/xprt.c

+ 35 - 0
fs/Kconfig

@@ -1268,6 +1268,7 @@ config NFS_FS
 	depends on INET
 	depends on INET
 	select LOCKD
 	select LOCKD
 	select SUNRPC
 	select SUNRPC
+	select NFS_ACL_SUPPORT if NFS_V3_ACL
 	help
 	help
 	  If you are connected to some other (usually local) Unix computer
 	  If you are connected to some other (usually local) Unix computer
 	  (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing
 	  (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing
@@ -1310,6 +1311,16 @@ config NFS_V3
 
 
 	  If unsure, say Y.
 	  If unsure, say Y.
 
 
+config NFS_V3_ACL
+	bool "Provide client support for the NFSv3 ACL protocol extension"
+	depends on NFS_V3
+	help
+	  Implement the NFSv3 ACL protocol extension for manipulating POSIX
+	  Access Control Lists.  The server should also be compiled with
+	  the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option.
+
+	  If unsure, say N.
+
 config NFS_V4
 config NFS_V4
 	bool "Provide NFSv4 client support (EXPERIMENTAL)"
 	bool "Provide NFSv4 client support (EXPERIMENTAL)"
 	depends on NFS_FS && EXPERIMENTAL
 	depends on NFS_FS && EXPERIMENTAL
@@ -1353,6 +1364,7 @@ config NFSD
 	select LOCKD
 	select LOCKD
 	select SUNRPC
 	select SUNRPC
 	select EXPORTFS
 	select EXPORTFS
+	select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL
 	help
 	help
 	  If you want your Linux box to act as an NFS *server*, so that other
 	  If you want your Linux box to act as an NFS *server*, so that other
 	  computers on your local network which support NFS can access certain
 	  computers on your local network which support NFS can access certain
@@ -1376,6 +1388,10 @@ config NFSD
 	  To compile the NFS server support as a module, choose M here: the
 	  To compile the NFS server support as a module, choose M here: the
 	  module will be called nfsd.  If unsure, say N.
 	  module will be called nfsd.  If unsure, say N.
 
 
+config NFSD_V2_ACL
+	bool
+	depends on NFSD
+
 config NFSD_V3
 config NFSD_V3
 	bool "Provide NFSv3 server support"
 	bool "Provide NFSv3 server support"
 	depends on NFSD
 	depends on NFSD
@@ -1383,6 +1399,16 @@ config NFSD_V3
 	  If you would like to include the NFSv3 server as well as the NFSv2
 	  If you would like to include the NFSv3 server as well as the NFSv2
 	  server, say Y here.  If unsure, say Y.
 	  server, say Y here.  If unsure, say Y.
 
 
+config NFSD_V3_ACL
+	bool "Provide server support for the NFSv3 ACL protocol extension"
+	depends on NFSD_V3
+	select NFSD_V2_ACL
+	help
+	  Implement the NFSv3 ACL protocol extension for manipulating POSIX
+	  Access Control Lists on exported file systems. NFS clients should
+	  be compiled with the NFSv3 ACL protocol extension; see the
+	  CONFIG_NFS_V3_ACL option.  If unsure, say N.
+
 config NFSD_V4
 config NFSD_V4
 	bool "Provide NFSv4 server support (EXPERIMENTAL)"
 	bool "Provide NFSv4 server support (EXPERIMENTAL)"
 	depends on NFSD_V3 && EXPERIMENTAL
 	depends on NFSD_V3 && EXPERIMENTAL
@@ -1427,6 +1453,15 @@ config LOCKD_V4
 config EXPORTFS
 config EXPORTFS
 	tristate
 	tristate
 
 
+config NFS_ACL_SUPPORT
+	tristate
+	select FS_POSIX_ACL
+
+config NFS_COMMON
+	bool
+	depends on NFSD || NFS_FS
+	default y
+
 config SUNRPC
 config SUNRPC
 	tristate
 	tristate
 
 

+ 1 - 0
fs/Makefile

@@ -31,6 +31,7 @@ obj-$(CONFIG_BINFMT_FLAT)	+= binfmt_flat.o
 
 
 obj-$(CONFIG_FS_MBCACHE)	+= mbcache.o
 obj-$(CONFIG_FS_MBCACHE)	+= mbcache.o
 obj-$(CONFIG_FS_POSIX_ACL)	+= posix_acl.o xattr_acl.o
 obj-$(CONFIG_FS_POSIX_ACL)	+= posix_acl.o xattr_acl.o
+obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
 
 
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o

+ 62 - 51
fs/lockd/clntlock.c

@@ -31,7 +31,7 @@ static int			reclaimer(void *ptr);
  * This is the representation of a blocked client lock.
  * This is the representation of a blocked client lock.
  */
  */
 struct nlm_wait {
 struct nlm_wait {
-	struct nlm_wait *	b_next;		/* linked list */
+	struct list_head	b_list;		/* linked list */
 	wait_queue_head_t	b_wait;		/* where to wait on */
 	wait_queue_head_t	b_wait;		/* where to wait on */
 	struct nlm_host *	b_host;
 	struct nlm_host *	b_host;
 	struct file_lock *	b_lock;		/* local file lock */
 	struct file_lock *	b_lock;		/* local file lock */
@@ -39,27 +39,54 @@ struct nlm_wait {
 	u32			b_status;	/* grant callback status */
 	u32			b_status;	/* grant callback status */
 };
 };
 
 
-static struct nlm_wait *	nlm_blocked;
+static LIST_HEAD(nlm_blocked);
 
 
 /*
 /*
- * Block on a lock
+ * Queue up a lock for blocking so that the GRANTED request can see it
  */
  */
-int
-nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
+int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+{
+	struct nlm_wait *block;
+
+	BUG_ON(req->a_block != NULL);
+	block = kmalloc(sizeof(*block), GFP_KERNEL);
+	if (block == NULL)
+		return -ENOMEM;
+	block->b_host = host;
+	block->b_lock = fl;
+	init_waitqueue_head(&block->b_wait);
+	block->b_status = NLM_LCK_BLOCKED;
+
+	list_add(&block->b_list, &nlm_blocked);
+	req->a_block = block;
+
+	return 0;
+}
+
+void nlmclnt_finish_block(struct nlm_rqst *req)
 {
 {
-	struct nlm_wait	block, **head;
-	int		err;
-	u32		pstate;
+	struct nlm_wait *block = req->a_block;
 
 
-	block.b_host   = host;
-	block.b_lock   = fl;
-	init_waitqueue_head(&block.b_wait);
-	block.b_status = NLM_LCK_BLOCKED;
-	block.b_next   = nlm_blocked;
-	nlm_blocked    = █
+	if (block == NULL)
+		return;
+	req->a_block = NULL;
+	list_del(&block->b_list);
+	kfree(block);
+}
+
+/*
+ * Block on a lock
+ */
+long nlmclnt_block(struct nlm_rqst *req, long timeout)
+{
+	struct nlm_wait	*block = req->a_block;
+	long ret;
 
 
-	/* Remember pseudo nsm state */
-	pstate = host->h_state;
+	/* A borken server might ask us to block even if we didn't
+	 * request it. Just say no!
+	 */
+	if (!req->a_args.block)
+		return -EAGAIN;
 
 
 	/* Go to sleep waiting for GRANT callback. Some servers seem
 	/* Go to sleep waiting for GRANT callback. Some servers seem
 	 * to lose callbacks, however, so we're going to poll from
 	 * to lose callbacks, however, so we're going to poll from
@@ -69,28 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
 	 * a 1 minute timeout would do. See the comment before
 	 * a 1 minute timeout would do. See the comment before
 	 * nlmclnt_lock for an explanation.
 	 * nlmclnt_lock for an explanation.
 	 */
 	 */
-	sleep_on_timeout(&block.b_wait, 30*HZ);
-
-	for (head = &nlm_blocked; *head; head = &(*head)->b_next) {
-		if (*head == &block) {
-			*head = block.b_next;
-			break;
-		}
-	}
+	ret = wait_event_interruptible_timeout(block->b_wait,
+			block->b_status != NLM_LCK_BLOCKED,
+			timeout);
 
 
-	if (!signalled()) {
-		*statp = block.b_status;
-		return 0;
+	if (block->b_status != NLM_LCK_BLOCKED) {
+		req->a_res.status = block->b_status;
+		block->b_status = NLM_LCK_BLOCKED;
 	}
 	}
 
 
-	/* Okay, we were interrupted. Cancel the pending request
-	 * unless the server has rebooted.
-	 */
-	if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0)
-		printk(KERN_NOTICE
-			"lockd: CANCEL call failed (errno %d)\n", -err);
-
-	return -ERESTARTSYS;
+	return ret;
 }
 }
 
 
 /*
 /*
@@ -100,27 +115,23 @@ u32
 nlmclnt_grant(struct nlm_lock *lock)
 nlmclnt_grant(struct nlm_lock *lock)
 {
 {
 	struct nlm_wait	*block;
 	struct nlm_wait	*block;
+	u32 res = nlm_lck_denied;
 
 
 	/*
 	/*
 	 * Look up blocked request based on arguments. 
 	 * Look up blocked request based on arguments. 
 	 * Warning: must not use cookie to match it!
 	 * Warning: must not use cookie to match it!
 	 */
 	 */
-	for (block = nlm_blocked; block; block = block->b_next) {
-		if (nlm_compare_locks(block->b_lock, &lock->fl))
-			break;
+	list_for_each_entry(block, &nlm_blocked, b_list) {
+		if (nlm_compare_locks(block->b_lock, &lock->fl)) {
+			/* Alright, we found a lock. Set the return status
+			 * and wake up the caller
+			 */
+			block->b_status = NLM_LCK_GRANTED;
+			wake_up(&block->b_wait);
+			res = nlm_granted;
+		}
 	}
 	}
-
-	/* Ooops, no blocked request found. */
-	if (block == NULL)
-		return nlm_lck_denied;
-
-	/* Alright, we found the lock. Set the return status and
-	 * wake up the caller.
-	 */
-	block->b_status = NLM_LCK_GRANTED;
-	wake_up(&block->b_wait);
-
-	return nlm_granted;
+	return res;
 }
 }
 
 
 /*
 /*
@@ -230,7 +241,7 @@ restart:
 	host->h_reclaiming = 0;
 	host->h_reclaiming = 0;
 
 
 	/* Now, wake up all processes that sleep on a blocked lock */
 	/* Now, wake up all processes that sleep on a blocked lock */
-	for (block = nlm_blocked; block; block = block->b_next) {
+	list_for_each_entry(block, &nlm_blocked, b_list) {
 		if (block->b_host == host) {
 		if (block->b_host == host) {
 			block->b_status = NLM_LCK_DENIED_GRACE_PERIOD;
 			block->b_status = NLM_LCK_DENIED_GRACE_PERIOD;
 			wake_up(&block->b_wait);
 			wake_up(&block->b_wait);

+ 32 - 8
fs/lockd/clntproc.c

@@ -21,6 +21,7 @@
 
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 #define NLMCLNT_GRACE_WAIT	(5*HZ)
 #define NLMCLNT_GRACE_WAIT	(5*HZ)
+#define NLMCLNT_POLL_TIMEOUT	(30*HZ)
 
 
 static int	nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
@@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 {
 {
 	struct nlm_host	*host = req->a_host;
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
 	struct nlm_res	*resp = &req->a_res;
-	int		status;
+	long timeout;
+	int status;
 
 
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
 		printk(KERN_NOTICE "lockd: failed to monitor %s\n",
 		printk(KERN_NOTICE "lockd: failed to monitor %s\n",
@@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 		goto out;
 		goto out;
 	}
 	}
 
 
-	do {
-		if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) {
-			if (resp->status != NLM_LCK_BLOCKED)
-				break;
-			status = nlmclnt_block(host, fl, &resp->status);
-		}
+	if (req->a_args.block) {
+		status = nlmclnt_prepare_block(req, host, fl);
 		if (status < 0)
 		if (status < 0)
 			goto out;
 			goto out;
-	} while (resp->status == NLM_LCK_BLOCKED && req->a_args.block);
+	}
+	for(;;) {
+		status = nlmclnt_call(req, NLMPROC_LOCK);
+		if (status < 0)
+			goto out_unblock;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
+		/* Wait on an NLM blocking lock */
+		timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
+		/* Did a reclaimer thread notify us of a server reboot? */
+		if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
+			continue;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
+		if (timeout >= 0)
+			continue;
+		/* We were interrupted. Send a CANCEL request to the server
+		 * and exit
+		 */
+		status = (int)timeout;
+		goto out_unblock;
+	}
 
 
 	if (resp->status == NLM_LCK_GRANTED) {
 	if (resp->status == NLM_LCK_GRANTED) {
 		fl->fl_u.nfs_fl.state = host->h_state;
 		fl->fl_u.nfs_fl.state = host->h_state;
@@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 		do_vfs_lock(fl);
 		do_vfs_lock(fl);
 	}
 	}
 	status = nlm_stat_to_errno(resp->status);
 	status = nlm_stat_to_errno(resp->status);
+out_unblock:
+	nlmclnt_finish_block(req);
+	/* Cancel the blocked request if it is still pending */
+	if (resp->status == NLM_LCK_BLOCKED)
+		nlmclnt_cancel(host, fl);
 out:
 out:
 	nlmclnt_release_lockargs(req);
 	nlmclnt_release_lockargs(req);
 	return status;
 	return status;

+ 3 - 5
fs/lockd/host.c

@@ -189,17 +189,15 @@ nlm_bind_host(struct nlm_host *host)
 			goto forgetit;
 			goto forgetit;
 
 
 		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
 		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
+		xprt->nocong = 1;	/* No congestion control for NLM */
+		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 
 		/* Existing NLM servers accept AUTH_UNIX only */
 		/* Existing NLM servers accept AUTH_UNIX only */
 		clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
 		clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
 					host->h_version, RPC_AUTH_UNIX);
 					host->h_version, RPC_AUTH_UNIX);
-		if (IS_ERR(clnt)) {
-			xprt_destroy(xprt);
+		if (IS_ERR(clnt))
 			goto forgetit;
 			goto forgetit;
-		}
 		clnt->cl_autobind = 1;	/* turn on pmap queries */
 		clnt->cl_autobind = 1;	/* turn on pmap queries */
-		xprt->nocong = 1;	/* No congestion control for NLM */
-		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 
 		host->h_rpcclnt = clnt;
 		host->h_rpcclnt = clnt;
 	}
 	}

+ 3 - 4
fs/lockd/mon.c

@@ -115,20 +115,19 @@ nsm_create(void)
 	xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
 	xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
 	if (IS_ERR(xprt))
 	if (IS_ERR(xprt))
 		return (struct rpc_clnt *)xprt;
 		return (struct rpc_clnt *)xprt;
+	xprt->resvport = 1;	/* NSM requires a reserved port */
 
 
 	clnt = rpc_create_client(xprt, "localhost",
 	clnt = rpc_create_client(xprt, "localhost",
 				&nsm_program, SM_VERSION,
 				&nsm_program, SM_VERSION,
 				RPC_AUTH_NULL);
 				RPC_AUTH_NULL);
 	if (IS_ERR(clnt))
 	if (IS_ERR(clnt))
-		goto out_destroy;
+		goto out_err;
 	clnt->cl_softrtry = 1;
 	clnt->cl_softrtry = 1;
 	clnt->cl_chatty   = 1;
 	clnt->cl_chatty   = 1;
 	clnt->cl_oneshot  = 1;
 	clnt->cl_oneshot  = 1;
-	xprt->resvport = 1;	/* NSM requires a reserved port */
 	return clnt;
 	return clnt;
 
 
-out_destroy:
-	xprt_destroy(xprt);
+out_err:
 	return clnt;
 	return clnt;
 }
 }
 
 

+ 6 - 0
fs/locks.c

@@ -1548,6 +1548,8 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
 
 
 	if (filp->f_op && filp->f_op->lock) {
 	if (filp->f_op && filp->f_op->lock) {
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
+		if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+			file_lock.fl_ops->fl_release_private(&file_lock);
 		if (error < 0)
 		if (error < 0)
 			goto out;
 			goto out;
 		else
 		else
@@ -1690,6 +1692,8 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 
 
 	if (filp->f_op && filp->f_op->lock) {
 	if (filp->f_op && filp->f_op->lock) {
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
+		if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+			file_lock.fl_ops->fl_release_private(&file_lock);
 		if (error < 0)
 		if (error < 0)
 			goto out;
 			goto out;
 		else
 		else
@@ -1873,6 +1877,8 @@ void locks_remove_flock(struct file *filp)
 			.fl_end = OFFSET_MAX,
 			.fl_end = OFFSET_MAX,
 		};
 		};
 		filp->f_op->flock(filp, F_SETLKW, &fl);
 		filp->f_op->flock(filp, F_SETLKW, &fl);
+		if (fl.fl_ops && fl.fl_ops->fl_release_private)
+			fl.fl_ops->fl_release_private(&fl);
 	}
 	}
 
 
 	lock_kernel();
 	lock_kernel();

+ 1 - 0
fs/nfs/Makefile

@@ -8,6 +8,7 @@ nfs-y 			:= dir.o file.o inode.o nfs2xdr.o pagelist.o \
 			   proc.o read.o symlink.o unlink.o write.o
 			   proc.o read.o symlink.o unlink.o write.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
 nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 			   delegation.o idmap.o \
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o
 			   callback.o callback_xdr.o callback_proc.o

+ 1 - 0
fs/nfs/callback.c

@@ -14,6 +14,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 #include "callback.h"
 
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 #define NFSDBG_FACILITY NFSDBG_CALLBACK

+ 1 - 0
fs/nfs/callback_proc.c

@@ -8,6 +8,7 @@
 #include <linux/config.h>
 #include <linux/config.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 #include "callback.h"
 #include "delegation.h"
 #include "delegation.h"
 
 

+ 1 - 1
fs/nfs/callback_xdr.c

@@ -10,6 +10,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 #include "callback.h"
 #include "callback.h"
 
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
 #define CB_OP_TAGLEN_MAXSZ	(512)
@@ -410,7 +411,6 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
 	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
 	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
 
 
 	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
 	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
-	rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
 
 
 	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
 	decode_compound_hdr_arg(&xdr_in, &hdr_arg);

+ 1 - 0
fs/nfs/delegation.c

@@ -16,6 +16,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_xdr.h>
 #include <linux/nfs_xdr.h>
 
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 #include "delegation.h"
 
 
 static struct nfs_delegation *nfs_alloc_delegation(void)
 static struct nfs_delegation *nfs_alloc_delegation(void)

+ 130 - 30
fs/nfs/dir.c

@@ -32,6 +32,7 @@
 #include <linux/smp_lock.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
 #include <linux/namei.h>
 
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 #include "delegation.h"
 
 
 #define NFS_PARANOIA 1
 #define NFS_PARANOIA 1
@@ -50,8 +51,10 @@ static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
 static int nfs_rename(struct inode *, struct dentry *,
 static int nfs_rename(struct inode *, struct dentry *,
 		      struct inode *, struct dentry *);
 		      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, struct dentry *, int);
 static int nfs_fsync_dir(struct file *, struct dentry *, int);
+static loff_t nfs_llseek_dir(struct file *, loff_t, int);
 
 
 struct file_operations nfs_dir_operations = {
 struct file_operations nfs_dir_operations = {
+	.llseek		= nfs_llseek_dir,
 	.read		= generic_read_dir,
 	.read		= generic_read_dir,
 	.readdir	= nfs_readdir,
 	.readdir	= nfs_readdir,
 	.open		= nfs_opendir,
 	.open		= nfs_opendir,
@@ -74,6 +77,27 @@ struct inode_operations nfs_dir_inode_operations = {
 	.setattr	= nfs_setattr,
 	.setattr	= nfs_setattr,
 };
 };
 
 
+#ifdef CONFIG_NFS_V3
+struct inode_operations nfs3_dir_inode_operations = {
+	.create		= nfs_create,
+	.lookup		= nfs_lookup,
+	.link		= nfs_link,
+	.unlink		= nfs_unlink,
+	.symlink	= nfs_symlink,
+	.mkdir		= nfs_mkdir,
+	.rmdir		= nfs_rmdir,
+	.mknod		= nfs_mknod,
+	.rename		= nfs_rename,
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.listxattr	= nfs3_listxattr,
+	.getxattr	= nfs3_getxattr,
+	.setxattr	= nfs3_setxattr,
+	.removexattr	= nfs3_removexattr,
+};
+#endif  /* CONFIG_NFS_V3 */
+
 #ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4
 
 
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
@@ -90,6 +114,9 @@ struct inode_operations nfs4_dir_inode_operations = {
 	.permission	= nfs_permission,
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
 	.setattr	= nfs_setattr,
+	.getxattr       = nfs4_getxattr,
+	.setxattr       = nfs4_setxattr,
+	.listxattr      = nfs4_listxattr,
 };
 };
 
 
 #endif /* CONFIG_NFS_V4 */
 #endif /* CONFIG_NFS_V4 */
@@ -116,7 +143,8 @@ typedef struct {
 	struct page	*page;
 	struct page	*page;
 	unsigned long	page_index;
 	unsigned long	page_index;
 	u32		*ptr;
 	u32		*ptr;
-	u64		target;
+	u64		*dir_cookie;
+	loff_t		current_index;
 	struct nfs_entry *entry;
 	struct nfs_entry *entry;
 	decode_dirent_t	decode;
 	decode_dirent_t	decode;
 	int		plus;
 	int		plus;
@@ -164,12 +192,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
 	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
 	/* Ensure consistent page alignment of the data.
 	/* Ensure consistent page alignment of the data.
 	 * Note: assumes we have exclusive access to this mapping either
 	 * Note: assumes we have exclusive access to this mapping either
-	 *	 throught inode->i_sem or some other mechanism.
+	 *	 through inode->i_sem or some other mechanism.
 	 */
 	 */
-	if (page->index == 0) {
-		invalidate_inode_pages(inode->i_mapping);
-		NFS_I(inode)->readdir_timestamp = timestamp;
-	}
+	if (page->index == 0)
+		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
 	unlock_page(page);
 	unlock_page(page);
 	return 0;
 	return 0;
  error:
  error:
@@ -202,22 +228,22 @@ void dir_page_release(nfs_readdir_descriptor_t *desc)
 
 
 /*
 /*
  * Given a pointer to a buffer that has already been filled by a call
  * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the next entry.
+ * to readdir, find the next entry with cookie '*desc->dir_cookie'.
  *
  *
  * If the end of the buffer has been reached, return -EAGAIN, if not,
  * If the end of the buffer has been reached, return -EAGAIN, if not,
  * return the offset within the buffer of the next entry to be
  * return the offset within the buffer of the next entry to be
  * read.
  * read.
  */
  */
 static inline
 static inline
-int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
+int find_dirent(nfs_readdir_descriptor_t *desc)
 {
 {
 	struct nfs_entry *entry = desc->entry;
 	struct nfs_entry *entry = desc->entry;
 	int		loop_count = 0,
 	int		loop_count = 0,
 			status;
 			status;
 
 
 	while((status = dir_decode(desc)) == 0) {
 	while((status = dir_decode(desc)) == 0) {
-		dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie);
-		if (entry->prev_cookie == desc->target)
+		dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+		if (entry->prev_cookie == *desc->dir_cookie)
 			break;
 			break;
 		if (loop_count++ > 200) {
 		if (loop_count++ > 200) {
 			loop_count = 0;
 			loop_count = 0;
@@ -229,8 +255,44 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
 }
 }
 
 
 /*
 /*
- * Find the given page, and call find_dirent() in order to try to
- * return the next entry.
+ * Given a pointer to a buffer that has already been filled by a call
+ * to readdir, find the entry at offset 'desc->file->f_pos'.
+ *
+ * If the end of the buffer has been reached, return -EAGAIN, if not,
+ * return the offset within the buffer of the next entry to be
+ * read.
+ */
+static inline
+int find_dirent_index(nfs_readdir_descriptor_t *desc)
+{
+	struct nfs_entry *entry = desc->entry;
+	int		loop_count = 0,
+			status;
+
+	for(;;) {
+		status = dir_decode(desc);
+		if (status)
+			break;
+
+		dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+
+		if (desc->file->f_pos == desc->current_index) {
+			*desc->dir_cookie = entry->cookie;
+			break;
+		}
+		desc->current_index++;
+		if (loop_count++ > 200) {
+			loop_count = 0;
+			schedule();
+		}
+	}
+	dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
+	return status;
+}
+
+/*
+ * Find the given page, and call find_dirent() or find_dirent_index in
+ * order to try to return the next entry.
  */
  */
 static inline
 static inline
 int find_dirent_page(nfs_readdir_descriptor_t *desc)
 int find_dirent_page(nfs_readdir_descriptor_t *desc)
@@ -253,7 +315,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
 	/* NOTE: Someone else may have changed the READDIRPLUS flag */
 	/* NOTE: Someone else may have changed the READDIRPLUS flag */
 	desc->page = page;
 	desc->page = page;
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
-	status = find_dirent(desc, page);
+	if (*desc->dir_cookie != 0)
+		status = find_dirent(desc);
+	else
+		status = find_dirent_index(desc);
 	if (status < 0)
 	if (status < 0)
 		dir_page_release(desc);
 		dir_page_release(desc);
  out:
  out:
@@ -268,7 +333,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
  * Recurse through the page cache pages, and return a
  * Recurse through the page cache pages, and return a
  * filled nfs_entry structure of the next directory entry if possible.
  * filled nfs_entry structure of the next directory entry if possible.
  *
  *
- * The target for the search is 'desc->target'.
+ * The target for the search is '*desc->dir_cookie' if non-0,
+ * 'desc->file->f_pos' otherwise
  */
  */
 static inline
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
@@ -276,7 +342,16 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 	int		loop_count = 0;
 	int		loop_count = 0;
 	int		res;
 	int		res;
 
 
-	dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target);
+	/* Always search-by-index from the beginning of the cache */
+	if (*desc->dir_cookie == 0) {
+		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+		desc->page_index = 0;
+		desc->entry->cookie = desc->entry->prev_cookie = 0;
+		desc->entry->eof = 0;
+		desc->current_index = 0;
+	} else
+		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+
 	for (;;) {
 	for (;;) {
 		res = find_dirent_page(desc);
 		res = find_dirent_page(desc);
 		if (res != -EAGAIN)
 		if (res != -EAGAIN)
@@ -313,7 +388,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	int		loop_count = 0,
 	int		loop_count = 0,
 			res;
 			res;
 
 
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target);
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
 
 
 	for(;;) {
 	for(;;) {
 		unsigned d_type = DT_UNKNOWN;
 		unsigned d_type = DT_UNKNOWN;
@@ -333,10 +408,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 		}
 		}
 
 
 		res = filldir(dirent, entry->name, entry->len, 
 		res = filldir(dirent, entry->name, entry->len, 
-			      entry->prev_cookie, fileid, d_type);
+			      file->f_pos, fileid, d_type);
 		if (res < 0)
 		if (res < 0)
 			break;
 			break;
-		file->f_pos = desc->target = entry->cookie;
+		file->f_pos++;
+		*desc->dir_cookie = entry->cookie;
 		if (dir_decode(desc) != 0) {
 		if (dir_decode(desc) != 0) {
 			desc->page_index ++;
 			desc->page_index ++;
 			break;
 			break;
@@ -349,7 +425,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	dir_page_release(desc);
 	dir_page_release(desc);
 	if (dentry != NULL)
 	if (dentry != NULL)
 		dput(dentry);
 		dput(dentry);
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
+	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
 	return res;
 	return res;
 }
 }
 
 
@@ -375,14 +451,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct page	*page = NULL;
 	struct page	*page = NULL;
 	int		status;
 	int		status;
 
 
-	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target);
+	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
 
 
 	page = alloc_page(GFP_HIGHUSER);
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
 	if (!page) {
 		status = -ENOMEM;
 		status = -ENOMEM;
 		goto out;
 		goto out;
 	}
 	}
-	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target,
+	desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie,
 						page,
 						page,
 						NFS_SERVER(inode)->dtsize,
 						NFS_SERVER(inode)->dtsize,
 						desc->plus);
 						desc->plus);
@@ -391,7 +467,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	if (desc->error >= 0) {
 	if (desc->error >= 0) {
 		if ((status = dir_decode(desc)) == 0)
 		if ((status = dir_decode(desc)) == 0)
-			desc->entry->prev_cookie = desc->target;
+			desc->entry->prev_cookie = *desc->dir_cookie;
 	} else
 	} else
 		status = -EIO;
 		status = -EIO;
 	if (status < 0)
 	if (status < 0)
@@ -412,8 +488,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	goto out;
 	goto out;
 }
 }
 
 
-/* The file offset position is now represented as a true offset into the
- * page cache as is the case in most of the other filesystems.
+/* The file offset position represents the directory entry number.  A
+   last-cookie cache takes care of the common case of reading the
+   whole directory.
  */
  */
 static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 {
@@ -435,15 +512,15 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	}
 	}
 
 
 	/*
 	/*
-	 * filp->f_pos points to the file offset in the page cache.
-	 * but if the cache has meanwhile been zapped, we need to
-	 * read from the last dirent to revalidate f_pos
-	 * itself.
+	 * filp->f_pos points to the dirent entry number.
+	 * *desc->dir_cookie has the cookie for the next entry. We have
+	 * to either find the entry with the appropriate number or
+	 * revalidate the cookie.
 	 */
 	 */
 	memset(desc, 0, sizeof(*desc));
 	memset(desc, 0, sizeof(*desc));
 
 
 	desc->file = filp;
 	desc->file = filp;
-	desc->target = filp->f_pos;
+	desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = NFS_USE_READDIRPLUS(inode);
 	desc->plus = NFS_USE_READDIRPLUS(inode);
 
 
@@ -455,9 +532,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 
 	while(!desc->entry->eof) {
 	while(!desc->entry->eof) {
 		res = readdir_search_pagecache(desc);
 		res = readdir_search_pagecache(desc);
+
 		if (res == -EBADCOOKIE) {
 		if (res == -EBADCOOKIE) {
 			/* This means either end of directory */
 			/* This means either end of directory */
-			if (desc->entry->cookie != desc->target) {
+			if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) {
 				/* Or that the server has 'lost' a cookie */
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc, dirent, filldir);
 				res = uncached_readdir(desc, dirent, filldir);
 				if (res >= 0)
 				if (res >= 0)
@@ -490,6 +568,28 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	return 0;
 	return 0;
 }
 }
 
 
+loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
+{	/*
+	 * llseek for NFS directories.  Origin 0 takes @offset as absolute,
+	 * origin 1 takes it relative to filp->f_pos; any other origin, or a
+	 * negative resulting position, yields -EINVAL.  When the position
+	 * actually changes, f_pos is updated and the dir_cookie stored in
+	 * the file's nfs_open_context is reset to 0.  The directory inode's
+	 * i_sem is held for the whole operation.
+	 * Returns the new position, or -EINVAL.
+	 */
+	down(&filp->f_dentry->d_inode->i_sem);
+	switch (origin) {
+		case 1:	/* relative to the current position */
+			offset += filp->f_pos;	/* fall through to the range check */
+		case 0:	/* absolute position */
+			if (offset >= 0)	/* negative positions fall through to -EINVAL */
+				break;
+		default:
+			offset = -EINVAL;
+			goto out;
+	}
+	if (offset != filp->f_pos) {
+		filp->f_pos = offset;
+		((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;	/* drop the cached cookie */
+	}
+out:
+	up(&filp->f_dentry->d_inode->i_sem);
+	return offset;
+}
+
 /*
 /*
  * All directory operations under NFS are synchronous, so fsync()
  * All directory operations under NFS are synchronous, so fsync()
  * is a dummy operation.
  * is a dummy operation.

+ 1 - 1
fs/nfs/direct.c

@@ -517,7 +517,7 @@ retry:
 	result = tot_bytes;
 	result = tot_bytes;
 
 
 out:
 out:
-	nfs_end_data_update_defer(inode);
+	nfs_end_data_update(inode);
 	nfs_writedata_free(wdata);
 	nfs_writedata_free(wdata);
 	return result;
 	return result;
 
 

+ 41 - 7
fs/nfs/file.c

@@ -71,6 +71,18 @@ struct inode_operations nfs_file_inode_operations = {
 	.setattr	= nfs_setattr,
 	.setattr	= nfs_setattr,
 };
 };
 
 
+#ifdef CONFIG_NFS_V3
+struct inode_operations nfs3_file_inode_operations = {	/* built only when CONFIG_NFS_V3 is set */
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.listxattr	= nfs3_listxattr,	/* the four xattr hooks go to nfs3_* handlers */
+	.getxattr	= nfs3_getxattr,
+	.setxattr	= nfs3_setxattr,
+	.removexattr	= nfs3_removexattr,
+};
+#endif  /* CONFIG_NFS_V3 */
+
 /* Hack for future NFS swap support */
 /* Hack for future NFS swap support */
 #ifndef IS_SWAPFILE
 #ifndef IS_SWAPFILE
 # define IS_SWAPFILE(inode)	(0)
 # define IS_SWAPFILE(inode)	(0)
@@ -115,6 +127,21 @@ nfs_file_release(struct inode *inode, struct file *filp)
 	return NFS_PROTO(inode)->file_release(inode, filp);
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
 }
 
 
+/**
+ * nfs_revalidate_file - Revalidate the page cache & related metadata
+ * @inode - pointer to inode struct
+ * @filp - pointer to file
+ *
+ * Revalidates the inode when NFS_INO_REVAL_PAGECACHE is set or the
+ * attribute cache has timed out, then revalidates @filp's mapping.
+ * Returns 0 on success, or the error from __nfs_revalidate_inode() so
+ * that callers testing the result actually see revalidation failures.
+ */
+static int nfs_revalidate_file(struct inode *inode, struct file *filp)
+{
+	int retval = 0;
+
+	if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode))
+		retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	nfs_revalidate_mapping(inode, filp->f_mapping);
+	/* Propagate the revalidation status instead of discarding it. */
+	return retval;
+}
+
 /**
 /**
  * nfs_revalidate_size - Revalidate the file size
  * nfs_revalidate_size - Revalidate the file size
  * @inode - pointer to inode struct
  * @inode - pointer to inode struct
@@ -137,7 +164,8 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
 		goto force_reval;
 		goto force_reval;
 	if (nfsi->npages != 0)
 	if (nfsi->npages != 0)
 		return 0;
 		return 0;
-	return nfs_revalidate_inode(server, inode);
+	if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
+		return 0;
 force_reval:
 force_reval:
 	return __nfs_revalidate_inode(server, inode);
 	return __nfs_revalidate_inode(server, inode);
 }
 }
@@ -198,7 +226,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (unsigned long) pos);
 		(unsigned long) count, (unsigned long) pos);
 
 
-	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	result = nfs_revalidate_file(inode, iocb->ki_filp);
 	if (!result)
 	if (!result)
 		result = generic_file_aio_read(iocb, buf, count, pos);
 		result = generic_file_aio_read(iocb, buf, count, pos);
 	return result;
 	return result;
@@ -216,7 +244,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		(unsigned long) count, (unsigned long long) *ppos);
 		(unsigned long) count, (unsigned long long) *ppos);
 
 
-	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	res = nfs_revalidate_file(inode, filp);
 	if (!res)
 	if (!res)
 		res = generic_file_sendfile(filp, ppos, count, actor, target);
 		res = generic_file_sendfile(filp, ppos, count, actor, target);
 	return res;
 	return res;
@@ -232,7 +260,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
 	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
 
-	status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	status = nfs_revalidate_file(inode, file);
 	if (!status)
 	if (!status)
 		status = generic_file_mmap(file, vma);
 		status = generic_file_mmap(file, vma);
 	return status;
 	return status;
@@ -321,9 +349,15 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
 	result = -EBUSY;
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
 	if (IS_SWAPFILE(inode))
 		goto out_swapfile;
 		goto out_swapfile;
-	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
-	if (result)
-		goto out;
+	/*
+	 * O_APPEND implies that we must revalidate the file length.
+	 */
+	if (iocb->ki_filp->f_flags & O_APPEND) {
+		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
+		if (result)
+			goto out;
+	}
+	nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 
 
 	result = count;
 	result = count;
 	if (!count)
 	if (!count)

+ 1 - 0
fs/nfs/idmap.c

@@ -50,6 +50,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
 
 
 #include <linux/nfs_idmap.h>
 #include <linux/nfs_idmap.h>
+#include "nfs4_fs.h"
 
 
 #define IDMAP_HASH_SZ          128
 #define IDMAP_HASH_SZ          128
 
 

+ 255 - 172
fs/nfs/inode.c

@@ -39,6 +39,7 @@
 #include <asm/system.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
 
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 #include "delegation.h"
 
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 #define NFSDBG_FACILITY		NFSDBG_VFS
@@ -63,6 +64,7 @@ static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct super_block *);
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct super_block *, struct kstatfs *);
 static int  nfs_statfs(struct super_block *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static void nfs_zap_acl_cache(struct inode *);
 
 
 static struct rpc_program	nfs_program;
 static struct rpc_program	nfs_program;
 
 
@@ -106,6 +108,21 @@ static struct rpc_program	nfs_program = {
 	.pipe_dir_name		= "/nfs",
 	.pipe_dir_name		= "/nfs",
 };
 };
 
 
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };	/* first member points at the program defined below */
+static struct rpc_version *	nfsacl_version[] = {
+	[3]			= &nfsacl_version3,	/* only slot 3 is populated; slots 0-2 are implicitly NULL */
+};
+
+struct rpc_program		nfsacl_program = {	/* RPC program description for "nfsacl" */
+	.name =			"nfsacl",
+	.number =		NFS_ACL_PROGRAM,
+	.nrvers =		sizeof(nfsacl_version) / sizeof(nfsacl_version[0]),	/* array length (4), not the number of populated slots */
+	.version =		nfsacl_version,
+	.stats =		&nfsacl_rpcstat,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
+
 static inline unsigned long
 static inline unsigned long
 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 {
 {
@@ -118,7 +135,7 @@ nfs_write_inode(struct inode *inode, int sync)
 	int flags = sync ? FLUSH_WAIT : 0;
 	int flags = sync ? FLUSH_WAIT : 0;
 	int ret;
 	int ret;
 
 
-	ret = nfs_commit_inode(inode, 0, 0, flags);
+	ret = nfs_commit_inode(inode, flags);
 	if (ret < 0)
 	if (ret < 0)
 		return ret;
 		return ret;
 	return 0;
 	return 0;
@@ -140,10 +157,6 @@ nfs_delete_inode(struct inode * inode)
 	clear_inode(inode);
 	clear_inode(inode);
 }
 }
 
 
-/*
- * For the moment, the only task for the NFS clear_inode method is to
- * release the mmap credential
- */
 static void
 static void
 nfs_clear_inode(struct inode *inode)
 nfs_clear_inode(struct inode *inode)
 {
 {
@@ -152,6 +165,7 @@ nfs_clear_inode(struct inode *inode)
 
 
 	nfs_wb_all(inode);
 	nfs_wb_all(inode);
 	BUG_ON (!list_empty(&nfsi->open_files));
 	BUG_ON (!list_empty(&nfsi->open_files));
+	nfs_zap_acl_cache(inode);
 	cred = nfsi->cache_access.cred;
 	cred = nfsi->cache_access.cred;
 	if (cred)
 	if (cred)
 		put_rpccred(cred);
 		put_rpccred(cred);
@@ -161,11 +175,13 @@ nfs_clear_inode(struct inode *inode)
 void
 void
 nfs_umount_begin(struct super_block *sb)
 nfs_umount_begin(struct super_block *sb)
 {
 {
-	struct nfs_server *server = NFS_SB(sb);
-	struct rpc_clnt	*rpc;
+	struct rpc_clnt	*rpc = NFS_SB(sb)->client;
 
 
 	/* -EIO all pending I/O */
 	/* -EIO all pending I/O */
-	if ((rpc = server->client) != NULL)
+	if (!IS_ERR(rpc))
+		rpc_killall_tasks(rpc);
+	rpc = NFS_SB(sb)->client_acl;
+	if (!IS_ERR(rpc))
 		rpc_killall_tasks(rpc);
 		rpc_killall_tasks(rpc);
 }
 }
 
 
@@ -366,13 +382,15 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
 	xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
 				 &server->addr, &timeparms);
 				 &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
 	if (IS_ERR(xprt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		dprintk("%s: cannot create RPC transport. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(xprt));
 		return (struct rpc_clnt *)xprt;
 		return (struct rpc_clnt *)xprt;
 	}
 	}
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 				 server->rpc_ops->version, data->pseudoflavor);
 				 server->rpc_ops->version, data->pseudoflavor);
 	if (IS_ERR(clnt)) {
 	if (IS_ERR(clnt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		dprintk("%s: cannot create RPC client. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(xprt));
 		goto out_fail;
 		goto out_fail;
 	}
 	}
 
 
@@ -383,7 +401,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	return clnt;
 	return clnt;
 
 
 out_fail:
 out_fail:
-	xprt_destroy(xprt);
 	return clnt;
 	return clnt;
 }
 }
 
 
@@ -427,21 +444,16 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 
 
 	/* Check NFS protocol revision and initialize RPC op vector
 	/* Check NFS protocol revision and initialize RPC op vector
 	 * and file handle pool. */
 	 * and file handle pool. */
-	if (server->flags & NFS_MOUNT_VER3) {
 #ifdef CONFIG_NFS_V3
 #ifdef CONFIG_NFS_V3
+	if (server->flags & NFS_MOUNT_VER3) {
 		server->rpc_ops = &nfs_v3_clientops;
 		server->rpc_ops = &nfs_v3_clientops;
 		server->caps |= NFS_CAP_READDIRPLUS;
 		server->caps |= NFS_CAP_READDIRPLUS;
-		if (data->version < 4) {
-			printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
-			return -EIO;
-		}
-#else
-		printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
-		return -EIO;
-#endif
 	} else {
 	} else {
 		server->rpc_ops = &nfs_v2_clientops;
 		server->rpc_ops = &nfs_v2_clientops;
 	}
 	}
+#else
+	server->rpc_ops = &nfs_v2_clientops;
+#endif
 
 
 	/* Fill in pseudoflavor for mount version < 5 */
 	/* Fill in pseudoflavor for mount version < 5 */
 	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
 	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
@@ -455,17 +467,34 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 		return PTR_ERR(server->client);
 		return PTR_ERR(server->client);
 	/* RFC 2623, sec 2.3.2 */
 	/* RFC 2623, sec 2.3.2 */
 	if (authflavor != RPC_AUTH_UNIX) {
 	if (authflavor != RPC_AUTH_UNIX) {
+		struct rpc_auth *auth;
+
 		server->client_sys = rpc_clone_client(server->client);
 		server->client_sys = rpc_clone_client(server->client);
 		if (IS_ERR(server->client_sys))
 		if (IS_ERR(server->client_sys))
 			return PTR_ERR(server->client_sys);
 			return PTR_ERR(server->client_sys);
-		if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys))
-			return -ENOMEM;
+		auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
+		if (IS_ERR(auth))
+			return PTR_ERR(auth);
 	} else {
 	} else {
 		atomic_inc(&server->client->cl_count);
 		atomic_inc(&server->client->cl_count);
 		server->client_sys = server->client;
 		server->client_sys = server->client;
 	}
 	}
-
 	if (server->flags & NFS_MOUNT_VER3) {
 	if (server->flags & NFS_MOUNT_VER3) {
+#ifdef CONFIG_NFS_V3_ACL
+		if (!(server->flags & NFS_MOUNT_NOACL)) {
+			server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+			/* No errors! Assume that Sun nfsacls are supported */
+			if (!IS_ERR(server->client_acl))
+				server->caps |= NFS_CAP_ACLS;
+		}
+#else
+		server->flags &= ~NFS_MOUNT_NOACL;
+#endif /* CONFIG_NFS_V3_ACL */
+		/*
+		 * The VFS shouldn't apply the umask to mode bits. We will
+		 * do so ourselves when necessary.
+		 */
+		sb->s_flags |= MS_POSIXACL;
 		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
 		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
 			server->namelen = NFS3_MAXNAMLEN;
 			server->namelen = NFS3_MAXNAMLEN;
 		sb->s_time_gran = 1;
 		sb->s_time_gran = 1;
@@ -549,6 +578,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
 		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
+		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 		{ 0, NULL, NULL }
 	};
 	};
 	struct proc_nfs_info *nfs_infop;
 	struct proc_nfs_info *nfs_infop;
@@ -590,9 +620,19 @@ nfs_zap_caches(struct inode *inode)
 
 
 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
 	else
 	else
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
+}
+
+static void nfs_zap_acl_cache(struct inode *inode)
+{	/*
+	 * Call the protocol's clear_acl_cache hook for @inode when the
+	 * protocol provides one, then clear NFS_INO_INVALID_ACL in the
+	 * inode's NFS flags.
+	 */
+	void (*clear_acl_cache)(struct inode *);
+
+	clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache;
+	if (clear_acl_cache != NULL)	/* the hook is optional */
+		clear_acl_cache(inode);
+	NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL;	/* cleared whether or not a hook ran */
 }
 }
 
 
 /*
 /*
@@ -689,7 +729,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
 		 */
-		inode->i_op = &nfs_file_inode_operations;
+		inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &nfs_file_operations;
 			inode->i_fop = &nfs_file_operations;
 			inode->i_data.a_ops = &nfs_file_aops;
 			inode->i_data.a_ops = &nfs_file_aops;
@@ -792,7 +832,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 		}
 	}
 	}
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
-		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	nfs_end_data_update(inode);
 	nfs_end_data_update(inode);
 	unlock_kernel();
 	unlock_kernel();
 	return error;
 	return error;
@@ -851,7 +891,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
 		ctx->state = NULL;
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
 		ctx->lockowner = current->files;
 		ctx->error = 0;
 		ctx->error = 0;
-		init_waitqueue_head(&ctx->waitq);
+		ctx->dir_cookie = 0;
 	}
 	}
 	return ctx;
 	return ctx;
 }
 }
@@ -1015,6 +1055,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		goto out;
 		goto out;
 	}
 	}
 	flags = nfsi->flags;
 	flags = nfsi->flags;
+	nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE;
 	/*
 	/*
 	 * We may need to keep the attributes marked as invalid if
 	 * We may need to keep the attributes marked as invalid if
 	 * we raced with nfs_end_attr_update().
 	 * we raced with nfs_end_attr_update().
@@ -1022,21 +1063,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	if (verifier == nfsi->cache_change_attribute)
 	if (verifier == nfsi->cache_change_attribute)
 		nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
 		nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
 	/* Do the page cache invalidation */
 	/* Do the page cache invalidation */
-	if (flags & NFS_INO_INVALID_DATA) {
-		if (S_ISREG(inode->i_mode)) {
-			if (filemap_fdatawrite(inode->i_mapping) == 0)
-				filemap_fdatawait(inode->i_mapping);
-			nfs_wb_all(inode);
-		}
-		nfsi->flags &= ~NFS_INO_INVALID_DATA;
-		invalidate_inode_pages2(inode->i_mapping);
-		memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
-		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode));
-		/* This ensures we revalidate dentries */
-		nfsi->cache_change_attribute++;
-	}
+	nfs_revalidate_mapping(inode, inode->i_mapping);
+	if (flags & NFS_INO_INVALID_ACL)
+		nfs_zap_acl_cache(inode);
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
 		inode->i_sb->s_id,
 		inode->i_sb->s_id,
 		(long long)NFS_FILEID(inode));
 		(long long)NFS_FILEID(inode));
@@ -1073,6 +1102,34 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	return __nfs_revalidate_inode(server, inode);
 	return __nfs_revalidate_inode(server, inode);
 }
 }
 
 
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ *
+ * Does nothing unless NFS_INO_INVALID_DATA is set in the inode's NFS
+ * flags.  Otherwise, for regular files, it starts writeback on @mapping,
+ * waits for it when the start returned 0, and calls nfs_wb_all(); it then
+ * invalidates the mapping's pages and clears NFS_INO_INVALID_DATA.  For
+ * directories it additionally zeroes the cookie verifier and increments
+ * cache_change_attribute.
+ */
+void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (nfsi->flags & NFS_INO_INVALID_DATA) {
+		if (S_ISREG(inode->i_mode)) {
+			if (filemap_fdatawrite(mapping) == 0)	/* wait only when fdatawrite returned 0 */
+				filemap_fdatawait(mapping);
+			nfs_wb_all(inode);
+		}
+		invalidate_inode_pages2(mapping);
+		nfsi->flags &= ~NFS_INO_INVALID_DATA;	/* cleared after the pages were invalidated */
+		if (S_ISDIR(inode->i_mode)) {
+			memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+			/* This ensures we revalidate child dentries */
+			nfsi->cache_change_attribute++;
+		}
+		dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode));
+	}
+}
+
 /**
 /**
  * nfs_begin_data_update
  * nfs_begin_data_update
  * @inode - pointer to inode
  * @inode - pointer to inode
@@ -1105,27 +1162,6 @@ void nfs_end_data_update(struct inode *inode)
 	atomic_dec(&nfsi->data_updates);
 	atomic_dec(&nfsi->data_updates);
 }
 }
 
 
-/**
- * nfs_end_data_update_defer
- * @inode - pointer to inode
- * Declare end of the operations that will update file data
- * This will defer marking the inode as needing revalidation
- * unless there are no other pending updates.
- */
-void nfs_end_data_update_defer(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	if (atomic_dec_and_test(&nfsi->data_updates)) {
-		/* Mark the attribute cache for revalidation */
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
-		/* Directories and symlinks: invalidate page cache too */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-			nfsi->flags |= NFS_INO_INVALID_DATA;
-		nfsi->cache_change_attribute ++;
-	}
-}
-
 /**
 /**
  * nfs_refresh_inode - verify consistency of the inode attribute cache
  * nfs_refresh_inode - verify consistency of the inode attribute cache
  * @inode - pointer to inode
  * @inode - pointer to inode
@@ -1152,8 +1188,11 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
 		if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
 				&& nfsi->change_attr == fattr->pre_change_attr)
 				&& nfsi->change_attr == fattr->pre_change_attr)
 			nfsi->change_attr = fattr->change_attr;
 			nfsi->change_attr = fattr->change_attr;
-		if (!data_unstable && nfsi->change_attr != fattr->change_attr)
+		if (nfsi->change_attr != fattr->change_attr) {
 			nfsi->flags |= NFS_INO_INVALID_ATTR;
 			nfsi->flags |= NFS_INO_INVALID_ATTR;
+			if (!data_unstable)
+				nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+		}
 	}
 	}
 
 
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -1176,18 +1215,22 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 	}
 	}
 
 
 	/* Verify a few of the more important attributes */
 	/* Verify a few of the more important attributes */
-	if (!data_unstable) {
-		if (!timespec_equal(&inode->i_mtime, &fattr->mtime)
-				|| cur_size != new_isize)
-			nfsi->flags |= NFS_INO_INVALID_ATTR;
-	} else if (S_ISREG(inode->i_mode) && new_isize > cur_size)
-			nfsi->flags |= NFS_INO_INVALID_ATTR;
+	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		if (!data_unstable)
+			nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+	}
+	if (cur_size != new_isize) {
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		if (nfsi->npages == 0)
+			nfsi->flags |= NFS_INO_REVAL_PAGECACHE;
+	}
 
 
 	/* Have any file permissions changed? */
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
 			|| inode->i_uid != fattr->uid
 			|| inode->i_uid != fattr->uid
 			|| inode->i_gid != fattr->gid)
 			|| inode->i_gid != fattr->gid)
-		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
 
 
 	/* Has the link count changed? */
 	/* Has the link count changed? */
 	if (inode->i_nlink != fattr->nlink)
 	if (inode->i_nlink != fattr->nlink)
@@ -1215,10 +1258,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier)
 static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier)
 {
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
-	__u64		new_size;
-	loff_t		new_isize;
+	loff_t cur_isize, new_isize;
 	unsigned int	invalid = 0;
 	unsigned int	invalid = 0;
-	loff_t		cur_isize;
 	int data_unstable;
 	int data_unstable;
 
 
 	dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
 	dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
@@ -1251,61 +1292,56 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	/* Are we racing with known updates of the metadata on the server? */
 	/* Are we racing with known updates of the metadata on the server? */
 	data_unstable = ! nfs_verify_change_attribute(inode, verifier);
 	data_unstable = ! nfs_verify_change_attribute(inode, verifier);
 
 
-	/* Check if the file size agrees */
-	new_size = fattr->size;
+	/* Check if our cached file size is stale */
  	new_isize = nfs_size_to_loff_t(fattr->size);
  	new_isize = nfs_size_to_loff_t(fattr->size);
 	cur_isize = i_size_read(inode);
 	cur_isize = i_size_read(inode);
-	if (cur_isize != new_size) {
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
-#endif
-		/*
-		 * If we have pending writebacks, things can get
-		 * messy.
-		 */
-		if (S_ISREG(inode->i_mode) && data_unstable) {
-			if (new_isize > cur_isize) {
+	if (new_isize != cur_isize) {
+		/* Do we perhaps have any outstanding writes? */
+		if (nfsi->npages == 0) {
+			/* No, but did we race with nfs_end_data_update()? */
+			if (verifier  ==  nfsi->cache_change_attribute) {
 				inode->i_size = new_isize;
 				inode->i_size = new_isize;
-				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+				invalid |= NFS_INO_INVALID_DATA;
 			}
 			}
-		} else {
+			invalid |= NFS_INO_INVALID_ATTR;
+		} else if (new_isize > cur_isize) {
 			inode->i_size = new_isize;
 			inode->i_size = new_isize;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 		}
 		}
+		dprintk("NFS: isize change on server for file %s/%ld\n",
+				inode->i_sb->s_id, inode->i_ino);
 	}
 	}
 
 
-	/*
-	 * Note: we don't check inode->i_mtime since pipes etc.
-	 *       can change this value in VFS without requiring a
-	 *	 cache revalidation.
-	 */
+	/* Check if the mtime agrees */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
 		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
-#endif
+		dprintk("NFS: mtime change on server for file %s/%ld\n",
+				inode->i_sb->s_id, inode->i_ino);
 		if (!data_unstable)
 		if (!data_unstable)
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 	}
 	}
 
 
 	if ((fattr->valid & NFS_ATTR_FATTR_V4)
 	if ((fattr->valid & NFS_ATTR_FATTR_V4)
 	    && nfsi->change_attr != fattr->change_attr) {
 	    && nfsi->change_attr != fattr->change_attr) {
-#ifdef NFS_DEBUG_VERBOSE
-		printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n",
+		dprintk("NFS: change_attr change on server for file %s/%ld\n",
 		       inode->i_sb->s_id, inode->i_ino);
 		       inode->i_sb->s_id, inode->i_ino);
-#endif
 		nfsi->change_attr = fattr->change_attr;
 		nfsi->change_attr = fattr->change_attr;
 		if (!data_unstable)
 		if (!data_unstable)
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	}
 	}
 
 
-	memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+	/* If ctime has changed we should definitely clear access+acl caches */
+	if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
+		if (!data_unstable)
+			invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+	}
 	memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
 	memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
 
 
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
 	    inode->i_uid != fattr->uid ||
 	    inode->i_uid != fattr->uid ||
 	    inode->i_gid != fattr->gid)
 	    inode->i_gid != fattr->gid)
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 
 
 	inode->i_mode = fattr->mode;
 	inode->i_mode = fattr->mode;
 	inode->i_nlink = fattr->nlink;
 	inode->i_nlink = fattr->nlink;
@@ -1385,74 +1421,95 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data)
 	int flags, const char *dev_name, void *raw_data)
 {
 {
 	int error;
 	int error;
-	struct nfs_server *server;
+	struct nfs_server *server = NULL;
 	struct super_block *s;
 	struct super_block *s;
 	struct nfs_fh *root;
 	struct nfs_fh *root;
 	struct nfs_mount_data *data = raw_data;
 	struct nfs_mount_data *data = raw_data;
 
 
-	if (!data) {
-		printk("nfs_read_super: missing data argument\n");
-		return ERR_PTR(-EINVAL);
+	s = ERR_PTR(-EINVAL);
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		goto out_err;
+	}
+	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
+		goto out_err;
 	}
 	}
+	switch (data->version) {
+		case 1:
+			data->namlen = 0;
+		case 2:
+			data->bsize  = 0;
+		case 3:
+			if (data->flags & NFS_MOUNT_VER3) {
+				dprintk("%s: mount structure version %d does not support NFSv3\n",
+						__FUNCTION__,
+						data->version);
+				goto out_err;
+			}
+			data->root.size = NFS2_FHSIZE;
+			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		case 4:
+			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+				dprintk("%s: mount structure version %d does not support strong security\n",
+						__FUNCTION__,
+						data->version);
+				goto out_err;
+			}
+		case 5:
+			memset(data->context, 0, sizeof(data->context));
+	}
+#ifndef CONFIG_NFS_V3
+	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+	s = ERR_PTR(-EPROTONOSUPPORT);
+	if (data->flags & NFS_MOUNT_VER3) {
+		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
+		goto out_err;
+	}
+#endif /* CONFIG_NFS_V3 */
 
 
+	s = ERR_PTR(-ENOMEM);
 	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
 	if (!server)
-		return ERR_PTR(-ENOMEM);
+		goto out_err;
 	memset(server, 0, sizeof(struct nfs_server));
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	init_nfsv4_state(server);
-
-	if (data->version != NFS_MOUNT_VERSION) {
-		printk("nfs warning: mount version %s than kernel\n",
-			data->version < NFS_MOUNT_VERSION ? "older" : "newer");
-		if (data->version < 2)
-			data->namlen = 0;
-		if (data->version < 3)
-			data->bsize  = 0;
-		if (data->version < 4) {
-			data->flags &= ~NFS_MOUNT_VER3;
-			data->root.size = NFS2_FHSIZE;
-			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		}
-		if (data->version < 5)
-			data->flags &= ~NFS_MOUNT_SECFLAVOUR;
-	}
+	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
 
 
 	root = &server->fh;
 	root = &server->fh;
 	if (data->flags & NFS_MOUNT_VER3)
 	if (data->flags & NFS_MOUNT_VER3)
 		root->size = data->root.size;
 		root->size = data->root.size;
 	else
 	else
 		root->size = NFS2_FHSIZE;
 		root->size = NFS2_FHSIZE;
+	s = ERR_PTR(-EINVAL);
 	if (root->size > sizeof(root->data)) {
 	if (root->size > sizeof(root->data)) {
-		printk("nfs_get_sb: invalid root filehandle\n");
-		kfree(server);
-		return ERR_PTR(-EINVAL);
+		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+		goto out_err;
 	}
 	}
 	memcpy(root->data, data->root.data, root->size);
 	memcpy(root->data, data->root.data, root->size);
 
 
 	/* We now require that the mount process passes the remote address */
 	/* We now require that the mount process passes the remote address */
 	memcpy(&server->addr, &data->addr, sizeof(server->addr));
 	memcpy(&server->addr, &data->addr, sizeof(server->addr));
 	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
 	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
-		printk("NFS: mount program didn't pass remote address!\n");
-		kfree(server);
-		return ERR_PTR(-EINVAL);
+		dprintk("%s: mount program didn't pass remote address!\n",
+				__FUNCTION__);
+		goto out_err;
 	}
 	}
 
 
-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
-
-	if (IS_ERR(s) || s->s_root) {
-		kfree(server);
-		return s;
+	/* Fire up rpciod if not yet running */
+	s = ERR_PTR(rpciod_up());
+	if (IS_ERR(s)) {
+		dprintk("%s: couldn't start rpciod! Error = %ld\n",
+				__FUNCTION__, PTR_ERR(s));
+		goto out_err;
 	}
 	}
 
 
-	s->s_flags = flags;
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s) || s->s_root)
+		goto out_rpciod_down;
 
 
-	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0) {
-		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
-		kfree(server);
-		return ERR_PTR(-EIO);
-	}
+	s->s_flags = flags;
 
 
 	error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 	error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 	if (error) {
 	if (error) {
@@ -1462,6 +1519,11 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 	}
 	}
 	s->s_flags |= MS_ACTIVE;
 	s->s_flags |= MS_ACTIVE;
 	return s;
 	return s;
+out_rpciod_down:
+	rpciod_down();
+out_err:
+	kfree(server);
+	return s;
 }
 }
 
 
 static void nfs_kill_super(struct super_block *s)
 static void nfs_kill_super(struct super_block *s)
@@ -1470,10 +1532,12 @@ static void nfs_kill_super(struct super_block *s)
 
 
 	kill_anon_super(s);
 	kill_anon_super(s);
 
 
-	if (server->client != NULL && !IS_ERR(server->client))
+	if (!IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
 		rpc_shutdown_client(server->client);
-	if (server->client_sys != NULL && !IS_ERR(server->client_sys))
+	if (!IS_ERR(server->client_sys))
 		rpc_shutdown_client(server->client_sys);
 		rpc_shutdown_client(server->client_sys);
+	if (!IS_ERR(server->client_acl))
+		rpc_shutdown_client(server->client_acl);
 
 
 	if (!(server->flags & NFS_MOUNT_NONLM))
 	if (!(server->flags & NFS_MOUNT_NONLM))
 		lockd_down();	/* release rpc.lockd */
 		lockd_down();	/* release rpc.lockd */
@@ -1594,15 +1658,19 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
 
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
 	if (!clp) {
-		printk(KERN_WARNING "NFS: failed to create NFS4 client.\n");
+		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
 		return -EIO;
 		return -EIO;
 	}
 	}
 
 
 	/* Now create transport and client */
 	/* Now create transport and client */
 	authflavour = RPC_AUTH_UNIX;
 	authflavour = RPC_AUTH_UNIX;
 	if (data->auth_flavourlen != 0) {
 	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen > 1)
-			printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
+		if (data->auth_flavourlen != 1) {
+			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+					__FUNCTION__, data->auth_flavourlen);
+			err = -EINVAL;
+			goto out_fail;
+		}
 		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
 		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
 			err = -EFAULT;
 			err = -EFAULT;
 			goto out_fail;
 			goto out_fail;
@@ -1610,21 +1678,22 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	}
 	}
 
 
 	down_write(&clp->cl_sem);
 	down_write(&clp->cl_sem);
-	if (clp->cl_rpcclient == NULL) {
+	if (IS_ERR(clp->cl_rpcclient)) {
 		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 		if (IS_ERR(xprt)) {
 		if (IS_ERR(xprt)) {
 			up_write(&clp->cl_sem);
 			up_write(&clp->cl_sem);
-			printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
 			err = PTR_ERR(xprt);
 			err = PTR_ERR(xprt);
+			dprintk("%s: cannot create RPC transport. Error = %d\n",
+					__FUNCTION__, err);
 			goto out_fail;
 			goto out_fail;
 		}
 		}
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 				server->rpc_ops->version, authflavour);
 				server->rpc_ops->version, authflavour);
 		if (IS_ERR(clnt)) {
 		if (IS_ERR(clnt)) {
 			up_write(&clp->cl_sem);
 			up_write(&clp->cl_sem);
-			printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-			xprt_destroy(xprt);
 			err = PTR_ERR(clnt);
 			err = PTR_ERR(clnt);
+			dprintk("%s: cannot create RPC client. Error = %d\n",
+					__FUNCTION__, err);
 			goto out_fail;
 			goto out_fail;
 		}
 		}
 		clnt->cl_intr     = 1;
 		clnt->cl_intr     = 1;
@@ -1656,21 +1725,26 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	clp = NULL;
 	clp = NULL;
 
 
 	if (IS_ERR(clnt)) {
 	if (IS_ERR(clnt)) {
-		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
-		return PTR_ERR(clnt);
+		err = PTR_ERR(clnt);
+		dprintk("%s: cannot create RPC client. Error = %d\n",
+				__FUNCTION__, err);
+		return err;
 	}
 	}
 
 
 	server->client    = clnt;
 	server->client    = clnt;
 
 
 	if (server->nfs4_state->cl_idmap == NULL) {
 	if (server->nfs4_state->cl_idmap == NULL) {
-		printk(KERN_WARNING "NFS: failed to create idmapper.\n");
+		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
 
 
 	if (clnt->cl_auth->au_flavor != authflavour) {
 	if (clnt->cl_auth->au_flavor != authflavour) {
-		if (rpcauth_create(authflavour, clnt) == NULL) {
-			printk(KERN_WARNING "NFS: couldn't create credcache!\n");
-			return -ENOMEM;
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(authflavour, clnt);
+		if (IS_ERR(auth)) {
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+			return PTR_ERR(auth);
 		}
 		}
 	}
 	}
 
 
@@ -1730,8 +1804,12 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	struct nfs4_mount_data *data = raw_data;
 	struct nfs4_mount_data *data = raw_data;
 	void *p;
 	void *p;
 
 
-	if (!data) {
-		printk("nfs_read_super: missing data argument\n");
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		return ERR_PTR(-EINVAL);
+	}
+	if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
 		return ERR_PTR(-EINVAL);
 		return ERR_PTR(-EINVAL);
 	}
 	}
 
 
@@ -1741,11 +1819,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	memset(server, 0, sizeof(struct nfs_server));
 	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	init_nfsv4_state(server);
-
-	if (data->version != NFS4_MOUNT_VERSION) {
-		printk("nfs warning: mount version %s than kernel\n",
-			data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
-	}
+	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
 
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
 	if (IS_ERR(p))
@@ -1773,11 +1847,20 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 	}
 	}
 	if (server->addr.sin_family != AF_INET ||
 	if (server->addr.sin_family != AF_INET ||
 	    server->addr.sin_addr.s_addr == INADDR_ANY) {
 	    server->addr.sin_addr.s_addr == INADDR_ANY) {
-		printk("NFS: mount program didn't pass remote IP address!\n");
+		dprintk("%s: mount program didn't pass remote IP address!\n",
+				__FUNCTION__);
 		s = ERR_PTR(-EINVAL);
 		s = ERR_PTR(-EINVAL);
 		goto out_free;
 		goto out_free;
 	}
 	}
 
 
+	/* Fire up rpciod if not yet running */
+	s = ERR_PTR(rpciod_up());
+	if (IS_ERR(s)) {
+		dprintk("%s: couldn't start rpciod! Error = %ld\n",
+				__FUNCTION__, PTR_ERR(s));
+		goto out_free;
+	}
+
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
 
 
 	if (IS_ERR(s) || s->s_root)
 	if (IS_ERR(s) || s->s_root)
@@ -1785,13 +1868,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
 
 
 	s->s_flags = flags;
 	s->s_flags = flags;
 
 
-	/* Fire up rpciod if not yet running */
-	if (rpciod_up() != 0) {
-		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
-		s = ERR_PTR(-EIO);
-		goto out_free;
-	}
-
 	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
 	if (error) {
 	if (error) {
 		up_write(&s->s_umount);
 		up_write(&s->s_umount);
@@ -1875,6 +1951,13 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
 	if (!nfsi)
 	if (!nfsi)
 		return NULL;
 		return NULL;
 	nfsi->flags = 0;
 	nfsi->flags = 0;
+#ifdef CONFIG_NFS_V3_ACL
+	nfsi->acl_access = ERR_PTR(-EAGAIN);
+	nfsi->acl_default = ERR_PTR(-EAGAIN);
+#endif
+#ifdef CONFIG_NFS_V4
+	nfsi->nfs4_acl = NULL;
+#endif /* CONFIG_NFS_V4 */
 	return &nfsi->vfs_inode;
 	return &nfsi->vfs_inode;
 }
 }
 
 

+ 1 - 3
fs/nfs/mount_clnt.c

@@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 	clnt = rpc_create_client(xprt, hostname,
 	clnt = rpc_create_client(xprt, hostname,
 				&mnt_program, version,
 				&mnt_program, version,
 				RPC_AUTH_UNIX);
 				RPC_AUTH_UNIX);
-	if (IS_ERR(clnt)) {
-		xprt_destroy(xprt);
-	} else {
+	if (!IS_ERR(clnt)) {
 		clnt->cl_softrtry = 1;
 		clnt->cl_softrtry = 1;
 		clnt->cl_chatty   = 1;
 		clnt->cl_chatty   = 1;
 		clnt->cl_oneshot  = 1;
 		clnt->cl_oneshot  = 1;

+ 403 - 0
fs/nfs/nfs3acl.c

@@ -0,0 +1,403 @@
+#include <linux/fs.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+#include <linux/xattr_acl.h>
+#include <linux/nfsacl.h>
+
+#define NFSDBG_FACILITY	NFSDBG_PROC
+
+/*
+ * List the POSIX ACL extended attribute names present on an inode.
+ *
+ * Each name (including its terminating NUL) is copied into @buffer while
+ * it still fits within @size; the total number of bytes required is
+ * always accumulated.  Returns that total when @buffer is NULL or large
+ * enough, -ERANGE when a non-NULL @buffer is too small, or the error
+ * reported by nfs3_proc_getacl().
+ */
+ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int offset = 0, needed = 0;
+
+	/* Append one attribute name if it fits; always count its length. */
+#	define emit_name(s) do {					\
+			if (offset + sizeof(s) <= size) {		\
+				memcpy(buffer + offset, s, sizeof(s));	\
+				offset += sizeof(s);			\
+			}						\
+			needed += sizeof(s);				\
+		} while(0)
+
+	acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl != NULL) {
+		emit_name("system.posix_acl_access");
+		posix_acl_release(acl);
+	}
+
+	/* Only directories are asked for a default ACL. */
+	if (S_ISDIR(inode->i_mode)) {
+		acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		if (acl != NULL) {
+			emit_name("system.posix_acl_default");
+			posix_acl_release(acl);
+		}
+	}
+
+#	undef emit_name
+
+	if (buffer && needed > size)
+		return -ERANGE;
+	return needed;
+}
+
+/*
+ * Read a POSIX ACL extended attribute.
+ *
+ * Only XATTR_NAME_ACL_ACCESS and XATTR_NAME_ACL_DEFAULT are recognised;
+ * any other name yields -EOPNOTSUPP.  Returns the result of
+ * posix_acl_to_xattr(), -ENODATA when there is no ACL (or the access ACL
+ * has no entries), or the error from nfs3_proc_getacl().
+ */
+ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
+		void *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int type, error = 0;
+
+	if (!strcmp(name, XATTR_NAME_ACL_ACCESS))
+		type = ACL_TYPE_ACCESS;
+	else if (!strcmp(name, XATTR_NAME_ACL_DEFAULT))
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	acl = nfs3_proc_getacl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+
+	/* An access ACL without entries is reported as absent. */
+	if (type == ACL_TYPE_ACCESS && acl->a_count == 0)
+		error = -ENODATA;
+	else
+		error = posix_acl_to_xattr(acl, buffer, size);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+/*
+ * Set a POSIX ACL extended attribute.
+ *
+ * The xattr value is converted with posix_acl_from_xattr() and handed to
+ * nfs3_proc_setacl().  Names other than XATTR_NAME_ACL_ACCESS and
+ * XATTR_NAME_ACL_DEFAULT yield -EOPNOTSUPP.  @flags is not consulted.
+ */
+int nfs3_setxattr(struct dentry *dentry, const char *name,
+	     const void *value, size_t size, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	int type, error;
+
+	if (!strcmp(name, XATTR_NAME_ACL_ACCESS))
+		type = ACL_TYPE_ACCESS;
+	else if (!strcmp(name, XATTR_NAME_ACL_DEFAULT))
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	acl = posix_acl_from_xattr(value, size);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+
+	error = nfs3_proc_setacl(inode, type, acl);
+	/* Drop the reference obtained from the conversion above. */
+	posix_acl_release(acl);
+
+	return error;
+}
+
+/*
+ * Remove a POSIX ACL extended attribute by calling nfs3_proc_setacl()
+ * with a NULL ACL.  Names other than XATTR_NAME_ACL_ACCESS and
+ * XATTR_NAME_ACL_DEFAULT yield -EOPNOTSUPP.
+ */
+int nfs3_removexattr(struct dentry *dentry, const char *name)
+{
+	int type;
+
+	if (!strcmp(name, XATTR_NAME_ACL_ACCESS))
+		type = ACL_TYPE_ACCESS;
+	else if (!strcmp(name, XATTR_NAME_ACL_DEFAULT))
+		type = ACL_TYPE_DEFAULT;
+	else
+		return -EOPNOTSUPP;
+
+	return nfs3_proc_setacl(dentry->d_inode, type, NULL);
+}
+
+/*
+ * Drop both cached ACLs of an NFS inode and mark each slot as "not
+ * cached" with ERR_PTR(-EAGAIN).  Slots that already hold an error
+ * pointer are left untouched.  Callers in this file hold inode->i_lock.
+ */
+static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi)
+{
+	struct posix_acl *old;
+
+	old = nfsi->acl_access;
+	if (!IS_ERR(old)) {
+		nfsi->acl_access = ERR_PTR(-EAGAIN);
+		posix_acl_release(old);
+	}
+
+	old = nfsi->acl_default;
+	if (!IS_ERR(old)) {
+		nfsi->acl_default = ERR_PTR(-EAGAIN);
+		posix_acl_release(old);
+	}
+}
+
+/*
+ * Invalidate the cached ACLs of @inode, taking inode->i_lock around the
+ * update.
+ */
+void nfs3_forget_cached_acls(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id,
+		inode->i_ino);
+
+	spin_lock(&inode->i_lock);
+	__nfs3_forget_cached_acls(nfsi);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Look up a cached ACL of the given type under inode->i_lock.
+ *
+ * Returns a new reference from posix_acl_dup() when the slot holds an
+ * ACL (or NULL), ERR_PTR(-EAGAIN) when the slot holds an error pointer,
+ * and ERR_PTR(-EINVAL) for an unknown @type.
+ */
+static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct posix_acl *acl = ERR_PTR(-EINVAL);
+	struct posix_acl *cached = NULL;
+	int known_type = 1;
+
+	spin_lock(&inode->i_lock);
+	if (type == ACL_TYPE_ACCESS)
+		cached = nfsi->acl_access;
+	else if (type == ACL_TYPE_DEFAULT)
+		cached = nfsi->acl_default;
+	else
+		known_type = 0;
+
+	if (known_type) {
+		if (IS_ERR(cached))
+			acl = ERR_PTR(-EAGAIN);
+		else
+			acl = posix_acl_dup(cached);
+	}
+	spin_unlock(&inode->i_lock);
+
+	dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id,
+		inode->i_ino, type, acl);
+	return acl;
+}
+
+/*
+ * Replace the cached access and default ACLs of @inode.  The previous
+ * cache contents are dropped first and the cache takes its own
+ * references via posix_acl_dup(), all under inode->i_lock.
+ */
+static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl,
+		    struct posix_acl *dfacl)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id,
+		inode->i_ino, acl, dfacl);
+
+	spin_lock(&inode->i_lock);
+	__nfs3_forget_cached_acls(nfsi);
+	nfsi->acl_access = posix_acl_dup(acl);
+	nfsi->acl_default = posix_acl_dup(dfacl);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Obtain the ACL of the requested type for @inode.
+ *
+ * The inode is revalidated and the ACL cache consulted first; only on a
+ * cache miss is an ACLPROC3_GETACL call issued through
+ * server->client_acl.  Returns the ACL (possibly NULL), or an ERR_PTR():
+ * -EOPNOTSUPP when the server lacks NFS_CAP_ACLS or rejects the call,
+ * -EIO when the reply does not cover every requested mask bit, or the
+ * status from revalidation / the RPC / nfs_refresh_inode().
+ */
+struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr fattr;
+	struct page *pages[NFSACL_MAXPAGES] = { };
+	struct nfs3_getaclargs args = {
+		.fh = NFS_FH(inode),
+		/* The xdr layer may allocate pages here. */
+		.pages = pages,
+	};
+	struct nfs3_getaclres res = {
+		.fattr =	&fattr,
+	};
+	struct posix_acl *acl;
+	int status, i;
+
+	if (!nfs_server_capable(inode, NFS_CAP_ACLS))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	status = nfs_revalidate_inode(server, inode);
+	if (status < 0)
+		return ERR_PTR(status);
+
+	/* Anything other than -EAGAIN is a definitive cache answer. */
+	acl = nfs3_get_cached_acl(inode, type);
+	if (acl != ERR_PTR(-EAGAIN))
+		return acl;
+	acl = NULL;
+
+	/*
+	 * Only get the access acl when explicitly requested: We don't
+	 * need it for access decisions, and only some applications use
+	 * it. Applications which request the access acl first are not
+	 * penalized from this optimization.
+	 */
+	if (type == ACL_TYPE_ACCESS)
+		args.mask |= NFS_ACLCNT|NFS_ACL;
+	if (S_ISDIR(inode->i_mode))
+		args.mask |= NFS_DFACLCNT|NFS_DFACL;
+	if (args.mask == 0)
+		return NULL;
+
+	dprintk("NFS call getacl\n");
+	status = rpc_call(server->client_acl, ACLPROC3_GETACL,
+			  &args, &res, 0);
+	dprintk("NFS reply getacl: %d\n", status);
+
+	/* pages may have been allocated at the xdr layer. */
+	for (i = 0; i < NFSACL_MAXPAGES && args.pages[i]; i++)
+		__free_page(args.pages[i]);
+
+	if (status == 0) {
+		status = nfs_refresh_inode(inode, &fattr);
+	} else {
+		if (status == -EPFNOSUPPORT || status == -EPROTONOSUPPORT) {
+			dprintk("NFS_V3_ACL extension not supported; disabling\n");
+			server->caps &= ~NFS_CAP_ACLS;
+			status = -EOPNOTSUPP;
+		} else if (status == -ENOTSUPP) {
+			status = -EOPNOTSUPP;
+		}
+		goto getout;
+	}
+
+	/* The server must answer every part of the mask we asked for. */
+	if ((args.mask & res.mask) != args.mask) {
+		status = -EIO;
+		goto getout;
+	}
+
+	/* An access ACL equivalent to the mode bits is treated as absent. */
+	if (res.acl_access != NULL &&
+	    posix_acl_equiv_mode(res.acl_access, NULL) == 0) {
+		posix_acl_release(res.acl_access);
+		res.acl_access = NULL;
+	}
+	nfs3_cache_acls(inode, res.acl_access, res.acl_default);
+
+	/* Hand the requested ACL to the caller; the other is released below. */
+	if (type == ACL_TYPE_ACCESS) {
+		acl = res.acl_access;
+		res.acl_access = NULL;
+	} else if (type == ACL_TYPE_DEFAULT) {
+		acl = res.acl_default;
+		res.acl_default = NULL;
+	}
+
+getout:
+	posix_acl_release(res.acl_access);
+	posix_acl_release(res.acl_default);
+
+	if (status != 0) {
+		posix_acl_release(acl);
+		acl = ERR_PTR(status);
+	}
+	return acl;
+}
+
+/*
+ * Send an ACLPROC3_SETACL call carrying the access ACL and, for
+ * directories, the default ACL.
+ *
+ * Returns 0 or a negative errno: -EOPNOTSUPP when the server lacks
+ * NFS_CAP_ACLS or rejects the call, -ENOSPC when either ACL has more
+ * than NFS_ACL_MAX_ENTRIES entries, otherwise the status of the RPC or
+ * of nfs_refresh_inode().
+ */
+static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+		  struct posix_acl *dfacl)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr fattr;
+	struct page *pages[NFSACL_MAXPAGES] = { };
+	struct nfs3_setaclargs args = {
+		.inode = inode,
+		.mask = NFS_ACL,
+		.acl_access = acl,
+		.pages = pages,
+	};
+	int status, i;
+
+	if (!nfs_server_capable(inode, NFS_CAP_ACLS))
+		return -EOPNOTSUPP;
+
+	/* We are doing this here, because XDR marshalling can only
+	   return -ENOMEM. */
+	if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES)
+		return -ENOSPC;
+	if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES)
+		return -ENOSPC;
+
+	if (S_ISDIR(inode->i_mode)) {
+		args.mask |= NFS_DFACL;
+		args.acl_default = dfacl;
+	}
+
+	dprintk("NFS call setacl\n");
+	nfs_begin_data_update(inode);
+	status = rpc_call(server->client_acl, ACLPROC3_SETACL,
+			  &args, &fattr, 0);
+	NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+	nfs_end_data_update(inode);
+	dprintk("NFS reply setacl: %d\n", status);
+
+	/* pages may have been allocated at the xdr layer. */
+	for (i = 0; i < NFSACL_MAXPAGES && args.pages[i]; i++)
+		__free_page(args.pages[i]);
+
+	if (status == 0) {
+		status = nfs_refresh_inode(inode, &fattr);
+	} else if (status == -EPFNOSUPPORT || status == -EPROTONOSUPPORT) {
+		dprintk("NFS_V3_ACL SETACL RPC not supported"
+				"(will not retry)\n");
+		server->caps &= ~NFS_CAP_ACLS;
+		status = -EOPNOTSUPP;
+	} else if (status == -ENOTSUPP) {
+		status = -EOPNOTSUPP;
+	}
+
+	return status;
+}
+
+/*
+ * Set (or, with @acl == NULL, reset) one ACL of @inode.
+ *
+ * SETACL on a directory sends both ACLs, so the ACL that is not being
+ * changed is fetched first with nfs3_proc_getacl().  When no access ACL
+ * is available one is synthesised from the mode bits with
+ * posix_acl_from_mode().
+ *
+ * The fetched ACL and the synthesised ACL are tracked in separate
+ * variables so that both references are dropped on every exit path;
+ * sharing a single variable leaked the fetched default ACL whenever the
+ * access ACL of a directory was reset.
+ *
+ * Returns 0 or a negative errno; -EINVAL for an unsupported @type.
+ */
+int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct posix_acl *fetched = NULL, *synth = NULL, *dfacl = NULL;
+	int status;
+
+	if (S_ISDIR(inode->i_mode)) {
+		switch(type) {
+			case ACL_TYPE_ACCESS:
+				/* Keep the existing default ACL. */
+				fetched = dfacl = nfs3_proc_getacl(inode,
+						ACL_TYPE_DEFAULT);
+				if (IS_ERR(fetched))
+					return PTR_ERR(fetched);
+				break;
+
+			case ACL_TYPE_DEFAULT:
+				/* Keep the existing access ACL. */
+				dfacl = acl;
+				fetched = acl = nfs3_proc_getacl(inode,
+						ACL_TYPE_ACCESS);
+				if (IS_ERR(fetched))
+					return PTR_ERR(fetched);
+				break;
+
+			default:
+				return -EINVAL;
+		}
+	} else if (type != ACL_TYPE_ACCESS)
+			return -EINVAL;
+
+	if (acl == NULL) {
+		synth = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+		if (IS_ERR(synth)) {
+			status = PTR_ERR(synth);
+			synth = NULL;
+			goto out;
+		}
+	}
+	status = nfs3_proc_setacls(inode, acl, dfacl);
+
+out:
+	/* posix_acl_release() accepts NULL, so no guards are needed. */
+	posix_acl_release(synth);
+	posix_acl_release(fetched);
+	return status;
+}
+
+/*
+ * Apply the default ACL of @dir to the newly created @inode.
+ *
+ * A clone of the directory's default ACL is adjusted against @mode with
+ * posix_acl_create_masq() and sent as the access ACL; the default ACL
+ * itself is also passed on when @inode is a directory.  Returns 0 when
+ * @dir has no default ACL or ACLs are unsupported (-EOPNOTSUPP from the
+ * lookup), otherwise 0 or a negative errno.
+ */
+int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
+		mode_t mode)
+{
+	struct posix_acl *dfacl, *acl;
+	int error = 0;
+
+	dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT);
+	if (IS_ERR(dfacl)) {
+		error = PTR_ERR(dfacl);
+		if (error == -EOPNOTSUPP)
+			error = 0;
+		return error;
+	}
+	if (dfacl == NULL)
+		return 0;
+
+	acl = posix_acl_clone(dfacl, GFP_KERNEL);
+	if (acl == NULL) {
+		error = -ENOMEM;
+	} else {
+		error = posix_acl_create_masq(acl, &mode);
+		if (error >= 0)
+			error = nfs3_proc_setacls(inode, acl,
+					S_ISDIR(inode->i_mode) ? dfacl : NULL);
+		posix_acl_release(acl);
+	}
+
+	posix_acl_release(dfacl);
+	return error;
+}

+ 35 - 8
fs/nfs/nfs3proc.c

@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
 #include <linux/lockd/bind.h>
 #include <linux/smp_lock.h>
 #include <linux/smp_lock.h>
+#include <linux/nfs_mount.h>
 
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
 
@@ -45,7 +46,7 @@ static inline int
 nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
 nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
 {
 {
 	struct rpc_message msg = {
 	struct rpc_message msg = {
-		.rpc_proc	= &nfs3_procedures[proc],
+		.rpc_proc	= &clnt->cl_procinfo[proc],
 		.rpc_argp	= argp,
 		.rpc_argp	= argp,
 		.rpc_resp	= resp,
 		.rpc_resp	= resp,
 	};
 	};
@@ -313,7 +314,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fhandle,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 		.fattr		= &fattr
 	};
 	};
-	int			status;
+	mode_t mode = sattr->ia_mode;
+	int status;
 
 
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	arg.createmode = NFS3_CREATE_UNCHECKED;
 	arg.createmode = NFS3_CREATE_UNCHECKED;
@@ -323,6 +325,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		arg.verifier[1] = current->pid;
 		arg.verifier[1] = current->pid;
 	}
 	}
 
 
+	sattr->ia_mode &= ~current->fs->umask;
+
 again:
 again:
 	dir_attr.valid = 0;
 	dir_attr.valid = 0;
 	fattr.valid = 0;
 	fattr.valid = 0;
@@ -369,6 +373,9 @@ again:
 		nfs_refresh_inode(dentry->d_inode, &fattr);
 		nfs_refresh_inode(dentry->d_inode, &fattr);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 	}
 	}
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
 out:
 out:
 	dprintk("NFS reply create: %d\n", status);
 	dprintk("NFS reply create: %d\n", status);
 	return status;
 	return status;
@@ -538,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 		.fh		= &fhandle,
 		.fh		= &fhandle,
 		.fattr		= &fattr
 		.fattr		= &fattr
 	};
 	};
-	int			status;
+	int mode = sattr->ia_mode;
+	int status;
 
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 	dir_attr.valid = 0;
 	dir_attr.valid = 0;
 	fattr.valid = 0;
 	fattr.valid = 0;
+
+	sattr->ia_mode &= ~current->fs->umask;
+
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	nfs_refresh_inode(dir, &dir_attr);
-	if (status == 0)
-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+out:
 	dprintk("NFS reply mkdir: %d\n", status);
 	dprintk("NFS reply mkdir: %d\n", status);
 	return status;
 	return status;
 }
 }
@@ -641,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		.fh		= &fh,
 		.fh		= &fh,
 		.fattr		= &fattr
 		.fattr		= &fattr
 	};
 	};
+	mode_t mode = sattr->ia_mode;
 	int status;
 	int status;
 
 
 	switch (sattr->ia_mode & S_IFMT) {
 	switch (sattr->ia_mode & S_IFMT) {
@@ -653,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 
 	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
 	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,
 			MAJOR(rdev), MINOR(rdev));
 			MAJOR(rdev), MINOR(rdev));
+
+	sattr->ia_mode &= ~current->fs->umask;
+
 	dir_attr.valid = 0;
 	dir_attr.valid = 0;
 	fattr.valid = 0;
 	fattr.valid = 0;
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	nfs_refresh_inode(dir, &dir_attr);
-	if (status == 0)
-		status = nfs_instantiate(dentry, &fh, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fh, &fattr);
+	if (status != 0)
+		goto out;
+	status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+out:
 	dprintk("NFS reply mknod: %d\n", status);
 	dprintk("NFS reply mknod: %d\n", status);
 	return status;
 	return status;
 }
 }
@@ -825,7 +850,8 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 struct nfs_rpc_ops	nfs_v3_clientops = {
 struct nfs_rpc_ops	nfs_v3_clientops = {
 	.version	= 3,			/* protocol version */
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dentry_ops	= &nfs_dentry_operations,
-	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.dir_inode_ops	= &nfs3_dir_inode_operations,
+	.file_inode_ops	= &nfs3_file_inode_operations,
 	.getroot	= nfs3_proc_get_root,
 	.getroot	= nfs3_proc_get_root,
 	.getattr	= nfs3_proc_getattr,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
 	.setattr	= nfs3_proc_setattr,
@@ -856,4 +882,5 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.file_open	= nfs_open,
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
 	.file_release	= nfs_release,
 	.lock		= nfs3_proc_lock,
 	.lock		= nfs3_proc_lock,
+	.clear_acl_cache = nfs3_forget_cached_acls,
 };
 };

+ 147 - 0
fs/nfs/nfs3xdr.c

@@ -21,6 +21,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfsacl.h>
 
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
 
@@ -79,6 +80,11 @@ extern int			nfs_stat_to_errno(int);
 #define NFS3_pathconfres_sz	(1+NFS3_post_op_attr_sz+6)
 #define NFS3_pathconfres_sz	(1+NFS3_post_op_attr_sz+6)
 #define NFS3_commitres_sz	(1+NFS3_wcc_data_sz+2)
 #define NFS3_commitres_sz	(1+NFS3_wcc_data_sz+2)
 
 
+#define ACL3_getaclargs_sz	(NFS3_fh_sz+1)
+#define ACL3_setaclargs_sz	(NFS3_fh_sz+1+2*(2+5*3))
+#define ACL3_getaclres_sz	(1+NFS3_post_op_attr_sz+1+2*(2+5*3))
+#define ACL3_setaclres_sz	(1+NFS3_post_op_attr_sz)
+
 /*
 /*
  * Map file type to S_IFMT bits
  * Map file type to S_IFMT bits
  */
  */
@@ -627,6 +633,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
 	return 0;
 	return 0;
 }
 }
 
 
+#ifdef CONFIG_NFS_V3_ACL
+/*
+ * Encode GETACL arguments: the file handle followed by the request
+ * mask.  When either ACL body is requested, the caller's page array is
+ * attached to the receive buffer after the fixed-size part of the reply.
+ */
+static int
+nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p,
+		    struct nfs3_getaclargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int replen;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	*p++ = htonl(args->mask);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	if (!(args->mask & (NFS_ACL | NFS_DFACL)))
+		return 0;
+
+	/* Inline the page array */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack +
+		  ACL3_getaclres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0,
+			 NFSACL_MAXPAGES << PAGE_SHIFT);
+	return 0;
+}
+
+/*
+ * Encode SETACL arguments: file handle, mask, then the access and
+ * default ACLs.  As much ACL data as fits goes into the head buffer;
+ * the remainder is placed in pages allocated here and stored in
+ * args->pages[], which the caller frees after the call.
+ *
+ * Returns 0 on success, -ENOMEM if a page cannot be allocated, or the
+ * error from nfsacl_encode().
+ */
+static int
+nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p,
+                   struct nfs3_setaclargs *args)
+{
+	struct xdr_buf *buf = &req->rq_snd_buf;
+	unsigned int base, len_in_head, len = nfsacl_size(
+		(args->mask & NFS_ACL)   ? args->acl_access  : NULL,
+		(args->mask & NFS_DFACL) ? args->acl_default : NULL);
+	int count, err;
+
+	p = xdr_encode_fhandle(p, NFS_FH(args->inode));
+	*p++ = htonl(args->mask);
+	base = (char *)p - (char *)buf->head->iov_base;
+	/* put as much of the acls into head as possible. */
+	len_in_head = min_t(unsigned int, buf->head->iov_len - base, len);
+	len -= len_in_head;
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2));
+
+	for (count = 0; (count << PAGE_SHIFT) < len; count++) {
+		args->pages[count] = alloc_page(GFP_KERNEL);
+		if (!args->pages[count]) {
+			/*
+			 * Undo the partial allocation and clear the slots:
+			 * the caller frees every non-NULL entry of
+			 * args->pages[], so stale pointers left here would
+			 * be freed a second time.
+			 */
+			while (count) {
+				__free_page(args->pages[--count]);
+				args->pages[count] = NULL;
+			}
+			return -ENOMEM;
+		}
+	}
+	xdr_encode_pages(buf, args->pages, 0, len);
+
+	err = nfsacl_encode(buf, base, args->inode,
+			    (args->mask & NFS_ACL) ?
+			    args->acl_access : NULL, 1, 0);
+	if (err > 0)
+		err = nfsacl_encode(buf, base + err, args->inode,
+				    (args->mask & NFS_DFACL) ?
+				    args->acl_default : NULL, 1,
+				    NFS_ACL_DEFAULT);
+	return (err > 0) ? 0 : err;
+}
+#endif  /* CONFIG_NFS_V3_ACL */
+
 /*
 /*
  * NFS XDR decode functions
  * NFS XDR decode functions
  */
  */
@@ -978,6 +1052,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
 	return 0;
 	return 0;
 }
 }
 
 
+#ifdef CONFIG_NFS_V3_ACL
+/*
+ * Decode GETACL reply
+ */
+static int
+nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p,
+		   struct nfs3_getaclres *res)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	int status, err, base;
+
+	/* A non-zero NFS status ends the reply. */
+	status = ntohl(*p++);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+
+	/* Reject replies that carry mask bits we do not know about. */
+	res->mask = ntohl(*p++);
+	if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		return -EINVAL;
+
+	/* Offset of the first ACL relative to the start of the head. */
+	base = (char *)p - (char *)rcvbuf->head->iov_base;
+
+	/* Access ACL: only hand out the slots the mask says are present. */
+	err = nfsacl_decode(rcvbuf, base,
+			    (res->mask & NFS_ACLCNT) ?
+			    &res->acl_access_count : NULL,
+			    (res->mask & NFS_ACL) ?
+			    &res->acl_access : NULL);
+	if (err <= 0)
+		return err;
+
+	/* Default ACL follows immediately behind the access ACL. */
+	err = nfsacl_decode(rcvbuf, base + err,
+			    (res->mask & NFS_DFACLCNT) ?
+			    &res->acl_default_count : NULL,
+			    (res->mask & NFS_DFACL) ?
+			    &res->acl_default : NULL);
+	return (err > 0) ? 0 : err;
+}
+
+/*
+ * Decode setacl reply.
+ */
+static int
+nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int status = ntohl(*p++);
+
+	/* On success the only payload is the post-op attributes. */
+	if (status == 0) {
+		xdr_decode_post_op_attr(p, fattr);
+		return 0;
+	}
+	return -nfs_stat_to_errno(status);
+}
+#endif  /* CONFIG_NFS_V3_ACL */
+
 #ifndef MAX
 #ifndef MAX
 # define MAX(a, b)	(((a) > (b))? (a) : (b))
 # define MAX(a, b)	(((a) > (b))? (a) : (b))
 #endif
 #endif
@@ -1021,3 +1143,28 @@ struct rpc_version		nfs_version3 = {
 	.procs			= nfs3_procedures
 	.procs			= nfs3_procedures
 };
 };
 
 
+#ifdef CONFIG_NFS_V3_ACL
+/*
+ * RPC procedure table for the NFSv3 ACL protocol, indexed by ACLPROC3_*
+ * procedure number.  Each entry pairs the argument encoder with the
+ * matching reply decoder; p_bufsiz is the larger of the argument and
+ * reply sizes, shifted left by two (the *_sz constants are presumably
+ * counted in 32-bit XDR words - confirm against their definitions).
+ */
+static struct rpc_procinfo	nfs3_acl_procedures[] = {
+	[ACLPROC3_GETACL] = {
+		.p_proc = ACLPROC3_GETACL,
+		.p_encode = (kxdrproc_t) nfs3_xdr_getaclargs,
+		.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
+		.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
+		.p_timer = 1,
+	},
+	[ACLPROC3_SETACL] = {
+		.p_proc = ACLPROC3_SETACL,
+		.p_encode = (kxdrproc_t) nfs3_xdr_setaclargs,
+		.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
+		.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
+		.p_timer = 0,
+	},
+};
+
+/*
+ * Version 3 descriptor for the ACL procedures above; nrprocs is the
+ * number of slots in nfs3_acl_procedures[].
+ */
+struct rpc_version		nfsacl_version3 = {
+	.number			= 3,
+	.nrprocs		= sizeof(nfs3_acl_procedures)/
+				  sizeof(nfs3_acl_procedures[0]),
+	.procs			= nfs3_acl_procedures,
+};
+#endif  /* CONFIG_NFS_V3_ACL */

+ 253 - 0
fs/nfs/nfs4_fs.h

@@ -0,0 +1,253 @@
+/*
+ * linux/fs/nfs/nfs4_fs.h
+ *
+ * Copyright (C) 2005 Trond Myklebust
+ *
+ * NFSv4-specific filesystem definitions and declarations
+ */
+
+#ifndef __LINUX_FS_NFS_NFS4_FS_H
+#define __LINUX_FS_NFS_NFS4_FS_H
+
+#ifdef CONFIG_NFS_V4
+
+struct idmap;
+
+/*
+ * In a seqid-mutating op, this macro controls which error return
+ * values trigger incrementation of the seqid.
+ *
+ * from rfc 3010:
+ * The client MUST monotonically increment the sequence number for the
+ * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
+ * operations.  This is true even in the event that the previous
+ * operation that used the sequence number received an error.  The only
+ * exception to this rule is if the previous operation received one of
+ * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
+ * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
+ * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
+ *
+ */
+#define seqid_mutating_err(err)       \
+(((err) != NFSERR_STALE_CLIENTID) &&  \
+ ((err) != NFSERR_STALE_STATEID)  &&  \
+ ((err) != NFSERR_BAD_STATEID)    &&  \
+ ((err) != NFSERR_BAD_SEQID)      &&  \
+ ((err) != NFSERR_BAD_XDR)        &&  \
+ ((err) != NFSERR_RESOURCE)       &&  \
+ ((err) != NFSERR_NOFILEHANDLE))
+
+enum nfs4_client_state {
+	NFS4CLNT_OK  = 0,
+};
+
+/*
+ * The nfs4_client identifies our client state to the server.
+ */
+struct nfs4_client {
+	struct list_head	cl_servers;	/* Global list of servers */
+	struct in_addr		cl_addr;	/* Server identifier */
+	u64			cl_clientid;	/* constant */
+	nfs4_verifier		cl_confirm;
+	unsigned long		cl_state;
+
+	u32			cl_lockowner_id;
+
+	/*
+	 * The following rwsem ensures exclusive access to the server
+	 * while we recover the state following a lease expiration.
+	 */
+	struct rw_semaphore	cl_sem;
+
+	struct list_head	cl_delegations;
+	struct list_head	cl_state_owners;
+	struct list_head	cl_unused;
+	int			cl_nunused;
+	spinlock_t		cl_lock;
+	atomic_t		cl_count;
+
+	struct rpc_clnt *	cl_rpcclient;
+	struct rpc_cred *	cl_cred;
+
+	struct list_head	cl_superblocks;	/* List of nfs_server structs */
+
+	unsigned long		cl_lease_time;
+	unsigned long		cl_last_renewal;
+	struct work_struct	cl_renewd;
+	struct work_struct	cl_recoverd;
+
+	wait_queue_head_t	cl_waitq;
+	struct rpc_wait_queue	cl_rpcwaitq;
+
+	/* used for the setclientid verifier */
+	struct timespec		cl_boot_time;
+
+	/* idmapper */
+	struct idmap *		cl_idmap;
+
+	/* Our own IP address, as a null-terminated string.
+	 * This is used to generate the clientid, and the callback address.
+	 */
+	char			cl_ipaddr[16];
+	unsigned char		cl_id_uniquifier;
+};
+
+/*
+ * NFS4 state_owners and lock_owners are simply labels for ordered
+ * sequences of RPC calls. Their sole purpose is to provide once-only
+ * semantics by allowing the server to identify replayed requests.
+ *
+ * The ->so_sema is held during all state_owner seqid-mutating operations:
+ * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
+ * so_seqid.
+ */
+struct nfs4_state_owner {
+	struct list_head     so_list;	 /* per-clientid list of state_owners */
+	struct nfs4_client   *so_client;
+	u32                  so_id;      /* 32-bit identifier, unique */
+	struct semaphore     so_sema;
+	u32                  so_seqid;   /* protected by so_sema */
+	atomic_t	     so_count;
+
+	struct rpc_cred	     *so_cred;	 /* Associated cred */
+	struct list_head     so_states;
+	struct list_head     so_delegations;
+};
+
+/*
+ * struct nfs4_state maintains the client-side state for a given
+ * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
+ *
+ * OPEN:
+ * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server,
+ * we need to know how many files are open for reading or writing on a
+ * given inode. This information too is stored here.
+ *
+ * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+ */
+
+struct nfs4_lock_state {
+	struct list_head	ls_locks;	/* Other lock stateids */
+	struct nfs4_state *	ls_state;	/* Pointer to open state */
+	fl_owner_t		ls_owner;	/* POSIX lock owner */
+#define NFS_LOCK_INITIALIZED 1
+	int			ls_flags;
+	u32			ls_seqid;
+	u32			ls_id;
+	nfs4_stateid		ls_stateid;
+	atomic_t		ls_count;
+};
+
+/* bits for nfs4_state->flags */
+enum {
+	LK_STATE_IN_USE,
+	NFS_DELEGATED_STATE,
+};
+
+struct nfs4_state {
+	struct list_head open_states;	/* List of states for the same state_owner */
+	struct list_head inode_states;	/* List of states for the same inode */
+	struct list_head lock_states;	/* List of subservient lock stateids */
+
+	struct nfs4_state_owner *owner;	/* Pointer to the open owner */
+	struct inode *inode;		/* Pointer to the inode */
+
+	unsigned long flags;		/* Do we hold any locks? */
+	struct semaphore lock_sema;	/* Serializes file locking operations */
+	spinlock_t state_lock;		/* Protects the lock_states list */
+
+	nfs4_stateid stateid;
+
+	unsigned int nreaders;
+	unsigned int nwriters;
+	int state;			/* State on the server (R,W, or RW) */
+	atomic_t count;
+};
+
+
+/*
+ * Retry bookkeeping passed to nfs4_handle_exception().  Callers in
+ * nfs4proc.c zero-initialise one of these and loop on ->retry.
+ */
+struct nfs4_exception {
+	long timeout;	/* presumably the current back-off delay - confirm in nfs4_handle_exception() */
+	int retry;	/* non-zero: the caller's do/while loop should reissue the call */
+};
+
+/*
+ * Pair of callbacks used to re-establish open and lock state.  Two
+ * instances are declared below: nfs4_reboot_recovery_ops and
+ * nfs4_network_partition_recovery_ops.
+ */
+struct nfs4_state_recovery_ops {
+	/* Recover one open state belonging to the given state owner. */
+	int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
+	/* Recover one file lock held under the given open state. */
+	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
+};
+
+extern struct dentry_operations nfs4_dentry_operations;
+extern struct inode_operations nfs4_dir_inode_operations;
+
+/* inode.c */
+extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
+extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
+extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
+
+
+/* nfs4proc.c */
+extern int nfs4_map_errors(int err);
+extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
+extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
+extern int nfs4_proc_async_renew(struct nfs4_client *);
+extern int nfs4_proc_renew(struct nfs4_client *);
+extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
+extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
+
+extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
+extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
+
+extern const u32 nfs4_fattr_bitmap[2];
+extern const u32 nfs4_statfs_bitmap[2];
+extern const u32 nfs4_pathconf_bitmap[2];
+extern const u32 nfs4_fsinfo_bitmap[2];
+
+/* nfs4renewd.c */
+extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
+extern void nfs4_kill_renewd(struct nfs4_client *);
+extern void nfs4_renew_state(void *);
+
+/* nfs4state.c */
+extern void init_nfsv4_state(struct nfs_server *);
+extern void destroy_nfsv4_state(struct nfs_server *);
+extern struct nfs4_client *nfs4_get_client(struct in_addr *);
+extern void nfs4_put_client(struct nfs4_client *clp);
+extern int nfs4_init_client(struct nfs4_client *clp);
+extern struct nfs4_client *nfs4_find_client(struct in_addr *);
+extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
+
+extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
+extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
+extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
+extern void nfs4_put_open_state(struct nfs4_state *);
+extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
+extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
+extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
+extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
+
+extern const nfs4_stateid zero_stateid;
+
+/* nfs4xdr.c */
+extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus);
+extern struct rpc_procinfo nfs4_procedures[];
+
+struct nfs4_mount_data;
+
+/* callback_xdr.c */
+extern struct svc_version nfs4_callback_version1;
+
+#else
+
+/*
+ * No-op replacements used when CONFIG_NFS_V4 is not set, so callers do
+ * not need their own #ifdefs.
+ *
+ * NOTE(review): the nfs4_put_state_owner() stub takes two arguments
+ * while the real prototype above takes a single nfs4_state_owner
+ * pointer - confirm which form the !CONFIG_NFS_V4 callers use.
+ */
+#define init_nfsv4_state(server)  do { } while (0)
+#define destroy_nfsv4_state(server)       do { } while (0)
+#define nfs4_put_state_owner(inode, owner) do { } while (0)
+#define nfs4_put_open_state(state) do { } while (0)
+#define nfs4_close_state(a, b) do { } while (0)
+
+#endif /* CONFIG_NFS_V4 */
+#endif /* __LINUX_FS_NFS_NFS4_FS_H */

+ 339 - 90
fs/nfs/nfs4proc.c

@@ -48,6 +48,7 @@
 #include <linux/smp_lock.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
 #include <linux/namei.h>
 
 
+#include "nfs4_fs.h"
 #include "delegation.h"
 #include "delegation.h"
 
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 #define NFSDBG_FACILITY		NFSDBG_PROC
@@ -62,8 +63,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 extern struct rpc_procinfo nfs4_procedures[];
 
 
-extern nfs4_stateid zero_stateid;
-
 /* Prevent leaks of NFSv4 errors into userland */
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
 int nfs4_map_errors(int err)
 {
 {
@@ -104,7 +103,7 @@ const u32 nfs4_statfs_bitmap[2] = {
 	| FATTR4_WORD1_SPACE_TOTAL
 	| FATTR4_WORD1_SPACE_TOTAL
 };
 };
 
 
-u32 nfs4_pathconf_bitmap[2] = {
+const u32 nfs4_pathconf_bitmap[2] = {
 	FATTR4_WORD0_MAXLINK
 	FATTR4_WORD0_MAXLINK
 	| FATTR4_WORD0_MAXNAME,
 	| FATTR4_WORD0_MAXNAME,
 	0
 	0
@@ -124,7 +123,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 
 
 	BUG_ON(readdir->count < 80);
 	BUG_ON(readdir->count < 80);
 	if (cookie > 2) {
 	if (cookie > 2) {
-		readdir->cookie = (cookie > 2) ? cookie : 0;
+		readdir->cookie = cookie;
 		memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
 		memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
 		return;
 		return;
 	}
 	}
@@ -270,14 +269,9 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	int err;
 	int err;
 	do {
 	do {
 		err = _nfs4_open_reclaim(sp, state);
 		err = _nfs4_open_reclaim(sp, state);
-		switch (err) {
-			case 0:
-			case -NFS4ERR_STALE_CLIENTID:
-			case -NFS4ERR_STALE_STATEID:
-			case -NFS4ERR_EXPIRED:
-				return err;
-		}
-		err = nfs4_handle_exception(server, err, &exception);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 	} while (exception.retry);
 	return err;
 	return err;
 }
 }
@@ -509,6 +503,20 @@ out_stale:
 	goto out_nodeleg;
 	goto out_nodeleg;
 }
 }
 
 
+/*
+ * Re-open 'state' after lease expiry, retrying for as long as the server
+ * answers NFS4ERR_DELAY.
+ *
+ * Any other result (success included) ends the loop at once.
+ * exception.retry is only written inside nfs4_handle_exception(), so
+ * once it has been set by a DELAY it would stay set if we merely skipped
+ * the handler on a later non-DELAY result; the explicit break avoids
+ * looping on that stale flag.  This is the same shape as
+ * nfs4_open_reclaim(), nfs4_lock_reclaim() and nfs4_lock_expired().
+ *
+ * Returns the result of the last _nfs4_open_expired() call.
+ */
+static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+{
+	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_open_expired(sp, state, dentry);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
 static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
 {
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -521,7 +529,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 			continue;
 			continue;
 		get_nfs_open_context(ctx);
 		get_nfs_open_context(ctx);
 		spin_unlock(&state->inode->i_lock);
 		spin_unlock(&state->inode->i_lock);
-		status = _nfs4_open_expired(sp, state, ctx->dentry);
+		status = nfs4_do_open_expired(sp, state, ctx->dentry);
 		put_nfs_open_context(ctx);
 		put_nfs_open_context(ctx);
 		return status;
 		return status;
 	}
 	}
@@ -748,11 +756,10 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 
 
         fattr->valid = 0;
         fattr->valid = 0;
 
 
-	if (state != NULL)
+	if (state != NULL) {
 		msg.rpc_cred = state->owner->so_cred;
 		msg.rpc_cred = state->owner->so_cred;
-	if (sattr->ia_valid & ATTR_SIZE)
-		nfs4_copy_stateid(&arg.stateid, state, NULL);
-	else
+		nfs4_copy_stateid(&arg.stateid, state, current->files);
+	} else
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 
 
 	return rpc_call_sync(server->client, &msg, 0);
 	return rpc_call_sync(server->client, &msg, 0);
@@ -1116,47 +1123,31 @@ static int
 nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		  struct iattr *sattr)
 		  struct iattr *sattr)
 {
 {
-	struct inode *		inode = dentry->d_inode;
-	int			size_change = sattr->ia_valid & ATTR_SIZE;
-	struct nfs4_state	*state = NULL;
-	int need_iput = 0;
+	struct rpc_cred *cred;
+	struct inode *inode = dentry->d_inode;
+	struct nfs4_state *state;
 	int status;
 	int status;
 
 
 	fattr->valid = 0;
 	fattr->valid = 0;
 	
 	
-	if (size_change) {
-		struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
-		if (IS_ERR(cred))
-			return PTR_ERR(cred);
+	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	/* Search for an existing WRITE delegation first */
+	state = nfs4_open_delegated(inode, FMODE_WRITE, cred);
+	if (!IS_ERR(state)) {
+		/* NB: nfs4_open_delegated() bumps the inode->i_count */
+		iput(inode);
+	} else {
+		/* Search for an existing open(O_WRITE) stateid */
 		state = nfs4_find_state(inode, cred, FMODE_WRITE);
 		state = nfs4_find_state(inode, cred, FMODE_WRITE);
-		if (state == NULL) {
-			state = nfs4_open_delegated(dentry->d_inode,
-					FMODE_WRITE, cred);
-			if (IS_ERR(state))
-				state = nfs4_do_open(dentry->d_parent->d_inode,
-						dentry, FMODE_WRITE,
-						NULL, cred);
-			need_iput = 1;
-		}
-		put_rpccred(cred);
-		if (IS_ERR(state))
-			return PTR_ERR(state);
-
-		if (state->inode != inode) {
-			printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode);
-			status = -EIO;
-			goto out;
-		}
 	}
 	}
+
 	status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
 	status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
 			NFS_FH(inode), sattr, state);
 			NFS_FH(inode), sattr, state);
-out:
-	if (state) {
-		inode = state->inode;
+	if (state != NULL)
 		nfs4_close_state(state, FMODE_WRITE);
 		nfs4_close_state(state, FMODE_WRITE);
-		if (need_iput)
-			iput(inode);
-	}
+	put_rpccred(cred);
 	return status;
 	return status;
 }
 }
 
 
@@ -1731,6 +1722,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	};
 	};
 	int			status;
 	int			status;
 
 
+	dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __FUNCTION__,
+			dentry->d_parent->d_name.name,
+			dentry->d_name.name,
+			(unsigned long long)cookie);
 	lock_kernel();
 	lock_kernel();
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
 	res.pgbase = args.pgbase;
@@ -1738,6 +1733,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	if (status == 0)
 	if (status == 0)
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
 	unlock_kernel();
 	unlock_kernel();
+	dprintk("%s: returns %d\n", __FUNCTION__, status);
 	return status;
 	return status;
 }
 }
 
 
@@ -2163,6 +2159,193 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp)
 	return 0;
 	return 0;
 }
 }
 
 
+/*
+ * Returns 1 when the server advertises ACL capability and supports both
+ * ALLOW and DENY ACL entries, 0 otherwise.
+ */
+static inline int nfs4_server_supports_acls(struct nfs_server *server)
+{
+	if (!(server->caps & NFS_CAP_ACLS))
+		return 0;
+	if (!(server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL))
+		return 0;
+	return (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL) != 0;
+}
+
+/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that
+ * it's OK to put sizeof(void *) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on
+ * the stack.
+ */
+#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
+
+/*
+ * Fill 'pages' with the struct page of every page touched by the range
+ * [buf, buf + buflen) and store buf's offset into its first page in
+ * *pgbase.  The caller must supply a large enough 'pages' array.
+ */
+static void buf_to_pages(const void *buf, size_t buflen,
+		struct page **pages, unsigned int *pgbase)
+{
+	const char *end = (const char *)buf + buflen;
+	const char *cur;
+
+	*pgbase = offset_in_page(buf);
+	/* Start from the page boundary at or below buf. */
+	for (cur = (const char *)buf - *pgbase; cur < end;
+	     cur += PAGE_CACHE_SIZE)
+		*pages++ = virt_to_page(cur);
+}
+
+/*
+ * Per-inode ACL cache entry, hung off nfs_inode->nfs4_acl.  Either the
+ * full ACL is stored in data[] (cached == 1) or only its length is
+ * remembered (cached == 0), see nfs4_write_cached_acl().
+ */
+struct nfs4_cached_acl {
+	int cached;	/* 1: data[] holds len bytes of ACL; 0: only len is valid */
+	size_t len;	/* length of the ACL in bytes */
+	char data[0];	/* ACL bytes, allocated together with the header */
+};
+
+/*
+ * Replace the inode's cached ACL entry with 'acl' (which may be NULL),
+ * freeing the previous entry.  The swap is done under inode->i_lock.
+ */
+static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl)
+{
+	spin_lock(&inode->i_lock);
+	kfree(NFS_I(inode)->nfs4_acl);
+	NFS_I(inode)->nfs4_acl = acl;
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Drop the inode's cached ACL, if any.  Installed as the
+ * ->clear_acl_cache operation in nfs_v4_clientops.
+ */
+static void nfs4_zap_acl_attr(struct inode *inode)
+{
+	nfs4_set_cached_acl(inode, NULL);
+}
+
+/*
+ * Try to satisfy an ACL read from the per-inode cache.
+ *
+ * Returns the ACL length when buf is NULL (length query) or when the
+ * data was copied into buf, -ERANGE when buf is too small for the cached
+ * data, and -ENOENT when nothing usable is cached.
+ */
+static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen)
+{
+	struct nfs4_cached_acl *entry;
+	int ret = -ENOENT;
+
+	spin_lock(&inode->i_lock);
+	entry = NFS_I(inode)->nfs4_acl;
+	if (entry != NULL) {
+		if (buf == NULL) {
+			/* user is just asking for length */
+			ret = entry->len;
+		} else if (entry->cached == 0) {
+			/* only the length is known; caller must go to the server */
+			ret = -ENOENT;
+		} else if (entry->len > buflen) {
+			ret = -ERANGE; /* see getxattr(2) man page */
+		} else {
+			memcpy(buf, entry->data, entry->len);
+			ret = entry->len;
+		}
+	}
+	spin_unlock(&inode->i_lock);
+	return ret;
+}
+
+/*
+ * Install a new cache entry for the inode's ACL.
+ *
+ * The ACL bytes are kept only when buf is non-NULL and acl_len fits in
+ * one page; otherwise just the length is recorded.  If the allocation
+ * fails, the existing cache entry is cleared.
+ */
+static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len)
+{
+	struct nfs4_cached_acl *entry;
+	int keep_data = buf && acl_len <= PAGE_SIZE;
+
+	entry = kmalloc(sizeof(*entry) + (keep_data ? acl_len : 0), GFP_KERNEL);
+	if (entry != NULL) {
+		entry->cached = keep_data;
+		if (keep_data)
+			memcpy(entry->data, buf, acl_len);
+		entry->len = acl_len;
+	}
+	/* entry is NULL here on allocation failure, which zaps the cache. */
+	nfs4_set_cached_acl(inode, entry);
+}
+
+/*
+ * Fetch the ACL from the server with a GETACL call and refresh the
+ * per-inode cache from the reply.
+ *
+ * When the caller's buffer is smaller than a page (including the
+ * buf == NULL length query) a temporary page receives the reply, so that
+ * up to a page of ACL data can be cached; otherwise the reply is
+ * received straight into buf.
+ *
+ * Returns the ACL length on success, -ERANGE if buf is non-NULL but too
+ * small, -ENOMEM if the temporary page cannot be allocated, or the
+ * non-zero result of rpc_call_sync().
+ */
+static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+{
+	struct page *pages[NFS4ACL_MAXPAGES];
+	struct nfs_getaclargs args = {
+		.fh = NFS_FH(inode),
+		.acl_pages = pages,
+		.acl_len = buflen,
+	};
+	size_t resp_len = buflen;
+	void *resp_buf;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
+		.rpc_argp = &args,
+		.rpc_resp = &resp_len,
+	};
+	struct page *localpage = NULL;
+	int ret;
+
+	if (buflen < PAGE_SIZE) {
+		/* As long as we're doing a round trip to the server anyway,
+		 * let's be prepared for a page of acl data. */
+		localpage = alloc_page(GFP_KERNEL);
+		if (localpage == NULL)
+			return -ENOMEM;
+		/* Only translate the page once we know the allocation worked. */
+		resp_buf = page_address(localpage);
+		args.acl_pages[0] = localpage;
+		args.acl_pgbase = 0;
+		args.acl_len = PAGE_SIZE;
+	} else {
+		resp_buf = buf;
+		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+	}
+	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (ret)
+		goto out_free;
+	/* Reply larger than the receive area: remember the length only. */
+	if (resp_len > args.acl_len)
+		nfs4_write_cached_acl(inode, NULL, resp_len);
+	else
+		nfs4_write_cached_acl(inode, resp_buf, resp_len);
+	if (buf) {
+		ret = -ERANGE;
+		if (resp_len > buflen)
+			goto out_free;
+		/* Data landed in the bounce page; hand it to the caller. */
+		if (localpage)
+			memcpy(buf, resp_buf, resp_len);
+	}
+	ret = resp_len;
+out_free:
+	if (localpage)
+		__free_page(localpage);
+	return ret;
+}
+
+/*
+ * Read the inode's NFSv4 ACL: revalidate the inode, try the cache, and
+ * fall back to a server round trip only when the cache has nothing
+ * usable (-ENOENT).
+ */
+static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	int ret;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+
+	ret = nfs_revalidate_inode(server, inode);
+	if (ret < 0)
+		return ret;
+
+	ret = nfs4_read_cached_acl(inode, buf, buflen);
+	if (ret == -ENOENT)
+		return nfs4_get_acl_uncached(inode, buf, buflen);
+	return ret;
+}
+
+/*
+ * Send the ACL in buf to the server with a SETACL call and, when the
+ * call succeeds, refresh the per-inode ACL cache with the same bytes.
+ * Returns -EOPNOTSUPP if the server lacks ACL support, otherwise the
+ * result of rpc_call_sync().
+ */
+static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct page *pages[NFS4ACL_MAXPAGES];
+	struct nfs_setaclargs arg = {
+		.fh		= NFS_FH(inode),
+		.acl_pages	= pages,
+		.acl_len	= buflen,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETACL],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+	};
+	int status;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+
+	/* Describe the caller's buffer as a page vector for the encoder. */
+	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (status != 0)
+		return status;
+
+	nfs4_write_cached_acl(inode, buf, buflen);
+	return 0;
+}
+
 static int
 static int
 nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
 nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
 {
 {
@@ -2448,14 +2631,11 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	down_read(&clp->cl_sem);
 	down_read(&clp->cl_sem);
 	nlo.clientid = clp->cl_clientid;
 	nlo.clientid = clp->cl_clientid;
 	down(&state->lock_sema);
 	down(&state->lock_sema);
-	lsp = nfs4_find_lock_state(state, request->fl_owner);
-	if (lsp)
-		nlo.id = lsp->ls_id; 
-	else {
-		spin_lock(&clp->cl_lock);
-		nlo.id = nfs4_alloc_lockowner_id(clp);
-		spin_unlock(&clp->cl_lock);
-	}
+	status = nfs4_set_lock_state(state, request);
+	if (status != 0)
+		goto out;
+	lsp = request->fl_u.nfs4_fl.owner;
+	nlo.id = lsp->ls_id; 
 	arg.u.lockt = &nlo;
 	arg.u.lockt = &nlo;
 	status = rpc_call_sync(server->client, &msg, 0);
 	status = rpc_call_sync(server->client, &msg, 0);
 	if (!status) {
 	if (!status) {
@@ -2476,8 +2656,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		request->fl_pid = 0;
 		request->fl_pid = 0;
 		status = 0;
 		status = 0;
 	}
 	}
-	if (lsp)
-		nfs4_put_lock_state(lsp);
+out:
 	up(&state->lock_sema);
 	up(&state->lock_sema);
 	up_read(&clp->cl_sem);
 	up_read(&clp->cl_sem);
 	return status;
 	return status;
@@ -2537,28 +2716,26 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 	};
 	};
 	struct nfs4_lock_state *lsp;
 	struct nfs4_lock_state *lsp;
 	struct nfs_locku_opargs luargs;
 	struct nfs_locku_opargs luargs;
-	int status = 0;
+	int status;
 			
 			
 	down_read(&clp->cl_sem);
 	down_read(&clp->cl_sem);
 	down(&state->lock_sema);
 	down(&state->lock_sema);
-	lsp = nfs4_find_lock_state(state, request->fl_owner);
-	if (!lsp)
+	status = nfs4_set_lock_state(state, request);
+	if (status != 0)
 		goto out;
 		goto out;
+	lsp = request->fl_u.nfs4_fl.owner;
 	/* We might have lost the locks! */
 	/* We might have lost the locks! */
-	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
-		luargs.seqid = lsp->ls_seqid;
-		memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
-		arg.u.locku = &luargs;
-		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		nfs4_increment_lock_seqid(status, lsp);
-	}
+	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
+		goto out;
+	luargs.seqid = lsp->ls_seqid;
+	memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
+	arg.u.locku = &luargs;
+	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+	nfs4_increment_lock_seqid(status, lsp);
 
 
-	if (status == 0) {
+	if (status == 0)
 		memcpy(&lsp->ls_stateid,  &res.u.stateid, 
 		memcpy(&lsp->ls_stateid,  &res.u.stateid, 
 				sizeof(lsp->ls_stateid));
 				sizeof(lsp->ls_stateid));
-		nfs4_notify_unlck(state, request, lsp);
-	}
-	nfs4_put_lock_state(lsp);
 out:
 out:
 	up(&state->lock_sema);
 	up(&state->lock_sema);
 	if (status == 0)
 	if (status == 0)
@@ -2584,7 +2761,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 {
 {
 	struct inode *inode = state->inode;
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_lock_state *lsp;
+	struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
 	struct nfs_lockargs arg = {
 	struct nfs_lockargs arg = {
 		.fh = NFS_FH(inode),
 		.fh = NFS_FH(inode),
 		.type = nfs4_lck_type(cmd, request),
 		.type = nfs4_lck_type(cmd, request),
@@ -2606,9 +2783,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 	};
 	};
 	int status;
 	int status;
 
 
-	lsp = nfs4_get_lock_state(state, request->fl_owner);
-	if (lsp == NULL)
-		return -ENOMEM;
 	if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
 	if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
 		struct nfs4_state_owner *owner = state->owner;
 		struct nfs4_state_owner *owner = state->owner;
 		struct nfs_open_to_lock otl = {
 		struct nfs_open_to_lock otl = {
@@ -2630,38 +2804,57 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 		* seqid mutating errors */
 		* seqid mutating errors */
 		nfs4_increment_seqid(status, owner);
 		nfs4_increment_seqid(status, owner);
 		up(&owner->so_sema);
 		up(&owner->so_sema);
+		if (status == 0) {
+			lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+			lsp->ls_seqid++;
+		}
 	} else {
 	} else {
 		struct nfs_exist_lock el = {
 		struct nfs_exist_lock el = {
 			.seqid = lsp->ls_seqid,
 			.seqid = lsp->ls_seqid,
 		};
 		};
 		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
 		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
 		largs.u.exist_lock = &el;
 		largs.u.exist_lock = &el;
-		largs.new_lock_owner = 0;
 		arg.u.lock = &largs;
 		arg.u.lock = &largs;
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+		/* increment seqid on success, and * seqid mutating errors*/
+		nfs4_increment_lock_seqid(status, lsp);
 	}
 	}
-	/* increment seqid on success, and * seqid mutating errors*/
-	nfs4_increment_lock_seqid(status, lsp);
 	/* save the returned stateid. */
 	/* save the returned stateid. */
-	if (status == 0) {
+	if (status == 0)
 		memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
 		memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
-		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-		if (!reclaim)
-			nfs4_notify_setlk(state, request, lsp);
-	} else if (status == -NFS4ERR_DENIED)
+	else if (status == -NFS4ERR_DENIED)
 		status = -EAGAIN;
 		status = -EAGAIN;
-	nfs4_put_lock_state(lsp);
 	return status;
 	return status;
 }
 }
 
 
 static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
 static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
 {
 {
-	return _nfs4_do_setlk(state, F_SETLK, request, 1);
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_do_setlk(state, F_SETLK, request, 1);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
 }
 }
 
 
 static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
 static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
 {
 {
-	return _nfs4_do_setlk(state, F_SETLK, request, 0);
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_do_setlk(state, F_SETLK, request, 0);
+		if (err != -NFS4ERR_DELAY)
+			break;
+		nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
 }
 }
 
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -2671,7 +2864,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 
 
 	down_read(&clp->cl_sem);
 	down_read(&clp->cl_sem);
 	down(&state->lock_sema);
 	down(&state->lock_sema);
-	status = _nfs4_do_setlk(state, cmd, request, 0);
+	status = nfs4_set_lock_state(state, request);
+	if (status == 0)
+		status = _nfs4_do_setlk(state, cmd, request, 0);
 	up(&state->lock_sema);
 	up(&state->lock_sema);
 	if (status == 0) {
 	if (status == 0) {
 		/* Note: we always want to sleep here! */
 		/* Note: we always want to sleep here! */
@@ -2729,10 +2924,53 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
 		if (signalled())
 		if (signalled())
 			break;
 			break;
 	} while(status < 0);
 	} while(status < 0);
-
 	return status;
 	return status;
 }
 }
 
 
+
+#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+
+/*
+ * setxattr entry point: only the "system.nfs4_acl" attribute is
+ * supported, and only on regular files and on directories that do not
+ * have the sticky bit set.
+ */
+int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+		size_t buflen, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
+		return -EOPNOTSUPP;
+
+	if (S_ISREG(inode->i_mode))
+		return nfs4_proc_set_acl(inode, buf, buflen);
+	if (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))
+		return nfs4_proc_set_acl(inode, buf, buflen);
+
+	return -EPERM;
+}
+
+/* The getxattr man page suggests returning -ENODATA for unknown attributes,
+ * and that's what we'll do for e.g. user attributes that haven't been set.
+ * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
+ * attributes in kernel-managed attribute namespaces. */
+ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
+		size_t buflen)
+{
+	struct inode *inode = dentry->d_inode;
+
+	/* "system.nfs4_acl" is the only attribute handled here. */
+	if (strcmp(key, XATTR_NAME_NFSV4_ACL) == 0)
+		return nfs4_proc_get_acl(inode, buf, buflen);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * listxattr entry point: the list always consists of the single name
+ * "system.nfs4_acl" including its terminating NUL.  With buf == NULL
+ * only the required size is returned.
+ */
+ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
+{
+	/* sizeof a string literal counts the trailing NUL. */
+	size_t needed = sizeof(XATTR_NAME_NFSV4_ACL);
+
+	if (buf == NULL)
+		return needed;
+	if (buflen < needed)
+		return -ERANGE;
+	memcpy(buf, XATTR_NAME_NFSV4_ACL, needed);
+	return needed;
+}
+
 struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
 struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
 	.recover_open	= nfs4_open_reclaim,
 	.recover_open	= nfs4_open_reclaim,
 	.recover_lock	= nfs4_lock_reclaim,
 	.recover_lock	= nfs4_lock_reclaim,
@@ -2743,10 +2981,20 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
 	.recover_lock	= nfs4_lock_expired,
 	.recover_lock	= nfs4_lock_expired,
 };
 };
 
 
+static struct inode_operations nfs4_file_inode_operations = {
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.getxattr	= nfs4_getxattr,
+	.setxattr	= nfs4_setxattr,
+	.listxattr	= nfs4_listxattr,
+};
+
 struct nfs_rpc_ops	nfs_v4_clientops = {
 struct nfs_rpc_ops	nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+	.file_inode_ops	= &nfs4_file_inode_operations,
 	.getroot	= nfs4_proc_get_root,
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
 	.setattr	= nfs4_proc_setattr,
@@ -2777,6 +3025,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.file_open      = nfs4_proc_file_open,
 	.file_open      = nfs4_proc_file_open,
 	.file_release   = nfs4_proc_file_release,
 	.file_release   = nfs4_proc_file_release,
 	.lock		= nfs4_proc_lock,
 	.lock		= nfs4_proc_lock,
+	.clear_acl_cache = nfs4_zap_acl_attr,
 };
 };
 
 
 /*
 /*

+ 1 - 0
fs/nfs/nfs4renewd.c

@@ -53,6 +53,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
 
 
 #define NFSDBG_FACILITY	NFSDBG_PROC
 #define NFSDBG_FACILITY	NFSDBG_PROC
 
 

+ 84 - 109
fs/nfs/nfs4state.c

@@ -46,24 +46,18 @@
 #include <linux/workqueue.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <linux/bitops.h>
 
 
+#include "nfs4_fs.h"
 #include "callback.h"
 #include "callback.h"
 #include "delegation.h"
 #include "delegation.h"
 
 
 #define OPENOWNER_POOL_SIZE	8
 #define OPENOWNER_POOL_SIZE	8
 
 
-static DEFINE_SPINLOCK(state_spinlock);
-
-nfs4_stateid zero_stateid;
-
-#if 0
-nfs4_stateid one_stateid =
-	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-#endif
+const nfs4_stateid zero_stateid;
 
 
+static DEFINE_SPINLOCK(state_spinlock);
 static LIST_HEAD(nfs4_clientid_list);
 static LIST_HEAD(nfs4_clientid_list);
 
 
 static void nfs4_recover_state(void *);
 static void nfs4_recover_state(void *);
-extern void nfs4_renew_state(void *);
 
 
 void
 void
 init_nfsv4_state(struct nfs_server *server)
 init_nfsv4_state(struct nfs_server *server)
@@ -116,6 +110,7 @@ nfs4_alloc_client(struct in_addr *addr)
 	INIT_LIST_HEAD(&clp->cl_superblocks);
 	INIT_LIST_HEAD(&clp->cl_superblocks);
 	init_waitqueue_head(&clp->cl_waitq);
 	init_waitqueue_head(&clp->cl_waitq);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
+	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_OK;
 	clp->cl_state = 1 << NFS4CLNT_OK;
 	return clp;
 	return clp;
@@ -137,7 +132,7 @@ nfs4_free_client(struct nfs4_client *clp)
 	if (clp->cl_cred)
 	if (clp->cl_cred)
 		put_rpccred(clp->cl_cred);
 		put_rpccred(clp->cl_cred);
 	nfs_idmap_delete(clp);
 	nfs_idmap_delete(clp);
-	if (clp->cl_rpcclient)
+	if (!IS_ERR(clp->cl_rpcclient))
 		rpc_shutdown_client(clp->cl_rpcclient);
 		rpc_shutdown_client(clp->cl_rpcclient);
 	kfree(clp);
 	kfree(clp);
 	nfs_callback_down();
 	nfs_callback_down();
@@ -365,7 +360,7 @@ nfs4_alloc_open_state(void)
 	atomic_set(&state->count, 1);
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
 	INIT_LIST_HEAD(&state->lock_states);
 	init_MUTEX(&state->lock_sema);
 	init_MUTEX(&state->lock_sema);
-	rwlock_init(&state->state_lock);
+	spin_lock_init(&state->state_lock);
 	return state;
 	return state;
 }
 }
 
 
@@ -547,16 +542,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 	return NULL;
 	return NULL;
 }
 }
 
 
-struct nfs4_lock_state *
-nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
-{
-	struct nfs4_lock_state *lsp;
-	read_lock(&state->state_lock);
-	lsp = __nfs4_find_lock_state(state, fl_owner);
-	read_unlock(&state->state_lock);
-	return lsp;
-}
-
 /*
 /*
  * Return a compatible lock_state. If no initialized lock_state structure
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  * exists, return an uninitialized one.
@@ -573,14 +558,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 		return NULL;
 		return NULL;
 	lsp->ls_flags = 0;
 	lsp->ls_flags = 0;
 	lsp->ls_seqid = 0;	/* arbitrary */
 	lsp->ls_seqid = 0;	/* arbitrary */
-	lsp->ls_id = -1; 
 	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
 	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
 	atomic_set(&lsp->ls_count, 1);
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
 	lsp->ls_owner = fl_owner;
-	INIT_LIST_HEAD(&lsp->ls_locks);
 	spin_lock(&clp->cl_lock);
 	spin_lock(&clp->cl_lock);
 	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
 	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
 	spin_unlock(&clp->cl_lock);
 	spin_unlock(&clp->cl_lock);
+	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
 	return lsp;
 }
 }
 
 
@@ -590,121 +574,112 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
  *
  *
  * The caller must be holding state->lock_sema and clp->cl_sem
  * The caller must be holding state->lock_sema and clp->cl_sem
  */
  */
-struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
+static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 {
 {
-	struct nfs4_lock_state * lsp;
+	struct nfs4_lock_state *lsp, *new = NULL;
 	
 	
-	lsp = nfs4_find_lock_state(state, owner);
-	if (lsp == NULL)
-		lsp = nfs4_alloc_lock_state(state, owner);
+	for(;;) {
+		spin_lock(&state->state_lock);
+		lsp = __nfs4_find_lock_state(state, owner);
+		if (lsp != NULL)
+			break;
+		if (new != NULL) {
+			new->ls_state = state;
+			list_add(&new->ls_locks, &state->lock_states);
+			set_bit(LK_STATE_IN_USE, &state->flags);
+			lsp = new;
+			new = NULL;
+			break;
+		}
+		spin_unlock(&state->state_lock);
+		new = nfs4_alloc_lock_state(state, owner);
+		if (new == NULL)
+			return NULL;
+	}
+	spin_unlock(&state->state_lock);
+	kfree(new);
 	return lsp;
 	return lsp;
 }
 }
 
 
 /*
 /*
- * Byte-range lock aware utility to initialize the stateid of read/write
- * requests.
+ * Release reference to lock_state, and free it if we see that
+ * it is no longer in use
  */
  */
-void
-nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
+static void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 {
 {
-	if (test_bit(LK_STATE_IN_USE, &state->flags)) {
-		struct nfs4_lock_state *lsp;
+	struct nfs4_state *state;
 
 
-		lsp = nfs4_find_lock_state(state, fl_owner);
-		if (lsp) {
-			memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
-			nfs4_put_lock_state(lsp);
-			return;
-		}
-	}
-	memcpy(dst, &state->stateid, sizeof(*dst));
+	if (lsp == NULL)
+		return;
+	state = lsp->ls_state;
+	if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
+		return;
+	list_del(&lsp->ls_locks);
+	if (list_empty(&state->lock_states))
+		clear_bit(LK_STATE_IN_USE, &state->flags);
+	spin_unlock(&state->state_lock);
+	kfree(lsp);
 }
 }
 
 
-/*
-* Called with state->lock_sema and clp->cl_sem held.
-*/
-void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
 {
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		lsp->ls_seqid++;
-}
+	struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
 
 
-/* 
-* Check to see if the request lock (type FL_UNLK) effects the fl lock.
-*
-* fl and request must have the same posix owner
-*
-* return: 
-* 0 -> fl not effected by request
-* 1 -> fl consumed by request
-*/
+	dst->fl_u.nfs4_fl.owner = lsp;
+	atomic_inc(&lsp->ls_count);
+}
 
 
-static int
-nfs4_check_unlock(struct file_lock *fl, struct file_lock *request)
+static void nfs4_fl_release_lock(struct file_lock *fl)
 {
 {
-	if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end)
-		return 1;
-	return 0;
+	nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
 }
 }
 
 
-/*
- * Post an initialized lock_state on the state->lock_states list.
- */
-void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+static struct file_lock_operations nfs4_fl_lock_ops = {
+	.fl_copy_lock = nfs4_fl_copy_lock,
+	.fl_release_private = nfs4_fl_release_lock,
+};
+
+int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 {
 {
-	if (!list_empty(&lsp->ls_locks))
-		return;
-	atomic_inc(&lsp->ls_count);
-	write_lock(&state->state_lock);
-	list_add(&lsp->ls_locks, &state->lock_states);
-	set_bit(LK_STATE_IN_USE, &state->flags);
-	write_unlock(&state->state_lock);
+	struct nfs4_lock_state *lsp;
+
+	if (fl->fl_ops != NULL)
+		return 0;
+	lsp = nfs4_get_lock_state(state, fl->fl_owner);
+	if (lsp == NULL)
+		return -ENOMEM;
+	fl->fl_u.nfs4_fl.owner = lsp;
+	fl->fl_ops = &nfs4_fl_lock_ops;
+	return 0;
 }
 }
 
 
-/* 
- * to decide to 'reap' lock state:
- * 1) search i_flock for file_locks with fl.lock_state = to ls.
- * 2) determine if unlock will consume found lock. 
- * 	if so, reap
- *
- * 	else, don't reap.
- *
+/*
+ * Byte-range lock aware utility to initialize the stateid of read/write
+ * requests.
  */
  */
-void
-nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
 {
 {
-	struct inode *inode = state->inode;
-	struct file_lock *fl;
+	struct nfs4_lock_state *lsp;
 
 
-	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
-		if (!(fl->fl_flags & FL_POSIX))
-			continue;
-		if (fl->fl_owner != lsp->ls_owner)
-			continue;
-		/* Exit if we find at least one lock which is not consumed */
-		if (nfs4_check_unlock(fl,request) == 0)
-			return;
-	}
+	memcpy(dst, &state->stateid, sizeof(*dst));
+	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
+		return;
 
 
-	write_lock(&state->state_lock);
-	list_del_init(&lsp->ls_locks);
-	if (list_empty(&state->lock_states))
-		clear_bit(LK_STATE_IN_USE, &state->flags);
-	write_unlock(&state->state_lock);
+	spin_lock(&state->state_lock);
+	lsp = __nfs4_find_lock_state(state, fl_owner);
+	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+	spin_unlock(&state->state_lock);
 	nfs4_put_lock_state(lsp);
 	nfs4_put_lock_state(lsp);
 }
 }
 
 
 /*
 /*
- * Release reference to lock_state, and free it if we see that
- * it is no longer in use
- */
-void
-nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+* Called with state->lock_sema and clp->cl_sem held.
+*/
+void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
 {
 {
-	if (!atomic_dec_and_test(&lsp->ls_count))
-		return;
-	BUG_ON (!list_empty(&lsp->ls_locks));
-	kfree(lsp);
+	if (status == NFS_OK || seqid_mutating_err(-status))
+		lsp->ls_seqid++;
 }
 }
 
 
 /*
 /*

+ 219 - 22
fs/nfs/nfs4xdr.c

@@ -51,6 +51,7 @@
 #include <linux/nfs4.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
 #include <linux/nfs_idmap.h>
+#include "nfs4_fs.h"
 
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
 
@@ -82,12 +83,16 @@ static int nfs_stat_to_errno(int);
 #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
 #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
 #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
 #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
 				((3+NFS4_FHSIZE) >> 2))
 				((3+NFS4_FHSIZE) >> 2))
-#define encode_getattr_maxsz    (op_encode_hdr_maxsz + 3)
+#define nfs4_fattr_bitmap_maxsz 3
+#define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
-#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
-#define decode_getattr_maxsz    (op_decode_hdr_maxsz + 3 + \
-                                nfs4_fattr_bitmap_maxsz)
+/* This is based on getfattr, which uses the most attributes: */
+#define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
+				3 + 3 + 3 + 2 * nfs4_name_maxsz))
+#define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
+				nfs4_fattr_value_maxsz)
+#define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
 #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
 #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
 #define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
 #define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
@@ -122,11 +127,11 @@ static int nfs_stat_to_errno(int);
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 				1 + nfs4_name_maxsz + \
 				1 + nfs4_name_maxsz + \
 				nfs4_path_maxsz + \
 				nfs4_path_maxsz + \
-				nfs4_fattr_bitmap_maxsz)
+				nfs4_fattr_maxsz)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
 				2 + nfs4_name_maxsz + \
 				2 + nfs4_name_maxsz + \
-				nfs4_fattr_bitmap_maxsz)
+				nfs4_fattr_maxsz)
 #define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
 #define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
@@ -205,7 +210,7 @@ static int nfs_stat_to_errno(int);
 #define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
 #define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + 4 + \
                                 op_encode_hdr_maxsz + 4 + \
-                                nfs4_fattr_bitmap_maxsz + \
+                                nfs4_fattr_maxsz + \
                                 encode_getattr_maxsz)
                                 encode_getattr_maxsz)
 #define NFS4_dec_setattr_sz     (compound_decode_hdr_maxsz + \
 #define NFS4_dec_setattr_sz     (compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
                                 decode_putfh_maxsz + \
@@ -360,6 +365,20 @@ static int nfs_stat_to_errno(int);
 				encode_delegreturn_maxsz)
 				encode_delegreturn_maxsz)
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
 				decode_delegreturn_maxsz)
 				decode_delegreturn_maxsz)
+#define NFS4_enc_getacl_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_getacl_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + \
+				nfs4_fattr_bitmap_maxsz + 1)
+#define NFS4_enc_setacl_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 4 + \
+				nfs4_fattr_bitmap_maxsz + 1)
+#define NFS4_dec_setacl_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 
 
 static struct {
 static struct {
 	unsigned int	mode;
 	unsigned int	mode;
@@ -459,7 +478,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 	 * In the worst-case, this would be
 	 * In the worst-case, this would be
 	 *   12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
 	 *   12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
 	 *          = 36 bytes, plus any contribution from variable-length fields
 	 *          = 36 bytes, plus any contribution from variable-length fields
-	 *            such as owner/group/acl's.
+	 *            such as owner/group.
 	 */
 	 */
 	len = 16;
 	len = 16;
 
 
@@ -660,8 +679,6 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1
 
 
 static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
 static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
 {
 {
-	extern u32 nfs4_fattr_bitmap[];
-
 	return encode_getattr_two(xdr,
 	return encode_getattr_two(xdr,
 			bitmask[0] & nfs4_fattr_bitmap[0],
 			bitmask[0] & nfs4_fattr_bitmap[0],
 			bitmask[1] & nfs4_fattr_bitmap[1]);
 			bitmask[1] & nfs4_fattr_bitmap[1]);
@@ -669,8 +686,6 @@ static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
 
 
 static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask)
 static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask)
 {
 {
-	extern u32 nfs4_fsinfo_bitmap[];
-
 	return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
 	return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
 			bitmask[1] & nfs4_fsinfo_bitmap[1]);
 			bitmask[1] & nfs4_fsinfo_bitmap[1]);
 }
 }
@@ -969,7 +984,6 @@ static int encode_putrootfh(struct xdr_stream *xdr)
 
 
 static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
 static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
 {
 {
-	extern nfs4_stateid zero_stateid;
 	nfs4_stateid stateid;
 	nfs4_stateid stateid;
 	uint32_t *p;
 	uint32_t *p;
 
 
@@ -1000,6 +1014,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
 static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
 static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
 {
 {
 	struct rpc_auth *auth = req->rq_task->tk_auth;
 	struct rpc_auth *auth = req->rq_task->tk_auth;
+	uint32_t attrs[2] = {
+		FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
+		FATTR4_WORD1_MOUNTED_ON_FILEID,
+	};
 	int replen;
 	int replen;
 	uint32_t *p;
 	uint32_t *p;
 
 
@@ -1010,13 +1028,20 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 	WRITE32(readdir->count >> 1);  /* We're not doing readdirplus */
 	WRITE32(readdir->count >> 1);  /* We're not doing readdirplus */
 	WRITE32(readdir->count);
 	WRITE32(readdir->count);
 	WRITE32(2);
 	WRITE32(2);
-	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) {
-		WRITE32(0);
-		WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID);
-	} else {
-		WRITE32(FATTR4_WORD0_FILEID);
-		WRITE32(0);
-	}
+	/* Switch to mounted_on_fileid if the server supports it */
+	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
+		attrs[0] &= ~FATTR4_WORD0_FILEID;
+	else
+		attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+	WRITE32(attrs[0] & readdir->bitmask[0]);
+	WRITE32(attrs[1] & readdir->bitmask[1]);
+	dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n",
+			__FUNCTION__,
+			(unsigned long long)readdir->cookie,
+			((u32 *)readdir->verifier.data)[0],
+			((u32 *)readdir->verifier.data)[1],
+			attrs[0] & readdir->bitmask[0],
+			attrs[1] & readdir->bitmask[1]);
 
 
 	/* set up reply kvec
 	/* set up reply kvec
 	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
 	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
@@ -1025,6 +1050,9 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2;
 	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2;
 	xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages,
 	xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages,
 			 readdir->pgbase, readdir->count);
 			 readdir->pgbase, readdir->count);
+	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
+			__FUNCTION__, replen, readdir->pages,
+			readdir->pgbase, readdir->count);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -1088,6 +1116,25 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client
 	return 0;
 	return 0;
 }
 }
 
 
+static int
+encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4+sizeof(zero_stateid.data));
+	WRITE32(OP_SETATTR);
+	WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
+	RESERVE_SPACE(2*4);
+	WRITE32(1);
+	WRITE32(FATTR4_WORD0_ACL);
+	if (arg->acl_len % 4)
+		return -EINVAL;
+	RESERVE_SPACE(4);
+	WRITE32(arg->acl_len);
+	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
+	return 0;
+}
+
 static int
 static int
 encode_savefh(struct xdr_stream *xdr)
 encode_savefh(struct xdr_stream *xdr)
 {
 {
@@ -1631,6 +1678,34 @@ out:
         return status;
         return status;
 }
 }
 
 
+/*
+ * Encode a GETACL request
+ */
+static int
+nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,
+		struct nfs_getaclargs *args)
+{
+	struct xdr_stream xdr;
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int replen, status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
+	/* set up reply buffer: */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+		args->acl_pages, args->acl_pgbase, args->acl_len);
+out:
+	return status;
+}
+
 /*
 /*
  * Encode a WRITE request
  * Encode a WRITE request
  */
  */
@@ -1697,7 +1772,6 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs
  */
  */
 static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args)
 static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args)
 {
 {
-	extern u32 nfs4_pathconf_bitmap[2];
 	struct xdr_stream xdr;
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 	struct compound_hdr hdr = {
 		.nops = 2,
 		.nops = 2,
@@ -1718,7 +1792,6 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct
  */
  */
 static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args)
 static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args)
 {
 {
-	extern u32 nfs4_statfs_bitmap[];
 	struct xdr_stream xdr;
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 	struct compound_hdr hdr = {
 		.nops = 2,
 		.nops = 2,
@@ -3003,6 +3076,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 		return status;
 		return status;
 	READ_BUF(8);
 	READ_BUF(8);
 	COPYMEM(readdir->verifier.data, 8);
 	COPYMEM(readdir->verifier.data, 8);
+	dprintk("%s: verifier = 0x%x%x\n",
+			__FUNCTION__,
+			((u32 *)readdir->verifier.data)[0],
+			((u32 *)readdir->verifier.data)[1]);
+
 
 
 	hdrlen = (char *) p - (char *) iov->iov_base;
 	hdrlen = (char *) p - (char *) iov->iov_base;
 	recvd = rcvbuf->len - hdrlen;
 	recvd = rcvbuf->len - hdrlen;
@@ -3017,12 +3095,14 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	for (nr = 0; *p++; nr++) {
 	for (nr = 0; *p++; nr++) {
 		if (p + 3 > end)
 		if (p + 3 > end)
 			goto short_pkt;
 			goto short_pkt;
+		dprintk("cookie = %Lu, ", *((unsigned long long *)p));
 		p += 2;			/* cookie */
 		p += 2;			/* cookie */
 		len = ntohl(*p++);	/* filename length */
 		len = ntohl(*p++);	/* filename length */
 		if (len > NFS4_MAXNAMLEN) {
 		if (len > NFS4_MAXNAMLEN) {
 			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
 			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
 			goto err_unmap;
 			goto err_unmap;
 		}
 		}
+		dprintk("filename = %*s\n", len, (char *)p);
 		p += XDR_QUADLEN(len);
 		p += XDR_QUADLEN(len);
 		if (p + 1 > end)
 		if (p + 1 > end)
 			goto short_pkt;
 			goto short_pkt;
@@ -3042,6 +3122,7 @@ out:
 	kunmap_atomic(kaddr, KM_USER0);
 	kunmap_atomic(kaddr, KM_USER0);
 	return 0;
 	return 0;
 short_pkt:
 short_pkt:
+	dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr);
 	entry[0] = entry[1] = 0;
 	entry[0] = entry[1] = 0;
 	/* truncate listing ? */
 	/* truncate listing ? */
 	if (!nr) {
 	if (!nr) {
@@ -3127,6 +3208,47 @@ static int decode_renew(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_RENEW);
 	return decode_op_hdr(xdr, OP_RENEW);
 }
 }
 
 
+static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
+		size_t *acl_len)
+{
+	uint32_t *savep;
+	uint32_t attrlen,
+		 bitmap[2] = {0};
+	struct kvec *iov = req->rq_rcv_buf.head;
+	int status;
+
+	*acl_len = 0;
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto out;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto out;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto out;
+
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
+		int hdrlen, recvd;
+
+		/* We ignore &savep and don't do consistency checks on
+		 * the attr length.  Let userspace figure it out.... */
+		hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
+		recvd = req->rq_rcv_buf.len - hdrlen;
+		if (attrlen > recvd) {
+			printk(KERN_WARNING "NFS: server cheating in getattr"
+					" acl reply: attrlen %u > recvd %u\n",
+					attrlen, recvd);
+			return -EINVAL;
+		}
+		if (attrlen <= *acl_len)
+			xdr_read_pages(xdr, attrlen);
+		*acl_len = attrlen;
+	}
+
+out:
+	return status;
+}
+
 static int
 static int
 decode_savefh(struct xdr_stream *xdr)
 decode_savefh(struct xdr_stream *xdr)
 {
 {
@@ -3418,6 +3540,71 @@ out:
 
 
 }
 }
 
 
+/*
+ * Encode an SETACL request
+ */
+static int
+nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr = {
+                .nops   = 2,
+        };
+        int status;
+
+        xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+        encode_compound_hdr(&xdr, &hdr);
+        status = encode_putfh(&xdr, args->fh);
+        if (status)
+                goto out;
+        status = encode_setacl(&xdr, args);
+out:
+        return status;
+}
+/*
+ * Decode SETACL response
+ */
+static int
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_setattr(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode GETACL response
+ */
+static int
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_getacl(&xdr, rqstp, acl_len);
+
+out:
+	return status;
+}
 
 
 /*
 /*
  * Decode CLOSE response
  * Decode CLOSE response
@@ -3895,6 +4082,12 @@ uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
 	}
 	}
 	len = XDR_QUADLEN(ntohl(*p++));	/* attribute buffer length */
 	len = XDR_QUADLEN(ntohl(*p++));	/* attribute buffer length */
 	if (len > 0) {
 	if (len > 0) {
+		if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) {
+			bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
+			/* Ignore the return value of rdattr_error for now */
+			p++;
+			len--;
+		}
 		if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID)
 		if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID)
 			xdr_decode_hyper(p, &entry->ino);
 			xdr_decode_hyper(p, &entry->ino);
 		else if (bitmap[0] == FATTR4_WORD0_FILEID)
 		else if (bitmap[0] == FATTR4_WORD0_FILEID)
@@ -3934,6 +4127,8 @@ static struct {
 	{ NFS4ERR_DQUOT,	EDQUOT		},
 	{ NFS4ERR_DQUOT,	EDQUOT		},
 	{ NFS4ERR_STALE,	ESTALE		},
 	{ NFS4ERR_STALE,	ESTALE		},
 	{ NFS4ERR_BADHANDLE,	EBADHANDLE	},
 	{ NFS4ERR_BADHANDLE,	EBADHANDLE	},
+	{ NFS4ERR_BADOWNER,	EINVAL		},
+	{ NFS4ERR_BADNAME,	EINVAL		},
 	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE	},
 	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE	},
 	{ NFS4ERR_NOTSUPP,	ENOTSUPP	},
 	{ NFS4ERR_NOTSUPP,	ENOTSUPP	},
 	{ NFS4ERR_TOOSMALL,	ETOOSMALL	},
 	{ NFS4ERR_TOOSMALL,	ETOOSMALL	},
@@ -4019,6 +4214,8 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(READDIR,		enc_readdir,	dec_readdir),
   PROC(READDIR,		enc_readdir,	dec_readdir),
   PROC(SERVER_CAPS,	enc_server_caps, dec_server_caps),
   PROC(SERVER_CAPS,	enc_server_caps, dec_server_caps),
   PROC(DELEGRETURN,	enc_delegreturn, dec_delegreturn),
   PROC(DELEGRETURN,	enc_delegreturn, dec_delegreturn),
+  PROC(GETACL,		enc_getacl,	dec_getacl),
+  PROC(SETACL,		enc_setacl,	dec_setacl),
 };
 };
 
 
 struct rpc_version		nfs_version4 = {
 struct rpc_version		nfs_version4 = {

+ 9 - 0
fs/nfs/nfsroot.c

@@ -124,6 +124,7 @@ enum {
 	Opt_soft, Opt_hard, Opt_intr,
 	Opt_soft, Opt_hard, Opt_intr,
 	Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
 	Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
 	Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
 	Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
+	Opt_acl, Opt_noacl,
 	/* Error token */
 	/* Error token */
 	Opt_err
 	Opt_err
 };
 };
@@ -158,6 +159,8 @@ static match_table_t __initdata tokens = {
 	{Opt_udp, "udp"},
 	{Opt_udp, "udp"},
 	{Opt_tcp, "proto=tcp"},
 	{Opt_tcp, "proto=tcp"},
 	{Opt_tcp, "tcp"},
 	{Opt_tcp, "tcp"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
 	{Opt_err, NULL}
 	{Opt_err, NULL}
 	
 	
 };
 };
@@ -266,6 +269,12 @@ static int __init root_nfs_parse(char *name, char *buf)
 			case Opt_tcp:
 			case Opt_tcp:
 				nfs_data.flags |= NFS_MOUNT_TCP;
 				nfs_data.flags |= NFS_MOUNT_TCP;
 				break;
 				break;
+			case Opt_acl:
+				nfs_data.flags &= ~NFS_MOUNT_NOACL;
+				break;
+			case Opt_noacl:
+				nfs_data.flags |= NFS_MOUNT_NOACL;
+				break;
 			default : 
 			default : 
 				return 0;
 				return 0;
 		}
 		}

+ 108 - 34
fs/nfs/pagelist.c

@@ -107,10 +107,37 @@ void nfs_unlock_request(struct nfs_page *req)
 	smp_mb__before_clear_bit();
 	smp_mb__before_clear_bit();
 	clear_bit(PG_BUSY, &req->wb_flags);
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_clear_bit();
 	smp_mb__after_clear_bit();
-	wake_up_all(&req->wb_context->waitq);
+	wake_up_bit(&req->wb_flags, PG_BUSY);
 	nfs_release_request(req);
 	nfs_release_request(req);
 }
 }
 
 
+/**
+ * nfs_set_page_writeback_locked - Lock a request for writeback
+ * @req:
+ */
+int nfs_set_page_writeback_locked(struct nfs_page *req)
+{
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+
+	if (!nfs_lock_request(req))
+		return 0;
+	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+	return 1;
+}
+
+/**
+ * nfs_clear_page_writeback - Unlock request and wake up sleepers
+ */
+void nfs_clear_page_writeback(struct nfs_page *req)
+{
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+
+	spin_lock(&nfsi->req_lock);
+	radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+	spin_unlock(&nfsi->req_lock);
+	nfs_unlock_request(req);
+}
+
 /**
 /**
  * nfs_clear_request - Free up all resources allocated to the request
  * nfs_clear_request - Free up all resources allocated to the request
  * @req:
  * @req:
@@ -150,34 +177,15 @@ nfs_release_request(struct nfs_page *req)
 	nfs_page_free(req);
 	nfs_page_free(req);
 }
 }
 
 
-/**
- * nfs_list_add_request - Insert a request into a sorted list
- * @req: request
- * @head: head of list into which to insert the request.
- *
- * Note that the wb_list is sorted by page index in order to facilitate
- * coalescing of requests.
- * We use an insertion sort that is optimized for the case of appended
- * writes.
- */
-void
-nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+static int nfs_wait_bit_interruptible(void *word)
 {
 {
-	struct list_head *pos;
+	int ret = 0;
 
 
-#ifdef NFS_PARANOIA
-	if (!list_empty(&req->wb_list)) {
-		printk(KERN_ERR "NFS: Add to list failed!\n");
-		BUG();
-	}
-#endif
-	list_for_each_prev(pos, head) {
-		struct nfs_page	*p = nfs_list_entry(pos);
-		if (p->wb_index < req->wb_index)
-			break;
-	}
-	list_add(&req->wb_list, pos);
-	req->wb_list_head = head;
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+	else
+		schedule();
+	return ret;
 }
 }
 
 
 /**
 /**
@@ -190,12 +198,22 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
 int
 int
 nfs_wait_on_request(struct nfs_page *req)
 nfs_wait_on_request(struct nfs_page *req)
 {
 {
-	struct inode	*inode = req->wb_context->dentry->d_inode;
-        struct rpc_clnt	*clnt = NFS_CLIENT(inode);
-
-	if (!NFS_WBACK_BUSY(req))
-		return 0;
-	return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req));
+        struct rpc_clnt	*clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
+	sigset_t oldmask;
+	int ret = 0;
+
+	if (!test_bit(PG_BUSY, &req->wb_flags))
+		goto out;
+	/*
+	 * Note: the call to rpc_clnt_sigmask() suffices to ensure that we
+	 *	 are not interrupted if intr flag is not set
+	 */
+	rpc_clnt_sigmask(clnt, &oldmask);
+	ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
+			nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE);
+	rpc_clnt_sigunmask(clnt, &oldmask);
+out:
+	return ret;
 }
 }
 
 
 /**
 /**
@@ -243,6 +261,62 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
 	return npages;
 	return npages;
 }
 }
 
 
+/* Maximum number of requests fetched from the radix tree per gang lookup. */
+#define NFS_SCAN_MAXENTRIES 16
+/**
+ * nfs_scan_lock_dirty - Scan the radix tree for dirty requests
+ * @nfsi: NFS inode
+ * @dst: Destination list
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+ * Looks up requests tagged NFS_PAGE_TAG_DIRTY in @nfsi's nfs_page_tree.
+ * Every request that can be locked for writeback has its dirty tag
+ * cleared and is moved from the list it is on to @dst.
+ * If @npages is 0, the entire address_space starting at index
+ * @idx_start is scanned.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ * You must be holding the inode's req_lock when calling this function
+ *
+ * Returns the number of requests moved onto @dst.
+ */
+int
+nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
+	      unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+	struct nfs_page *req;
+	unsigned long idx_end;
+	int found, i;
+	int res;
+
+	res = 0;
+	/* npages == 0 means "no upper bound": scan up to the largest index. */
+	if (npages == 0)
+		idx_end = ~0;
+	else
+		idx_end = idx_start + npages - 1;
+
+	for (;;) {
+		/* Fetch the next batch of dirty-tagged requests at or after idx_start. */
+		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
+				(void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES,
+				NFS_PAGE_TAG_DIRTY);
+		if (found <= 0)
+			break;
+		for (i = 0; i < found; i++) {
+			req = pgvec[i];
+			/* Past the requested range: nothing further to collect. */
+			if (req->wb_index > idx_end)
+				goto out;
+
+			/* Resume the next lookup just after this request. */
+			idx_start = req->wb_index + 1;
+
+			/* Requests that cannot be locked stay tagged dirty and are skipped. */
+			if (nfs_set_page_writeback_locked(req)) {
+				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+						req->wb_index, NFS_PAGE_TAG_DIRTY);
+				nfs_list_remove_request(req);
+				nfs_list_add_request(req, dst);
+				res++;
+			}
+		}
+	}
+out:
+	return res;
+}
+
 /**
 /**
  * nfs_scan_list - Scan a list for matching requests
  * nfs_scan_list - Scan a list for matching requests
  * @head: One of the NFS inode request lists
  * @head: One of the NFS inode request lists
@@ -280,7 +354,7 @@ nfs_scan_list(struct list_head *head, struct list_head *dst,
 		if (req->wb_index > idx_end)
 		if (req->wb_index > idx_end)
 			break;
 			break;
 
 
-		if (!nfs_lock_request(req))
+		if (!nfs_set_page_writeback_locked(req))
 			continue;
 			continue;
 		nfs_list_remove_request(req);
 		nfs_list_remove_request(req);
 		nfs_list_add_request(req, dst);
 		nfs_list_add_request(req, dst);

+ 1 - 0
fs/nfs/proc.c

@@ -622,6 +622,7 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.version	= 2,		       /* protocol version */
 	.version	= 2,		       /* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.file_inode_ops	= &nfs_file_inode_operations,
 	.getroot	= nfs_proc_get_root,
 	.getroot	= nfs_proc_get_root,
 	.getattr	= nfs_proc_getattr,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
 	.setattr	= nfs_proc_setattr,

+ 0 - 3
fs/nfs/read.c

@@ -173,7 +173,6 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	if (len < PAGE_CACHE_SIZE)
 	if (len < PAGE_CACHE_SIZE)
 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
 
 
-	nfs_lock_request(new);
 	nfs_list_add_request(new, &one_request);
 	nfs_list_add_request(new, &one_request);
 	nfs_pagein_one(&one_request, inode);
 	nfs_pagein_one(&one_request, inode);
 	return 0;
 	return 0;
@@ -185,7 +184,6 @@ static void nfs_readpage_release(struct nfs_page *req)
 
 
 	nfs_clear_request(req);
 	nfs_clear_request(req);
 	nfs_release_request(req);
 	nfs_release_request(req);
-	nfs_unlock_request(req);
 
 
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
 			req->wb_context->dentry->d_inode->i_sb->s_id,
 			req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -553,7 +551,6 @@ readpage_async_filler(void *data, struct page *page)
 	}
 	}
 	if (len < PAGE_CACHE_SIZE)
 	if (len < PAGE_CACHE_SIZE)
 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
-	nfs_lock_request(new);
 	nfs_list_add_request(new, desc->head);
 	nfs_list_add_request(new, desc->head);
 	return 0;
 	return 0;
 }
 }

+ 54 - 54
fs/nfs/write.c

@@ -220,7 +220,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		ClearPageError(page);
 		ClearPageError(page);
 
 
 io_error:
 io_error:
-	nfs_end_data_update_defer(inode);
+	nfs_end_data_update(inode);
 	nfs_writedata_free(wdata);
 	nfs_writedata_free(wdata);
 	return written ? written : result;
 	return written ? written : result;
 }
 }
@@ -352,7 +352,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		if (err < 0)
 		if (err < 0)
 			goto out;
 			goto out;
 	}
 	}
-	err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc));
+	err = nfs_commit_inode(inode, wb_priority(wbc));
 	if (err > 0) {
 	if (err > 0) {
 		wbc->nr_to_write -= err;
 		wbc->nr_to_write -= err;
 		err = 0;
 		err = 0;
@@ -401,7 +401,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	nfsi->npages--;
 	nfsi->npages--;
 	if (!nfsi->npages) {
 	if (!nfsi->npages) {
 		spin_unlock(&nfsi->req_lock);
 		spin_unlock(&nfsi->req_lock);
-		nfs_end_data_update_defer(inode);
+		nfs_end_data_update(inode);
 		iput(inode);
 		iput(inode);
 	} else
 	} else
 		spin_unlock(&nfsi->req_lock);
 		spin_unlock(&nfsi->req_lock);
@@ -446,6 +446,8 @@ nfs_mark_request_dirty(struct nfs_page *req)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 
 	spin_lock(&nfsi->req_lock);
 	spin_lock(&nfsi->req_lock);
+	radix_tree_tag_set(&nfsi->nfs_page_tree,
+			req->wb_index, NFS_PAGE_TAG_DIRTY);
 	nfs_list_add_request(req, &nfsi->dirty);
 	nfs_list_add_request(req, &nfsi->dirty);
 	nfsi->ndirty++;
 	nfsi->ndirty++;
 	spin_unlock(&nfsi->req_lock);
 	spin_unlock(&nfsi->req_lock);
@@ -503,13 +505,12 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
 
 
 	spin_lock(&nfsi->req_lock);
 	spin_lock(&nfsi->req_lock);
 	next = idx_start;
 	next = idx_start;
-	while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) {
+	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
 		if (req->wb_index > idx_end)
 		if (req->wb_index > idx_end)
 			break;
 			break;
 
 
 		next = req->wb_index + 1;
 		next = req->wb_index + 1;
-		if (!NFS_WBACK_BUSY(req))
-			continue;
+		BUG_ON(!NFS_WBACK_BUSY(req));
 
 
 		atomic_inc(&req->wb_count);
 		atomic_inc(&req->wb_count);
 		spin_unlock(&nfsi->req_lock);
 		spin_unlock(&nfsi->req_lock);
@@ -538,12 +539,15 @@ static int
 nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
 nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
 {
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int	res;
-	res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages);
-	nfsi->ndirty -= res;
-	sub_page_state(nr_dirty,res);
-	if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
-		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+	int res = 0;
+
+	if (nfsi->ndirty != 0) {
+		res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
+		nfsi->ndirty -= res;
+		sub_page_state(nr_dirty,res);
+		if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
+			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+	}
 	return res;
 	return res;
 }
 }
 
 
@@ -562,11 +566,14 @@ static int
 nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
 nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
 {
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int	res;
-	res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
-	nfsi->ncommit -= res;
-	if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
-		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+	int res = 0;
+
+	if (nfsi->ncommit != 0) {
+		res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
+		nfsi->ncommit -= res;
+		if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
+			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+	}
 	return res;
 	return res;
 }
 }
 #endif
 #endif
@@ -750,7 +757,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 	 * is entirely in cache, it may be more efficient to avoid
 	 * is entirely in cache, it may be more efficient to avoid
 	 * fragmenting write requests.
 	 * fragmenting write requests.
 	 */
 	 */
-	if (PageUptodate(page) && inode->i_flock == NULL) {
+	if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
 		loff_t end_offs = i_size_read(inode) - 1;
 		loff_t end_offs = i_size_read(inode) - 1;
 		unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
 		unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
 
 
@@ -821,7 +828,7 @@ out:
 #else
 #else
 	nfs_inode_remove_request(req);
 	nfs_inode_remove_request(req);
 #endif
 #endif
-	nfs_unlock_request(req);
+	nfs_clear_page_writeback(req);
 }
 }
 
 
 static inline int flush_task_priority(int how)
 static inline int flush_task_priority(int how)
@@ -952,7 +959,7 @@ out_bad:
 		nfs_writedata_free(data);
 		nfs_writedata_free(data);
 	}
 	}
 	nfs_mark_request_dirty(req);
 	nfs_mark_request_dirty(req);
-	nfs_unlock_request(req);
+	nfs_clear_page_writeback(req);
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
 
 
@@ -1002,7 +1009,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 		struct nfs_page *req = nfs_list_entry(head->next);
 		struct nfs_page *req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_list_remove_request(req);
 		nfs_mark_request_dirty(req);
 		nfs_mark_request_dirty(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	}
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
@@ -1029,7 +1036,7 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
 		req = nfs_list_entry(head->next);
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_list_remove_request(req);
 		nfs_mark_request_dirty(req);
 		nfs_mark_request_dirty(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	}
 	return error;
 	return error;
 }
 }
@@ -1121,7 +1128,7 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 		nfs_inode_remove_request(req);
 		nfs_inode_remove_request(req);
 #endif
 #endif
 	next:
 	next:
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	}
 }
 }
 
 
@@ -1210,36 +1217,24 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 		struct nfs_write_data *data, int how)
 		struct nfs_write_data *data, int how)
 {
 {
 	struct rpc_task		*task = &data->task;
 	struct rpc_task		*task = &data->task;
-	struct nfs_page		*first, *last;
+	struct nfs_page		*first;
 	struct inode		*inode;
 	struct inode		*inode;
-	loff_t			start, end, len;
 
 
 	/* Set up the RPC argument and reply structs
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 	 * NB: take care not to mess about with data->commit et al. */
 
 
 	list_splice_init(head, &data->pages);
 	list_splice_init(head, &data->pages);
 	first = nfs_list_entry(data->pages.next);
 	first = nfs_list_entry(data->pages.next);
-	last = nfs_list_entry(data->pages.prev);
 	inode = first->wb_context->dentry->d_inode;
 	inode = first->wb_context->dentry->d_inode;
 
 
-	/*
-	 * Determine the offset range of requests in the COMMIT call.
-	 * We rely on the fact that data->pages is an ordered list...
-	 */
-	start = req_offset(first);
-	end = req_offset(last) + last->wb_bytes;
-	len = end - start;
-	/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
-	if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1))
-		len = 0;
-
 	data->inode	  = inode;
 	data->inode	  = inode;
 	data->cred	  = first->wb_context->cred;
 	data->cred	  = first->wb_context->cred;
 
 
 	data->args.fh     = NFS_FH(data->inode);
 	data->args.fh     = NFS_FH(data->inode);
-	data->args.offset = start;
-	data->args.count  = len;
-	data->res.count   = len;
+	/* Note: we always request a commit of the entire inode */
+	data->args.offset = 0;
+	data->args.count  = 0;
+	data->res.count   = 0;
 	data->res.fattr   = &data->fattr;
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	data->res.verf    = &data->verf;
 	
 	
@@ -1278,7 +1273,7 @@ nfs_commit_list(struct list_head *head, int how)
 		req = nfs_list_entry(head->next);
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
 		nfs_mark_request_commit(req);
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 	}
 	}
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
@@ -1324,7 +1319,7 @@ nfs_commit_done(struct rpc_task *task)
 		dprintk(" mismatch\n");
 		dprintk(" mismatch\n");
 		nfs_mark_request_dirty(req);
 		nfs_mark_request_dirty(req);
 	next:
 	next:
-		nfs_unlock_request(req);
+		nfs_clear_page_writeback(req);
 		res++;
 		res++;
 	}
 	}
 	sub_page_state(nr_unstable,res);
 	sub_page_state(nr_unstable,res);
@@ -1342,16 +1337,23 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 	spin_lock(&nfsi->req_lock);
 	spin_lock(&nfsi->req_lock);
 	res = nfs_scan_dirty(inode, &head, idx_start, npages);
 	res = nfs_scan_dirty(inode, &head, idx_start, npages);
 	spin_unlock(&nfsi->req_lock);
 	spin_unlock(&nfsi->req_lock);
-	if (res)
-		error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
+	if (res) {
+		struct nfs_server *server = NFS_SERVER(inode);
+
+		/* For single writes, FLUSH_STABLE is more efficient */
+		if (res == nfsi->npages && nfsi->npages <= server->wpages) {
+			if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
+				how |= FLUSH_STABLE;
+		}
+		error = nfs_flush_list(&head, server->wpages, how);
+	}
 	if (error < 0)
 	if (error < 0)
 		return error;
 		return error;
 	return res;
 	return res;
 }
 }
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
-		    unsigned int npages, int how)
+int nfs_commit_inode(struct inode *inode, int how)
 {
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
 	LIST_HEAD(head);
@@ -1359,15 +1361,13 @@ int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
 				error = 0;
 				error = 0;
 
 
 	spin_lock(&nfsi->req_lock);
 	spin_lock(&nfsi->req_lock);
-	res = nfs_scan_commit(inode, &head, idx_start, npages);
+	res = nfs_scan_commit(inode, &head, 0, 0);
+	spin_unlock(&nfsi->req_lock);
 	if (res) {
 	if (res) {
-		res += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&nfsi->req_lock);
 		error = nfs_commit_list(&head, how);
 		error = nfs_commit_list(&head, how);
-	} else
-		spin_unlock(&nfsi->req_lock);
-	if (error < 0)
-		return error;
+		if (error < 0)
+			return error;
+	}
 	return res;
 	return res;
 }
 }
 #endif
 #endif
@@ -1389,7 +1389,7 @@ int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
 			error = nfs_flush_inode(inode, idx_start, npages, how);
 			error = nfs_flush_inode(inode, idx_start, npages, how);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (error == 0)
 		if (error == 0)
-			error = nfs_commit_inode(inode, idx_start, npages, how);
+			error = nfs_commit_inode(inode, how);
 #endif
 #endif
 	} while (error > 0);
 	} while (error > 0);
 	return error;
 	return error;

+ 7 - 0
fs/nfs_common/Makefile

@@ -0,0 +1,7 @@
+#
+# Makefile for Linux filesystem routines that are shared by client and server.
+#
+
+obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
+
+nfs_acl-objs := nfsacl.o

+ 257 - 0
fs/nfs_common/nfsacl.c

@@ -0,0 +1,257 @@
+/*
+ * fs/nfs_common/nfsacl.c
+ *
+ *  Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+/*
+ * The Solaris nfsacl protocol represents some ACLs slightly differently
+ * than POSIX 1003.1e draft 17 does (and we do):
+ *
+ *  - Minimal ACLs always have an ACL_MASK entry, so they have
+ *    four instead of three entries.
+ *  - The ACL_MASK entry in such minimal ACLs always has the same
+ *    permissions as the ACL_GROUP_OBJ entry. (In extended ACLs
+ *    the ACL_MASK and ACL_GROUP_OBJ entries may differ.)
+ *  - The identifier fields of the ACL_USER_OBJ and ACL_GROUP_OBJ
+ *    entries contain the identifiers of the owner and owning group.
+ *    (In POSIX ACLs we always set them to ACL_UNDEFINED_ID).
+ *  - ACL entries in the kernel are kept sorted in ascending order
+ *    of (e_tag, e_id). Solaris ACLs are unsorted.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/nfsacl.h>
+#include <linux/nfs3.h>
+#include <linux/sort.h>
+
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(nfsacl_encode);
+EXPORT_SYMBOL(nfsacl_decode);
+
+/*
+ * Encoder state handed to xdr_encode_array2(). The generic descriptor
+ * must stay the first member: xdr_nfsace_encode() casts the descriptor
+ * pointer it receives back to this structure.
+ */
+struct nfsacl_encode_desc {
+	struct xdr_array2_desc desc;	/* generic array descriptor (first member) */
+	unsigned int count;		/* index of the next ACL entry to emit */
+	struct posix_acl *acl;		/* ACL being encoded */
+	int typeflag;			/* OR'ed into every encoded tag */
+	uid_t uid;			/* id emitted for the ACL_USER_OBJ entry */
+	gid_t gid;			/* id emitted for the ACL_GROUP_OBJ entry */
+};
+
+/*
+ * Per-element callback for xdr_encode_array2(): writes one ACL entry as
+ * three 32-bit words (tag, id, permissions) into @elem.
+ *
+ * While real entries remain they are emitted in order. Once the ACL is
+ * exhausted a synthetic ACL_MASK entry is produced whose permissions
+ * copy those of the ACL_GROUP_OBJ entry (or rwx if there is none).
+ * Always returns 0.
+ */
+static int
+xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
+{
+	struct nfsacl_encode_desc *ed = (struct nfsacl_encode_desc *) desc;
+	struct posix_acl *acl = ed->acl;
+	u32 *out = (u32 *) elem;
+	struct posix_acl_entry *entry;
+	u32 id;
+
+	if (ed->count >= acl->a_count) {
+		const struct posix_acl_entry *pa, *pe;
+		int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE;
+
+		FOREACH_ACL_ENTRY(pa, acl, pe) {
+			if (pa->e_tag == ACL_GROUP_OBJ) {
+				group_obj_perm = pa->e_perm & S_IRWXO;
+				break;
+			}
+		}
+		/* fake up ACL_MASK entry */
+		out[0] = htonl(ACL_MASK | ed->typeflag);
+		out[1] = htonl(0);
+		out[2] = htonl(group_obj_perm);
+		return 0;
+	}
+
+	entry = &acl->a_entries[ed->count++];
+	switch (entry->e_tag) {
+	case ACL_USER_OBJ:
+		id = htonl(ed->uid);
+		break;
+	case ACL_GROUP_OBJ:
+		id = htonl(ed->gid);
+		break;
+	case ACL_USER:
+	case ACL_GROUP:
+		id = htonl(entry->e_id);
+		break;
+	default:  /* Solaris depends on that! */
+		id = 0;
+		break;
+	}
+	out[0] = htonl(entry->e_tag | ed->typeflag);
+	out[1] = id;
+	out[2] = htonl(entry->e_perm & S_IRWXO);
+
+	return 0;
+}
+
+/*
+ * nfsacl_encode - encode an ACL into an xdr_buf in nfsacl form
+ * @buf: destination buffer
+ * @base: byte offset in @buf at which encoding starts
+ * @inode: supplies the uid/gid written for the ACL_USER_OBJ and
+ *         ACL_GROUP_OBJ entries
+ * @acl: ACL to encode; may be NULL
+ * @encode_entries: if zero, only the entry count is written and the
+ *                  array itself is encoded with length 0
+ * @typeflag: OR'ed into every encoded tag
+ *
+ * A non-empty ACL is reported with at least four entries; when the ACL
+ * has fewer, xdr_nfsace_encode() fills the gap with a synthetic ACL_MASK
+ * entry.
+ *
+ * Returns -EINVAL if the entry count exceeds NFS_ACL_MAX_ENTRIES or the
+ * count word cannot be written, the error from xdr_encode_array2() if
+ * that fails, and otherwise 8 + 12 * (number of array elements encoded).
+ * NOTE(review): the declared return type is unsigned int although
+ * negative errno values are returned; callers in this commit store the
+ * result in an int.
+ */
+unsigned int
+nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+	      struct posix_acl *acl, int encode_entries, int typeflag)
+{
+	/* Empty or missing ACL -> 0 entries; otherwise pad up to 4. */
+	int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
+	struct nfsacl_encode_desc nfsacl_desc = {
+		.desc = {
+			.elem_size = 12,
+			.array_len = encode_entries ? entries : 0,
+			.xcode = xdr_nfsace_encode,
+		},
+		.acl = acl,
+		.typeflag = typeflag,
+		.uid = inode->i_uid,
+		.gid = inode->i_gid,
+	};
+	int err;
+
+	/* The entry count is written even when the entries are omitted. */
+	if (entries > NFS_ACL_MAX_ENTRIES ||
+	    xdr_encode_word(buf, base, entries))
+		return -EINVAL;
+	err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc);
+	if (!err)
+		err = 8 + nfsacl_desc.desc.elem_size *
+			  nfsacl_desc.desc.array_len;
+	return err;
+}
+
+/*
+ * Decoder state handed to xdr_decode_array2(). The generic descriptor
+ * must stay the first member: xdr_nfsace_decode() casts the descriptor
+ * pointer it receives back to this structure.
+ */
+struct nfsacl_decode_desc {
+	struct xdr_array2_desc desc;	/* generic array descriptor (first member) */
+	unsigned int count;		/* index of the next ACL entry to fill */
+	struct posix_acl *acl;		/* allocated on the first decoded element */
+};
+
+/*
+ * Per-element callback for xdr_decode_array2(): reads one ACL entry
+ * (tag, id, permissions - three 32-bit words) from @elem.
+ *
+ * The ACL is allocated on the first call, sized by desc->array_len.
+ * Returns 0 on success, -EINVAL for an oversized array, an unknown tag
+ * or invalid permission bits, and -ENOMEM if the allocation fails.
+ * On error the partially filled ACL stays in the descriptor; this
+ * function does not free it.
+ */
+static int
+xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
+{
+	struct nfsacl_decode_desc *nfsacl_desc =
+		(struct nfsacl_decode_desc *) desc;
+	u32 *p = (u32 *) elem;
+	struct posix_acl_entry *entry;
+
+	/* First element: allocate room for the whole array. */
+	if (!nfsacl_desc->acl) {
+		if (desc->array_len > NFS_ACL_MAX_ENTRIES)
+			return -EINVAL;
+		nfsacl_desc->acl = posix_acl_alloc(desc->array_len, GFP_KERNEL);
+		if (!nfsacl_desc->acl)
+			return -ENOMEM;
+		nfsacl_desc->count = 0;
+	}
+
+	entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
+	/* The default-ACL flag is carried in the tag on the wire; strip it. */
+	entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT;
+	entry->e_id = ntohl(*p++);
+	entry->e_perm = ntohl(*p++);
+
+	switch(entry->e_tag) {
+		case ACL_USER_OBJ:
+		case ACL_USER:
+		case ACL_GROUP_OBJ:
+		case ACL_GROUP:
+		case ACL_OTHER:
+			/* Only the rwx bits are valid for these entries. */
+			if (entry->e_perm & ~S_IRWXO)
+				return -EINVAL;
+			break;
+		case ACL_MASK:
+			/* Solaris sometimes sets additional bits in the mask */
+			entry->e_perm &= S_IRWXO;
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * sort() comparison callback: orders ACL entries by ascending e_tag,
+ * then by ascending e_id.
+ */
+static int
+cmp_acl_entry(const void *x, const void *y)
+{
+	const struct posix_acl_entry *lhs = x;
+	const struct posix_acl_entry *rhs = y;
+
+	if (lhs->e_tag != rhs->e_tag)
+		return lhs->e_tag - rhs->e_tag;
+	if (lhs->e_id == rhs->e_id)
+		return 0;
+	return (lhs->e_id > rhs->e_id) ? 1 : -1;
+}
+
+/*
+ * Convert from a Solaris ACL to a POSIX 1003.1e draft 17 ACL.
+ *
+ * Works in place on @acl: sorts the entries by (e_tag, e_id), resets the
+ * identifier of the ACL_USER_OBJ, ACL_GROUP_OBJ, ACL_MASK and ACL_OTHER
+ * entries to ACL_UNDEFINED_ID, and drops the ACL_MASK entry of a
+ * four-entry ACL when its permissions equal those of ACL_GROUP_OBJ.
+ * A NULL @acl is accepted. Always returns 0.
+ */
+static int
+posix_acl_from_nfsacl(struct posix_acl *acl)
+{
+	struct posix_acl_entry *pa, *pe,
+	       *group_obj = NULL, *mask = NULL;
+
+	if (!acl)
+		return 0;
+
+	sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry),
+	     cmp_acl_entry, NULL);
+
+	/* Clear undefined identifier fields and find the ACL_GROUP_OBJ
+	   and ACL_MASK entries. */
+	FOREACH_ACL_ENTRY(pa, acl, pe) {
+		switch(pa->e_tag) {
+			case ACL_USER_OBJ:
+				pa->e_id = ACL_UNDEFINED_ID;
+				break;
+			case ACL_GROUP_OBJ:
+				pa->e_id = ACL_UNDEFINED_ID;
+				group_obj = pa;
+				break;
+			case ACL_MASK:
+				mask = pa;
+				/* fall through */
+			case ACL_OTHER:
+				pa->e_id = ACL_UNDEFINED_ID;
+				break;
+		}
+	}
+	if (acl->a_count == 4 && group_obj && mask &&
+	    mask->e_perm == group_obj->e_perm) {
+		/* remove bogus ACL_MASK entry */
+		/* Shift the entries after the mask down by one slot. */
+		memmove(mask, mask+1, (3 - (mask - acl->a_entries)) *
+				      sizeof(struct posix_acl_entry));
+		acl->a_count = 3;
+	}
+	return 0;
+}
+
+/*
+ * nfsacl_decode - decode an nfsacl-encoded ACL from an xdr_buf
+ * @buf: source buffer
+ * @base: byte offset in @buf at which the encoded ACL starts
+ * @aclcnt: if non-NULL, receives the entry count read from the wire
+ * @pacl: if non-NULL, receives the decoded ACL after conversion to
+ *        POSIX form (NULL for an empty array); if NULL, the array is
+ *        walked without building an ACL
+ *
+ * Returns -EINVAL for a bad or oversized entry count or when the count
+ * disagrees with the decoded array length, the error from
+ * xdr_decode_array2() if that fails, and otherwise
+ * 8 + 12 * (number of array elements).
+ * On every error path any partially built ACL is released and *@pacl is
+ * left untouched.
+ */
+unsigned int
+nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+	      struct posix_acl **pacl)
+{
+	struct nfsacl_decode_desc nfsacl_desc = {
+		.desc = {
+			.elem_size = 12,
+			.xcode = pacl ? xdr_nfsace_decode : NULL,
+		},
+	};
+	u32 entries;
+	int err;
+
+	if (xdr_decode_word(buf, base, &entries) ||
+	    entries > NFS_ACL_MAX_ENTRIES)
+		return -EINVAL;
+	err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc);
+	if (err) {
+		/*
+		 * xdr_nfsace_decode() allocates the ACL on the first element
+		 * and may fail on a later one; drop it here so it does not
+		 * leak. The pointer is NULL when nothing was allocated.
+		 */
+		posix_acl_release(nfsacl_desc.acl);
+		return err;
+	}
+	if (pacl) {
+		if (entries != nfsacl_desc.desc.array_len ||
+		    posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) {
+			posix_acl_release(nfsacl_desc.acl);
+			return -EINVAL;
+		}
+		*pacl = nfsacl_desc.acl;
+	}
+	if (aclcnt)
+		*aclcnt = entries;
+	return 8 + nfsacl_desc.desc.elem_size *
+		   nfsacl_desc.desc.array_len;
+}

+ 2 - 0
fs/nfsd/Makefile

@@ -6,7 +6,9 @@ obj-$(CONFIG_NFSD)	+= nfsd.o
 
 
 nfsd-y 			:= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
 nfsd-y 			:= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
 			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
 			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
+nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
 			   nfs4acl.o nfs4callback.o
 			   nfs4acl.o nfs4callback.o
 nfsd-objs		:= $(nfsd-y)
 nfsd-objs		:= $(nfsd-y)

+ 336 - 0
fs/nfsd/nfs2acl.c

@@ -0,0 +1,336 @@
+/*
+ * linux/fs/nfsd/nfs2acl.c
+ *
+ * Process version 2 NFSACL requests.
+ *
+ * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/xdr3.h>
+#include <linux/posix_acl.h>
+#include <linux/nfsacl.h>
+
+#define NFSDDBG_FACILITY		NFSDDBG_PROC
+#define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
+
+/*
+ * NULL call.
+ *
+ * Does no work: the request, argument and result pointers are all
+ * ignored and nfs_ok is returned unconditionally.
+ */
+static int
+nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	return nfs_ok;
+}
+
+/*
+ * Get the Access and/or Default ACL of a file.
+ *
+ * Copies the file handle from @argp into @resp, verifies it, and stores
+ * the ACLs selected by argp->mask in resp->acl_access and
+ * resp->acl_default. A file without an access ACL gets one synthesized
+ * from its mode bits. -ENODATA and -EOPNOTSUPP from the ACL lookup are
+ * treated as "no ACL".
+ *
+ * Returns (and stores in resp->status) 0 on success, the fh_verify()
+ * error if the handle is bad, nfserr_inval for unknown mask bits, or
+ * the translated errno of a failed ACL lookup.
+ */
+static int nfsacld_proc_getacl(struct svc_rqst * rqstp,
+		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+	svc_fh *fh;
+	struct posix_acl *acl;
+	int nfserr = 0;
+
+	dprintk("nfsd: GETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	/* Report the actual verification failure rather than masking
+	   every error as nfserr_inval. */
+	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+		RETURN_STATUS(nfserr);
+
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		RETURN_STATUS(nfserr_inval);
+	resp->mask = argp->mask;
+
+	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		if (acl == NULL) {
+			/* Solaris returns the inode's minimum ACL. */
+
+			struct inode *inode = fh->fh_dentry->d_inode;
+			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+			/* Never store an error pointer in resp: the release
+			   callback would hand it to posix_acl_release(). */
+			if (IS_ERR(acl)) {
+				nfserr = nfserrno(PTR_ERR(acl));
+				goto fail;
+			}
+		}
+		resp->acl_access = acl;
+	}
+	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+		/* Check how Solaris handles requests for the Default ACL
+		   of a non-directory! */
+
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		resp->acl_default = acl;
+	}
+
+	/* resp->acl_{access,default} are released in nfsaclsvc_release_getacl. */
+	RETURN_STATUS(0);
+
+fail:
+	/* NOTE(review): assumes resp starts out zeroed so that fields not
+	   yet assigned above are NULL here - confirm in the dispatcher. */
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	/* Clear the pointers so nfsaclsvc_release_getacl does not drop
+	   the same references a second time. */
+	resp->acl_access = NULL;
+	resp->acl_default = NULL;
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ *
+ * Verifies the file handle, then applies argp->acl_access followed by
+ * argp->acl_default, stopping at the first failure. Both argument ACLs
+ * are released before returning, whatever the outcome. Returns the
+ * resulting NFS status.
+ */
+static int nfsacld_proc_setacl(struct svc_rqst * rqstp,
+		struct nfsd3_setaclargs *argp,
+		struct nfsd_attrstat *resp)
+{
+	svc_fh *fh;
+	int status;
+
+	dprintk("nfsd: SETACL(2acl)   %s\n", SVCFH_fmt(&argp->fh));
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	status = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	if (status)
+		goto out;
+
+	status = nfserrno(nfsd_set_posix_acl(fh, ACL_TYPE_ACCESS,
+					     argp->acl_access));
+	if (status)
+		goto out;
+
+	status = nfserrno(nfsd_set_posix_acl(fh, ACL_TYPE_DEFAULT,
+					     argp->acl_default));
+out:
+	/* argp->acl_{access,default} may have been allocated in
+	   nfssvc_decode_setaclargs. */
+	posix_acl_release(argp->acl_access);
+	posix_acl_release(argp->acl_default);
+	return status;
+}
+
+/*
+ * Check file attributes
+ *
+ * Copies the file handle from @argp into @resp and returns the result
+ * of verifying it; nothing else is done here.
+ */
+static int nfsacld_proc_getattr(struct svc_rqst * rqstp,
+		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+{
+	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
+
+	fh_copy(&resp->fh, &argp->fh);
+	return fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+}
+
+/*
+ * Check file access
+ *
+ * Copies the file handle and the requested access mask into @resp and
+ * lets nfsd_access() evaluate them; resp->access is passed by address
+ * so nfsd_access() can update it. Returns the status from nfsd_access().
+ */
+static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
+		struct nfsd3_accessres *resp)
+{
+	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
+			SVCFH_fmt(&argp->fh),
+			argp->access);
+
+	fh_copy(&resp->fh, &argp->fh);
+	resp->access = argp->access;
+
+	return nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+}
+
+/*
+ * XDR decode functions
+ */
+
+/*
+ * GETACL arguments: a file handle followed by one 32-bit mask word.
+ * Returns 0 if the handle cannot be decoded, otherwise the result of
+ * the argument size check.
+ */
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclargs *argp)
+{
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (p == NULL)
+		return 0;
+	argp->mask = ntohl(*p++);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+
+/*
+ * SETACL arguments: a file handle, a 32-bit mask word, then the access
+ * ACL and the default ACL in nfsacl encoding. Each ACL is materialised
+ * only when the matching mask bit (NFS_ACL / NFS_DFACL) is set;
+ * otherwise it is skipped over on the wire.
+ *
+ * Returns 1 on success and 0 on any decoding failure.
+ * NOTE(review): if the access ACL decodes but the default ACL does not,
+ * argp->acl_access has already been allocated and is not released here;
+ * verify that some later stage frees it.
+ */
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_setaclargs *argp)
+{
+	struct kvec *head = rqstp->rq_arg.head;
+	unsigned int base;
+	int n;
+
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	argp->mask = ntohl(*p++);
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+	    !xdr_argsize_check(rqstp, p))
+		return 0;
+
+	/* Byte offset of the first ACL relative to the start of the head buffer. */
+	base = (char *)p - (char *)head->iov_base;
+	n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+			  (argp->mask & NFS_ACL) ?
+			  &argp->acl_access : NULL);
+	/* n is the size of the first ACL; the second one follows it directly. */
+	if (n > 0)
+		n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+				  (argp->mask & NFS_DFACL) ?
+				  &argp->acl_default : NULL);
+	return (n > 0);
+}
+
+/*
+ * Arguments consisting of a file handle only. Returns 0 if the handle
+ * cannot be decoded, otherwise the result of the argument size check.
+ */
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_fhandle *argp)
+{
+	if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+		return 0;
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * ACCESS arguments: a file handle followed by one 32-bit access word.
+ * Returns 0 if the handle cannot be decoded, otherwise the result of
+ * the argument size check.
+ */
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_accessargs *argp)
+{
+	u32 *next = nfs2svc_decode_fh(p, &argp->fh);
+
+	if (next == NULL)
+		return 0;
+	argp->access = ntohl(*next);
+	next++;
+
+	return xdr_argsize_check(rqstp, next);
+}
+
+/*
+ * XDR encode functions
+ */
+
+/* GETACL */
+/*
+ * Encodes the GETACL reply: file attributes, the mask word, then the
+ * access ACL and the default ACL in nfsacl form. Each ACL's entries are
+ * included only when the matching mask bit (NFS_ACL / NFS_DFACL) is set.
+ *
+ * Returns 1 on success and 0 if the file handle has no dentry/inode,
+ * the reply does not fit, a result page cannot be obtained, or ACL
+ * encoding fails.
+ */
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	struct dentry *dentry = resp->fh.fh_dentry;
+	struct inode *inode;
+	int w = nfsacl_size(
+		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+	struct kvec *head = rqstp->rq_res.head;
+	unsigned int base;
+	int n;
+
+	/* Only touch dentry->d_inode once dentry is known to be non-NULL. */
+	if (dentry == NULL || dentry->d_inode == NULL)
+		return 0;
+	inode = dentry->d_inode;
+
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	*p++ = htonl(resp->mask);
+	if (!xdr_ressize_check(rqstp, p))
+		return 0;
+	/* Byte offset of the ACL data relative to the start of the head buffer. */
+	base = (char *)p - (char *)head->iov_base;
+
+	/* Reserve enough result pages to hold w bytes of encoded ACLs. */
+	rqstp->rq_res.page_len = w;
+	while (w > 0) {
+		if (!svc_take_res_page(rqstp))
+			return 0;
+		w -= PAGE_SIZE;
+	}
+
+	n = nfsacl_encode(&rqstp->rq_res, base, inode,
+			  resp->acl_access,
+			  resp->mask & NFS_ACL, 0);
+	/* The default ACL follows directly after the access ACL. */
+	if (n > 0)
+		n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+				  resp->acl_default,
+				  resp->mask & NFS_DFACL,
+				  NFS_ACL_DEFAULT);
+	if (n <= 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * Reply consisting of file attributes only; returns the result of the
+ * reply size check.
+ */
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_attrstat *resp)
+{
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/* ACCESS */
+/*
+ * Reply consisting of file attributes followed by the 32-bit access
+ * word; returns the result of the reply size check.
+ */
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_accessres *resp)
+{
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh);
+	*p++ = htonl(resp->access);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+
+/*
+ * Drops everything a GETACL reply holds: the file handle reference and
+ * both ACLs stored in @resp. Always returns 1.
+ */
+static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	fh_put(&resp->fh);
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	return 1;
+}
+
+/*
+ * Drops the file handle reference held by a reply that carries nothing
+ * else to release. Always returns 1.
+ */
+static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd_fhandle *resp)
+{
+	fh_put(&resp->fh);
+	return 1;
+}
+
+#define nfsaclsvc_decode_voidargs	NULL
+#define nfsaclsvc_encode_voidres	NULL
+#define nfsaclsvc_release_void		NULL
+#define nfsd3_fhandleargs	nfsd_fhandle
+#define nfsd3_attrstatres	nfsd_attrstat
+#define nfsd3_voidres		nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsacld_proc_##name,		\
+   (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
+   (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
+   (kxdrproc_t) nfsaclsvc_release_##relt,		\
+   sizeof(struct nfsd3_##argt##args),		\
+   sizeof(struct nfsd3_##rest##res),		\
+   0,						\
+   cache,					\
+   respsize,					\
+ }
+
+/*
+ * Building blocks for the per-procedure reply size estimates used in
+ * the table below.
+ * NOTE(review): the values look like counts of 32-bit XDR words (an ACL
+ * is one count word plus three words per entry) - confirm the unit
+ * expected by the reply-size field.
+ */
+#define ST 1		/* status*/
+#define AT 21		/* attributes */
+#define pAT (1+AT)	/* post attributes - conditional */
+#define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
+
+/*
+ * Procedure table for NFSACL version 2. Columns of PROC():
+ * handler name, argument decoder, result encoder, release function,
+ * cache setting, reply size estimate.
+ */
+static struct svc_procedure		nfsd_acl_procedures2[] = {
+  PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
+  PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
+  PROC(setacl,	setacl,		attrstat,	fhandle,  RC_NOCACHE, ST+AT),
+  PROC(getattr, fhandle,	attrstat,	fhandle,  RC_NOCACHE, ST+AT),
+  PROC(access,	access,		access,		fhandle,  RC_NOCACHE, ST+AT+1),
+};
+
+struct svc_version	nfsd_acl_version2 = {
+		.vs_vers	= 2,
+		.vs_nproc	= 5,
+		.vs_proc	= nfsd_acl_procedures2,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+};

+ 267 - 0
fs/nfsd/nfs3acl.c

@@ -0,0 +1,267 @@
+/*
+ * linux/fs/nfsd/nfs3acl.c
+ *
+ * Process version 3 NFSACL requests.
+ *
+ * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs3.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr3.h>
+#include <linux/posix_acl.h>
+#include <linux/nfsacl.h>
+
+#define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
+
+/*
+ * NULL procedure: takes no arguments, produces no result body and
+ * always reports nfs_ok.
+ */
+static int
+nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	int status = nfs_ok;
+
+	return status;
+}
+
+/*
+ * Get the Access and/or Default ACL of a file.
+ *
+ * argp->mask selects which ACLs (or ACL counts) are wanted; any bit
+ * outside NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT is rejected with
+ * nfserr_inval.  The ACLs that were fetched are stored in
+ * resp->acl_access / resp->acl_default.  The NFS status is both stored
+ * in resp->status and returned (see RETURN_STATUS).
+ */
+static int nfsd3_proc_getacl(struct svc_rqst * rqstp,
+		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+	svc_fh *fh;
+	struct posix_acl *acl;
+	int nfserr = 0;
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	/* Pass the real verification status on instead of folding every
+	   failure into nfserr_inval. */
+	if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP)))
+		RETURN_STATUS(nfserr);
+
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		RETURN_STATUS(nfserr_inval);
+	resp->mask = argp->mask;
+
+	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			/* "No ACL" and "ACLs unsupported" are not errors
+			   here; fall back to the mode-derived ACL below. */
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		if (acl == NULL) {
+			/* Solaris returns the inode's minimum ACL. */
+
+			struct inode *inode = fh->fh_dentry->d_inode;
+			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+			/* Never store an error pointer in resp: it would
+			   later be handed to posix_acl_release(). */
+			if (IS_ERR(acl)) {
+				nfserr = nfserrno(PTR_ERR(acl));
+				goto fail;
+			}
+		}
+		resp->acl_access = acl;
+	}
+	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+		/* Check how Solaris handles requests for the Default ACL
+		   of a non-directory! */
+
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		resp->acl_default = acl;
+	}
+
+	/* resp->acl_{access,default} are released in nfs3svc_release_getacl. */
+	RETURN_STATUS(0);
+
+fail:
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	/* Clear the pointers so that a later nfs3svc_release_getacl()
+	   call cannot release the same ACLs a second time. */
+	resp->acl_access = NULL;
+	resp->acl_default = NULL;
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ *
+ * The access ACL is applied first; the default ACL is applied only if
+ * that succeeded.  Whatever the outcome, both ACLs attached to the
+ * arguments are released before the status is stored and returned.
+ */
+static int
+nfsd3_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp,
+		  struct nfsd3_attrstat *resp)
+{
+	svc_fh *fh = fh_copy(&resp->fh, &argp->fh);
+	int nfserr;
+
+	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	if (nfserr)
+		goto out;
+
+	nfserr = nfserrno(nfsd_set_posix_acl(fh, ACL_TYPE_ACCESS,
+					     argp->acl_access));
+	if (nfserr)
+		goto out;
+
+	nfserr = nfserrno(nfsd_set_posix_acl(fh, ACL_TYPE_DEFAULT,
+					     argp->acl_default));
+
+out:
+	/* argp->acl_{access,default} may have been allocated in
+	   nfs3svc_decode_setaclargs. */
+	posix_acl_release(argp->acl_access);
+	posix_acl_release(argp->acl_default);
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * XDR decode functions
+ */
+/*
+ * GETACL arguments: a file handle followed by one 32-bit mask word.
+ * Returns 0 if the handle cannot be decoded, otherwise the result of
+ * the argument size check.
+ */
+static int
+nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p,
+			  struct nfsd3_getaclargs *args)
+{
+	p = nfs3svc_decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	args->mask = ntohl(*p);
+	p++;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_setaclargs *args)	/* SETACL args: file handle, mask word, then up to two encoded ACLs */
+{
+	struct kvec *head = rqstp->rq_arg.head;
+	unsigned int base;	/* byte offset of the ACL data, measured from head->iov_base */
+	int n;	/* nfsacl_decode() result; only values > 0 count as success here */
+
+	if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+		return 0;
+	args->mask = ntohl(*p++);
+	if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||	/* reject mask bits outside the four known flags */
+	    !xdr_argsize_check(rqstp, p))
+		return 0;
+
+	base = (char *)p - (char *)head->iov_base;
+	n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+			  (args->mask & NFS_ACL) ?
+			  &args->acl_access : NULL);	/* NULL destination when the mask does not select the access ACL */
+	if (n > 0)
+		n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,	/* the second ACL is read n bytes past the first */
+				  (args->mask & NFS_DFACL) ?
+				  &args->acl_default : NULL);
+	return (n > 0);	/* 1 only if every nfsacl_decode() call made above returned > 0 */
+}
+
+/*
+ * XDR encode functions
+ */
+
+/* GETACL: post-op attributes first; on success also the mask word and both ACLs. Returns 1 on success, 0 on failure. */
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
+		struct nfsd3_getaclres *resp)
+{
+	struct dentry *dentry = resp->fh.fh_dentry;
+
+	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0 && dentry && dentry->d_inode) {
+		struct inode *inode = dentry->d_inode;
+		int w = nfsacl_size(
+			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);	/* w: size reported by nfsacl_size() for the ACLs selected by the mask */
+		struct kvec *head = rqstp->rq_res.head;
+		unsigned int base;	/* byte offset, from head->iov_base, where the ACL data starts */
+		int n;
+
+		*p++ = htonl(resp->mask);
+		if (!xdr_ressize_check(rqstp, p))
+			return 0;
+		base = (char *)p - (char *)head->iov_base;
+
+		rqstp->rq_res.page_len = w;
+		while (w > 0) {	/* take one response page per PAGE_SIZE of w, rounding up */
+			if (!svc_take_res_page(rqstp))
+				return 0;
+			w -= PAGE_SIZE;
+		}
+
+		n = nfsacl_encode(&rqstp->rq_res, base, inode,
+				  resp->acl_access,
+				  resp->mask & NFS_ACL, 0);
+		if (n > 0)
+			n = nfsacl_encode(&rqstp->rq_res, base + n, inode,	/* default ACL is written n bytes after the access ACL */
+					  resp->acl_default,
+					  resp->mask & NFS_DFACL,
+					  NFS_ACL_DEFAULT);
+		if (n <= 0)
+			return 0;
+	} else	/* error status or no inode: the reply ends after the post-op attributes */
+		if (!xdr_ressize_check(rqstp, p))
+			return 0;
+
+	return 1;
+}
+
+/* SETACL: the reply body consists of the post-operation attributes only. */
+static int
+nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p,
+			 struct nfsd3_attrstat *resp)
+{
+	u32 *end = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+
+	return xdr_ressize_check(rqstp, end);
+}
+
+/*
+ * XDR release functions
+ */
+/*
+ * Release hook for GETACL: drop the file handle reference and both
+ * ACLs stored in the result.  Always returns 1.
+ */
+static int
+nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p,
+		       struct nfsd3_getaclres *resp)
+{
+	struct posix_acl *access, *dflt;
+
+	fh_put(&resp->fh);
+
+	access = resp->acl_access;
+	dflt = resp->acl_default;
+	posix_acl_release(access);
+	posix_acl_release(dflt);
+
+	return 1;
+}
+
+#define nfs3svc_decode_voidargs		NULL
+#define nfs3svc_release_void		NULL
+#define nfsd3_setaclres			nfsd3_attrstat
+#define nfsd3_voidres			nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+#define PROC(name, argt, rest, relt, cache, respsize)	\
+ { (svc_procfunc) nfsd3_proc_##name,		\
+   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
+   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
+   (kxdrproc_t) nfs3svc_release_##relt,		\
+   sizeof(struct nfsd3_##argt##args),		\
+   sizeof(struct nfsd3_##rest##res),		\
+   0,						\
+   cache,					\
+   respsize,					\
+ }
+
+#define ST 1		/* status*/
+#define AT 21		/* attributes */
+#define pAT (1+AT)	/* post attributes - conditional */
+#define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
+
+static struct svc_procedure		nfsd_acl_procedures3[] = {	/* PROC columns: name, arg type, result type, release hook, cache policy, respsize */
+  PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
+  PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
+  PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),	/* expands to nfs3svc_release_fhandle, which is not defined in the visible part of this file */
+};
+
+struct svc_version	nfsd_acl_version3 = {
+		.vs_vers	= 3,
+		.vs_nproc	= 3,	/* equals the number of entries in nfsd_acl_procedures3 */
+		.vs_proc	= nfsd_acl_procedures3,
+		.vs_dispatch	= nfsd_dispatch,
+		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+};
+

+ 13 - 0
fs/nfsd/nfs3xdr.c

@@ -71,6 +71,12 @@ decode_fh(u32 *p, struct svc_fh *fhp)
 	return p + XDR_QUADLEN(size);
 	return p + XDR_QUADLEN(size);
 }
 }
 
 
+/* Helper function for NFSv3 ACL code: non-static wrapper that forwards unchanged to this file's decode_fh() */
+u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	return decode_fh(p, fhp);
+}
+
 static inline u32 *
 static inline u32 *
 encode_fh(u32 *p, struct svc_fh *fhp)
 encode_fh(u32 *p, struct svc_fh *fhp)
 {
 {
@@ -233,6 +239,13 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 	return p;
 	return p;
 }
 }
 
 
+/* Helper for NFSv3 ACLs: non-static wrapper that forwards unchanged to this file's encode_post_op_attr() */
+u32 *
+nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	return encode_post_op_attr(rqstp, p, fhp);
+}
+
 /*
 /*
  * Enocde weak cache consistency data
  * Enocde weak cache consistency data
  */
  */

+ 1 - 3
fs/nfsd/nfs4callback.c

@@ -430,7 +430,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
 	clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
 	if (IS_ERR(clnt)) {
 	if (IS_ERR(clnt)) {
 		dprintk("NFSD: couldn't create callback client\n");
 		dprintk("NFSD: couldn't create callback client\n");
-		goto out_xprt;
+		goto out_err;
 	}
 	}
 	clnt->cl_intr = 0;
 	clnt->cl_intr = 0;
 	clnt->cl_softrtry = 1;
 	clnt->cl_softrtry = 1;
@@ -465,8 +465,6 @@ out_rpciod:
 out_clnt:
 out_clnt:
 	rpc_shutdown_client(clnt);
 	rpc_shutdown_client(clnt);
 	goto out_err;
 	goto out_err;
-out_xprt:
-	xprt_destroy(xprt);
 out_err:
 out_err:
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
 		(int)clp->cl_name.len, clp->cl_name.data);

+ 1 - 0
fs/nfsd/nfsproc.c

@@ -591,6 +591,7 @@ nfserrno (int errno)
 		{ nfserr_dropit, -ENOMEM },
 		{ nfserr_dropit, -ENOMEM },
 		{ nfserr_badname, -ESRCH },
 		{ nfserr_badname, -ESRCH },
 		{ nfserr_io, -ETXTBSY },
 		{ nfserr_io, -ETXTBSY },
+		{ nfserr_notsupp, -EOPNOTSUPP },
 		{ -1, -EIO }
 		{ -1, -EIO }
 	};
 	};
 	int	i;
 	int	i;

+ 28 - 0
fs/nfsd/nfssvc.c

@@ -31,6 +31,7 @@
 #include <linux/nfsd/stats.h>
 #include <linux/nfsd/stats.h>
 #include <linux/nfsd/cache.h>
 #include <linux/nfsd/cache.h>
 #include <linux/lockd/bind.h>
 #include <linux/lockd/bind.h>
+#include <linux/nfsacl.h>
 
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
 
@@ -362,6 +363,32 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
 	return 1;
 	return 1;
 }
 }
 
 
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static struct svc_stat	nfsd_acl_svcstats;	/* tentative definition; initialized further down, after nfsd_acl_program */
+static struct svc_version *	nfsd_acl_version[] = {	/* slot index is the NFSACL protocol version */
+	[2] = &nfsd_acl_version2,
+	[3] = &nfsd_acl_version3,
+};
+
+#define NFSD_ACL_NRVERS		(sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0]))	/* number of slots, unused low indices included */
+static struct svc_program	nfsd_acl_program = {
+	.pg_prog		= NFS_ACL_PROGRAM,
+	.pg_nvers		= NFSD_ACL_NRVERS,
+	.pg_vers		= nfsd_acl_version,
+	.pg_name		= "nfsd",
+	.pg_class		= "nfsd",
+	.pg_stats		= &nfsd_acl_svcstats,
+};
+
+static struct svc_stat	nfsd_acl_svcstats = {
+	.program	= &nfsd_acl_program,	/* the stats block and the program point at each other */
+};
+
+#define nfsd_acl_program_p	&nfsd_acl_program
+#else
+#define nfsd_acl_program_p	NULL	/* neither ACL option enabled: the macro expands to NULL */
+#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+
 extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
 extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
 
 
 static struct svc_version *	nfsd_version[] = {
 static struct svc_version *	nfsd_version[] = {
@@ -376,6 +403,7 @@ static struct svc_version *	nfsd_version[] = {
 
 
 #define NFSD_NRVERS		(sizeof(nfsd_version)/sizeof(nfsd_version[0]))
 #define NFSD_NRVERS		(sizeof(nfsd_version)/sizeof(nfsd_version[0]))
 struct svc_program		nfsd_program = {
 struct svc_program		nfsd_program = {
+	.pg_next		= nfsd_acl_program_p,
 	.pg_prog		= NFS_PROGRAM,		/* program number */
 	.pg_prog		= NFS_PROGRAM,		/* program number */
 	.pg_nvers		= NFSD_NRVERS,		/* nr of entries in nfsd_version */
 	.pg_nvers		= NFSD_NRVERS,		/* nr of entries in nfsd_version */
 	.pg_vers		= nfsd_version,		/* version table */
 	.pg_vers		= nfsd_version,		/* version table */

+ 11 - 0
fs/nfsd/nfsxdr.c

@@ -49,6 +49,12 @@ decode_fh(u32 *p, struct svc_fh *fhp)
 	return p + (NFS_FHSIZE >> 2);
 	return p + (NFS_FHSIZE >> 2);
 }
 }
 
 
+/* Helper function for NFSv2 ACL code: non-static wrapper that forwards unchanged to this file's decode_fh() */
+u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	return decode_fh(p, fhp);
+}
+
 static inline u32 *
 static inline u32 *
 encode_fh(u32 *p, struct svc_fh *fhp)
 encode_fh(u32 *p, struct svc_fh *fhp)
 {
 {
@@ -190,6 +196,11 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 	return p;
 	return p;
 }
 }
 
 
+/* Helper function for NFSv2 ACL code: non-static wrapper that forwards unchanged to this file's encode_fattr() */
+u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	return encode_fattr(rqstp, p, fhp);
+}
 
 
 /*
 /*
  * XDR decode functions
  * XDR decode functions

+ 106 - 1
fs/nfsd/vfs.c

@@ -46,8 +46,9 @@
 #include <linux/nfsd/nfsfh.h>
 #include <linux/nfsd/nfsfh.h>
 #include <linux/quotaops.h>
 #include <linux/quotaops.h>
 #include <linux/dnotify.h>
 #include <linux/dnotify.h>
-#ifdef CONFIG_NFSD_V4
+#include <linux/xattr_acl.h>
 #include <linux/posix_acl.h>
 #include <linux/posix_acl.h>
+#ifdef CONFIG_NFSD_V4
 #include <linux/posix_acl_xattr.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr_acl.h>
 #include <linux/xattr_acl.h>
 #include <linux/xattr.h>
 #include <linux/xattr.h>
@@ -1857,3 +1858,107 @@ nfsd_racache_init(int cache_size)
 	nfsdstats.ra_size = cache_size;
 	nfsdstats.ra_size = cache_size;
 	return 0;
 	return 0;
 }
 }
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+/*
+ * Read the POSIX ACL of the given type (ACL_TYPE_ACCESS or
+ * ACL_TYPE_DEFAULT) from the file behind fhp through the inode's
+ * getxattr operation.
+ *
+ * Returns whatever posix_acl_from_xattr() yields for the attribute
+ * value, or an ERR_PTR: -EOPNOTSUPP when the inode has no POSIX ACL
+ * support / no getxattr operation or the type is unknown, -ENOMEM when
+ * the value buffer cannot be allocated, or the error from getxattr.
+ */
+struct posix_acl *
+nfsd_get_posix_acl(struct svc_fh *fhp, int type)
+{
+	struct dentry *dentry = fhp->fh_dentry;
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl;
+	void *buf = NULL;
+	char *xname;
+	ssize_t len;
+
+	if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (type == ACL_TYPE_ACCESS)
+		xname = XATTR_NAME_ACL_ACCESS;
+	else if (type == ACL_TYPE_DEFAULT)
+		xname = XATTR_NAME_ACL_DEFAULT;
+	else
+		return ERR_PTR(-EOPNOTSUPP);
+
+	/* Probe with an empty buffer; the result sizes the allocation. */
+	len = inode->i_op->getxattr(dentry, xname, NULL, 0);
+	if (len < 0) {
+		acl = ERR_PTR(len);
+		goto out;
+	}
+
+	if (len > 0) {
+		buf = kmalloc(len, GFP_KERNEL);
+		if (buf == NULL) {
+			acl = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+		len = inode->i_op->getxattr(dentry, xname, buf, len);
+		if (len < 0) {
+			acl = ERR_PTR(len);
+			goto out;
+		}
+	}
+
+	/* With len == 0 this is called with a NULL buffer. */
+	acl = posix_acl_from_xattr(buf, len);
+
+out:
+	kfree(buf);
+	return acl;
+}
+
+/*
+ * Store (or remove) the POSIX ACL of the given type on the file behind
+ * fhp through the inode's setxattr/removexattr operations.
+ *
+ * A NULL acl or one with a_count == 0 removes the attribute; removing
+ * a default ACL from a non-directory is a no-op, and -ENODATA from
+ * removexattr is treated as success.
+ *
+ * Returns 0 on success or a negative errno: -EOPNOTSUPP when the inode
+ * lacks POSIX ACL support / the xattr operations or the type is
+ * unknown, -ENOMEM when the value buffer cannot be allocated, or the
+ * error from posix_acl_to_xattr / setxattr / removexattr.
+ */
+int
+nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
+{
+	struct inode *inode = fhp->fh_dentry->d_inode;
+	char *name;
+	void *value = NULL;
+	size_t size;
+	int error;
+
+	if (!IS_POSIXACL(inode) || !inode->i_op ||
+	    !inode->i_op->setxattr || !inode->i_op->removexattr)
+		return -EOPNOTSUPP;
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			name = XATTR_NAME_ACL_ACCESS;
+			break;
+		case ACL_TYPE_DEFAULT:
+			name = XATTR_NAME_ACL_DEFAULT;
+			break;
+		default:
+			return -EOPNOTSUPP;
+	}
+
+	if (acl && acl->a_count) {
+		size = xattr_acl_size(acl->a_count);
+		value = kmalloc(size, GFP_KERNEL);
+		if (!value)
+			return -ENOMEM;
+		/* Keep the result in the signed 'error' first: a negative
+		   errno stored directly in the unsigned 'size' can never
+		   compare < 0 and would be used as a huge length. */
+		error = posix_acl_to_xattr(acl, value, size);
+		if (error < 0)
+			goto getout;
+		size = error;
+	} else
+		size = 0;
+
+	if (!fhp->fh_locked)
+		fh_lock(fhp);  /* unlocking is done automatically */
+	if (size)
+		error = inode->i_op->setxattr(fhp->fh_dentry, name,
+					      value, size, 0);
+	else {
+		if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
+			error = 0;
+		else {
+			error = inode->i_op->removexattr(fhp->fh_dentry, name);
+			if (error == -ENODATA)
+				error = 0;
+		}
+	}
+
+getout:
+	kfree(value);
+	return error;
+}
+#endif  /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */

+ 1 - 0
include/linux/fs.h

@@ -674,6 +674,7 @@ struct file_lock {
 	struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
 	struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
 	union {
 	union {
 		struct nfs_lock_info	nfs_fl;
 		struct nfs_lock_info	nfs_fl;
+		struct nfs4_lock_info	nfs4_fl;
 	} fl_u;
 	} fl_u;
 };
 };
 
 

+ 6 - 1
include/linux/lockd/lockd.h

@@ -72,6 +72,8 @@ struct nlm_lockowner {
 	uint32_t pid;
 	uint32_t pid;
 };
 };
 
 
+struct nlm_wait;
+
 /*
 /*
  * Memory chunk for NLM client RPC request.
  * Memory chunk for NLM client RPC request.
  */
  */
@@ -81,6 +83,7 @@ struct nlm_rqst {
 	struct nlm_host *	a_host;		/* host handle */
 	struct nlm_host *	a_host;		/* host handle */
 	struct nlm_args		a_args;		/* arguments */
 	struct nlm_args		a_args;		/* arguments */
 	struct nlm_res		a_res;		/* result */
 	struct nlm_res		a_res;		/* result */
+	struct nlm_wait *	a_block;
 	char			a_owner[NLMCLNT_OHSIZE];
 	char			a_owner[NLMCLNT_OHSIZE];
 };
 };
 
 
@@ -142,7 +145,9 @@ extern unsigned long		nlmsvc_timeout;
  * Lockd client functions
  * Lockd client functions
  */
  */
 struct nlm_rqst * nlmclnt_alloc_call(void);
 struct nlm_rqst * nlmclnt_alloc_call(void);
-int		  nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *);
+int		  nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
+void		  nlmclnt_finish_block(struct nlm_rqst *req);
+long		  nlmclnt_block(struct nlm_rqst *req, long timeout);
 int		  nlmclnt_cancel(struct nlm_host *, struct file_lock *);
 int		  nlmclnt_cancel(struct nlm_host *, struct file_lock *);
 u32		  nlmclnt_grant(struct nlm_lock *);
 u32		  nlmclnt_grant(struct nlm_lock *);
 void		  nlmclnt_recovery(struct nlm_host *, u32);
 void		  nlmclnt_recovery(struct nlm_host *, u32);

+ 2 - 0
include/linux/nfs4.h

@@ -382,6 +382,8 @@ enum {
 	NFSPROC4_CLNT_READDIR,
 	NFSPROC4_CLNT_READDIR,
 	NFSPROC4_CLNT_SERVER_CAPS,
 	NFSPROC4_CLNT_SERVER_CAPS,
 	NFSPROC4_CLNT_DELEGRETURN,
 	NFSPROC4_CLNT_DELEGRETURN,
+	NFSPROC4_CLNT_GETACL,
+	NFSPROC4_CLNT_SETACL,
 };
 };
 
 
 #endif
 #endif

+ 59 - 247
include/linux/nfs_fs.h

@@ -15,7 +15,6 @@
 #include <linux/pagemap.h>
 #include <linux/pagemap.h>
 #include <linux/rwsem.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
 #include <linux/wait.h>
-#include <linux/uio.h>
 
 
 #include <linux/nfs_fs_sb.h>
 #include <linux/nfs_fs_sb.h>
 
 
@@ -29,7 +28,6 @@
 #include <linux/nfs4.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_xdr.h>
 #include <linux/nfs_xdr.h>
 #include <linux/rwsem.h>
 #include <linux/rwsem.h>
-#include <linux/workqueue.h>
 #include <linux/mempool.h>
 #include <linux/mempool.h>
 
 
 /*
 /*
@@ -43,13 +41,6 @@
 #define NFS_MAX_FILE_IO_BUFFER_SIZE	32768
 #define NFS_MAX_FILE_IO_BUFFER_SIZE	32768
 #define NFS_DEF_FILE_IO_BUFFER_SIZE	4096
 #define NFS_DEF_FILE_IO_BUFFER_SIZE	4096
 
 
-/*
- * The upper limit on timeouts for the exponential backoff algorithm.
- */
-#define NFS_WRITEBACK_DELAY		(5*HZ)
-#define NFS_WRITEBACK_LOCKDELAY		(60*HZ)
-#define NFS_COMMIT_DELAY		(5*HZ)
-
 /*
 /*
  * superblock magic number for NFS
  * superblock magic number for NFS
  */
  */
@@ -60,9 +51,6 @@
  */
  */
 #define NFS_RPC_SWAPFLAGS		(RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
 #define NFS_RPC_SWAPFLAGS		(RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
 
 
-#define NFS_RW_SYNC		0x0001	/* O_SYNC handling */
-#define NFS_RW_SWAP		0x0002	/* This is a swap request */
-
 /*
 /*
  * When flushing a cluster of dirty pages, there can be different
  * When flushing a cluster of dirty pages, there can be different
  * strategies:
  * strategies:
@@ -96,7 +84,8 @@ struct nfs_open_context {
 	int error;
 	int error;
 
 
 	struct list_head list;
 	struct list_head list;
-	wait_queue_head_t waitq;
+
+	__u64 dir_cookie;
 };
 };
 
 
 /*
 /*
@@ -104,6 +93,8 @@ struct nfs_open_context {
  */
  */
 struct nfs_delegation;
 struct nfs_delegation;
 
 
+struct posix_acl;
+
 /*
 /*
  * nfs fs inode data in memory
  * nfs fs inode data in memory
  */
  */
@@ -140,7 +131,6 @@ struct nfs_inode {
 	 *
 	 *
 	 *	mtime != read_cache_mtime
 	 *	mtime != read_cache_mtime
 	 */
 	 */
-	unsigned long		readdir_timestamp;
 	unsigned long		read_cache_jiffies;
 	unsigned long		read_cache_jiffies;
 	unsigned long		attrtimeo;
 	unsigned long		attrtimeo;
 	unsigned long		attrtimeo_timestamp;
 	unsigned long		attrtimeo_timestamp;
@@ -158,6 +148,10 @@ struct nfs_inode {
 	atomic_t		data_updates;
 	atomic_t		data_updates;
 
 
 	struct nfs_access_entry	cache_access;
 	struct nfs_access_entry	cache_access;
+#ifdef CONFIG_NFS_V3_ACL
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+#endif
 
 
 	/*
 	/*
 	 * This is the cookie verifier used for NFSv3 readdir
 	 * This is the cookie verifier used for NFSv3 readdir
@@ -183,13 +177,13 @@ struct nfs_inode {
 	wait_queue_head_t	nfs_i_wait;
 	wait_queue_head_t	nfs_i_wait;
 
 
 #ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4
+	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */
         /* NFSv4 state */
 	struct list_head	open_states;
 	struct list_head	open_states;
 	struct nfs_delegation	*delegation;
 	struct nfs_delegation	*delegation;
 	int			 delegation_state;
 	int			 delegation_state;
 	struct rw_semaphore	rwsem;
 	struct rw_semaphore	rwsem;
 #endif /* CONFIG_NFS_V4*/
 #endif /* CONFIG_NFS_V4*/
-
 	struct inode		vfs_inode;
 	struct inode		vfs_inode;
 };
 };
 
 
@@ -203,6 +197,8 @@ struct nfs_inode {
 #define NFS_INO_INVALID_DATA	0x0010		/* cached data is invalid */
 #define NFS_INO_INVALID_DATA	0x0010		/* cached data is invalid */
 #define NFS_INO_INVALID_ATIME	0x0020		/* cached atime is invalid */
 #define NFS_INO_INVALID_ATIME	0x0020		/* cached atime is invalid */
 #define NFS_INO_INVALID_ACCESS	0x0040		/* cached access cred invalid */
 #define NFS_INO_INVALID_ACCESS	0x0040		/* cached access cred invalid */
+#define NFS_INO_INVALID_ACL	0x0080		/* cached acls are invalid */
+#define NFS_INO_REVAL_PAGECACHE	0x1000		/* must revalidate pagecache */
 
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {
 {
@@ -294,12 +290,12 @@ extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
+extern void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_begin_attr_update(struct inode *);
 extern void nfs_begin_attr_update(struct inode *);
 extern void nfs_end_attr_update(struct inode *);
 extern void nfs_end_attr_update(struct inode *);
 extern void nfs_begin_data_update(struct inode *);
 extern void nfs_begin_data_update(struct inode *);
 extern void nfs_end_data_update(struct inode *);
 extern void nfs_end_data_update(struct inode *);
-extern void nfs_end_data_update_defer(struct inode *);
 extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred);
 extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
@@ -314,6 +310,9 @@ extern u32 root_nfs_parse_addr(char *name); /*__init*/
  * linux/fs/nfs/file.c
  * linux/fs/nfs/file.c
  */
  */
 extern struct inode_operations nfs_file_inode_operations;
 extern struct inode_operations nfs_file_inode_operations;
+#ifdef CONFIG_NFS_V3
+extern struct inode_operations nfs3_file_inode_operations;
+#endif /* CONFIG_NFS_V3 */
 extern struct file_operations nfs_file_operations;
 extern struct file_operations nfs_file_operations;
 extern struct address_space_operations nfs_file_aops;
 extern struct address_space_operations nfs_file_aops;
 
 
@@ -328,6 +327,22 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file)
 	return NULL;
 	return NULL;
 }
 }
 
 
+/*
+ * linux/fs/nfs/xattr.c
+ */
+#ifdef CONFIG_NFS_V3_ACL
+extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
+extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t);
+extern int nfs3_setxattr(struct dentry *, const char *,
+			const void *, size_t, int);
+extern int nfs3_removexattr (struct dentry *, const char *name);
+#else
+# define nfs3_listxattr NULL
+# define nfs3_getxattr NULL
+# define nfs3_setxattr NULL
+# define nfs3_removexattr NULL
+#endif
+
 /*
 /*
  * linux/fs/nfs/direct.c
  * linux/fs/nfs/direct.c
  */
  */
@@ -342,6 +357,9 @@ extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
  * linux/fs/nfs/dir.c
  * linux/fs/nfs/dir.c
  */
  */
 extern struct inode_operations nfs_dir_inode_operations;
 extern struct inode_operations nfs_dir_inode_operations;
+#ifdef CONFIG_NFS_V3
+extern struct inode_operations nfs3_dir_inode_operations;
+#endif /* CONFIG_NFS_V3 */
 extern struct file_operations nfs_dir_operations;
 extern struct file_operations nfs_dir_operations;
 extern struct dentry_operations nfs_dentry_operations;
 extern struct dentry_operations nfs_dentry_operations;
 
 
@@ -377,10 +395,10 @@ extern void nfs_commit_done(struct rpc_task *);
  */
  */
 extern int  nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
 extern int  nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int  nfs_commit_inode(struct inode *, unsigned long, unsigned int, int);
+extern int  nfs_commit_inode(struct inode *, int);
 #else
 #else
 static inline int
 static inline int
-nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how)
+nfs_commit_inode(struct inode *inode, int how)
 {
 {
 	return 0;
 	return 0;
 }
 }
@@ -434,11 +452,6 @@ static inline void nfs_writedata_free(struct nfs_write_data *p)
 	mempool_free(p, nfs_wdata_mempool);
 	mempool_free(p, nfs_wdata_mempool);
 }
 }
 
 
-/* Hack for future NFS swap support */
-#ifndef IS_SWAPFILE
-# define IS_SWAPFILE(inode)	(0)
-#endif
-
 /*
 /*
  * linux/fs/nfs/read.c
  * linux/fs/nfs/read.c
  */
  */
@@ -467,6 +480,29 @@ static inline void nfs_readdata_free(struct nfs_read_data *p)
 
 
 extern void  nfs_readdata_release(struct rpc_task *task);
 extern void  nfs_readdata_release(struct rpc_task *task);
 
 
+/*
+ * linux/fs/nfs3proc.c
+ */
+#ifdef CONFIG_NFS_V3_ACL
+extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type);
+extern int nfs3_proc_setacl(struct inode *inode, int type,
+			    struct posix_acl *acl);
+extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
+		mode_t mode);
+extern void nfs3_forget_cached_acls(struct inode *inode);
+#else
+static inline int nfs3_proc_set_default_acl(struct inode *dir,
+					    struct inode *inode,
+					    mode_t mode)
+{
+	return 0;
+}
+
+static inline void nfs3_forget_cached_acls(struct inode *inode)
+{
+}
+#endif /* CONFIG_NFS_V3_ACL */
+
 /*
 /*
  * linux/fs/mount_clnt.c
  * linux/fs/mount_clnt.c
  * (Used only by nfsroot module)
  * (Used only by nfsroot module)
@@ -515,230 +551,6 @@ extern void * nfs_root_data(void);
 
 
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
 
-#ifdef CONFIG_NFS_V4
-
-struct idmap;
-
-/*
- * In a seqid-mutating op, this macro controls which error return
- * values trigger incrementation of the seqid.
- *
- * from rfc 3010:
- * The client MUST monotonically increment the sequence number for the
- * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
- * operations.  This is true even in the event that the previous
- * operation that used the sequence number received an error.  The only
- * exception to this rule is if the previous operation received one of
- * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
- * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
- * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
- *
- */
-#define seqid_mutating_err(err)       \
-(((err) != NFSERR_STALE_CLIENTID) &&  \
- ((err) != NFSERR_STALE_STATEID)  &&  \
- ((err) != NFSERR_BAD_STATEID)    &&  \
- ((err) != NFSERR_BAD_SEQID)      &&  \
- ((err) != NFSERR_BAD_XDR)        &&  \
- ((err) != NFSERR_RESOURCE)       &&  \
- ((err) != NFSERR_NOFILEHANDLE))
-
-enum nfs4_client_state {
-	NFS4CLNT_OK  = 0,
-};
-
-/*
- * The nfs4_client identifies our client state to the server.
- */
-struct nfs4_client {
-	struct list_head	cl_servers;	/* Global list of servers */
-	struct in_addr		cl_addr;	/* Server identifier */
-	u64			cl_clientid;	/* constant */
-	nfs4_verifier		cl_confirm;
-	unsigned long		cl_state;
-
-	u32			cl_lockowner_id;
-
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
-	struct list_head	cl_delegations;
-	struct list_head	cl_state_owners;
-	struct list_head	cl_unused;
-	int			cl_nunused;
-	spinlock_t		cl_lock;
-	atomic_t		cl_count;
-
-	struct rpc_clnt *	cl_rpcclient;
-	struct rpc_cred *	cl_cred;
-
-	struct list_head	cl_superblocks;	/* List of nfs_server structs */
-
-	unsigned long		cl_lease_time;
-	unsigned long		cl_last_renewal;
-	struct work_struct	cl_renewd;
-	struct work_struct	cl_recoverd;
-
-	wait_queue_head_t	cl_waitq;
-	struct rpc_wait_queue	cl_rpcwaitq;
-
-	/* used for the setclientid verifier */
-	struct timespec		cl_boot_time;
-
-	/* idmapper */
-	struct idmap *		cl_idmap;
-
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			cl_ipaddr[16];
-	unsigned char		cl_id_uniquifier;
-};
-
-/*
- * NFS4 state_owners and lock_owners are simply labels for ordered
- * sequences of RPC calls. Their sole purpose is to provide once-only
- * semantics by allowing the server to identify replayed requests.
- *
- * The ->so_sema is held during all state_owner seqid-mutating operations:
- * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
- * so_seqid.
- */
-struct nfs4_state_owner {
-	struct list_head     so_list;	 /* per-clientid list of state_owners */
-	struct nfs4_client   *so_client;
-	u32                  so_id;      /* 32-bit identifier, unique */
-	struct semaphore     so_sema;
-	u32                  so_seqid;   /* protected by so_sema */
-	atomic_t	     so_count;
-
-	struct rpc_cred	     *so_cred;	 /* Associated cred */
-	struct list_head     so_states;
-	struct list_head     so_delegations;
-};
-
-/*
- * struct nfs4_state maintains the client-side state for a given
- * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
- *
- * OPEN:
- * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server,
- * we need to know how many files are open for reading or writing on a
- * given inode. This information too is stored here.
- *
- * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
- */
-
-struct nfs4_lock_state {
-	struct list_head	ls_locks;	/* Other lock stateids */
-	fl_owner_t		ls_owner;	/* POSIX lock owner */
-#define NFS_LOCK_INITIALIZED 1
-	int			ls_flags;
-	u32			ls_seqid;
-	u32			ls_id;
-	nfs4_stateid		ls_stateid;
-	atomic_t		ls_count;
-};
-
-/* bits for nfs4_state->flags */
-enum {
-	LK_STATE_IN_USE,
-	NFS_DELEGATED_STATE,
-};
-
-struct nfs4_state {
-	struct list_head open_states;	/* List of states for the same state_owner */
-	struct list_head inode_states;	/* List of states for the same inode */
-	struct list_head lock_states;	/* List of subservient lock stateids */
-
-	struct nfs4_state_owner *owner;	/* Pointer to the open owner */
-	struct inode *inode;		/* Pointer to the inode */
-
-	unsigned long flags;		/* Do we hold any locks? */
-	struct semaphore lock_sema;	/* Serializes file locking operations */
-	rwlock_t state_lock;		/* Protects the lock_states list */
-
-	nfs4_stateid stateid;
-
-	unsigned int nreaders;
-	unsigned int nwriters;
-	int state;			/* State on the server (R,W, or RW) */
-	atomic_t count;
-};
-
-
-struct nfs4_exception {
-	long timeout;
-	int retry;
-};
-
-struct nfs4_state_recovery_ops {
-	int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
-	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
-};
-
-extern struct dentry_operations nfs4_dentry_operations;
-extern struct inode_operations nfs4_dir_inode_operations;
-
-/* nfs4proc.c */
-extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
-extern int nfs4_proc_async_renew(struct nfs4_client *);
-extern int nfs4_proc_renew(struct nfs4_client *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
-extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
-extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
-
-extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
-extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
-
-/* nfs4renewd.c */
-extern void nfs4_schedule_state_renewal(struct nfs4_client *);
-extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
-extern void nfs4_kill_renewd(struct nfs4_client *);
-
-/* nfs4state.c */
-extern void init_nfsv4_state(struct nfs_server *);
-extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs4_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs4_client *clp);
-extern int nfs4_init_client(struct nfs4_client *clp);
-extern struct nfs4_client *nfs4_find_client(struct in_addr *);
-extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
-
-extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
-extern void nfs4_put_state_owner(struct nfs4_state_owner *);
-extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
-extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
-extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct nfs4_state *, mode_t);
-extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
-extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
-extern void nfs4_schedule_state_recovery(struct nfs4_client *);
-extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t);
-extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t);
-extern void nfs4_put_lock_state(struct nfs4_lock_state *state);
-extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
-extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
-extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
-
-
-
-struct nfs4_mount_data;
-#else
-#define init_nfsv4_state(server)  do { } while (0)
-#define destroy_nfsv4_state(server)       do { } while (0)
-#define nfs4_put_state_owner(inode, owner) do { } while (0)
-#define nfs4_put_open_state(state) do { } while (0)
-#define nfs4_close_state(a, b) do { } while (0)
-#define nfs4_renewd_prepare_shutdown(server) do { } while (0)
-#endif
-
 #endif /* __KERNEL__ */
 #endif /* __KERNEL__ */
 
 
 /*
 /*

+ 5 - 0
include/linux/nfs_fs_i.h

@@ -16,6 +16,11 @@ struct nfs_lock_info {
 	struct nlm_lockowner *owner;
 	struct nlm_lockowner *owner;
 };
 };
 
 
+struct nfs4_lock_state;
+struct nfs4_lock_info {
+	struct nfs4_lock_state *owner;
+};
+
 /*
 /*
  * Lock flag values
  * Lock flag values
  */
  */

+ 1 - 0
include/linux/nfs_fs_sb.h

@@ -10,6 +10,7 @@
 struct nfs_server {
 struct nfs_server {
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
 	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
+	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nfs_rpc_ops *	rpc_ops;	/* NFS protocol vector */
 	struct nfs_rpc_ops *	rpc_ops;	/* NFS protocol vector */
 	struct backing_dev_info	backing_dev_info;
 	struct backing_dev_info	backing_dev_info;
 	int			flags;		/* various flags */
 	int			flags;		/* various flags */

+ 1 - 0
include/linux/nfs_mount.h

@@ -58,6 +58,7 @@ struct nfs_mount_data {
 #define NFS_MOUNT_KERBEROS	0x0100	/* 3 */
 #define NFS_MOUNT_KERBEROS	0x0100	/* 3 */
 #define NFS_MOUNT_NONLM		0x0200	/* 3 */
 #define NFS_MOUNT_NONLM		0x0200	/* 3 */
 #define NFS_MOUNT_BROKEN_SUID	0x0400	/* 4 */
 #define NFS_MOUNT_BROKEN_SUID	0x0400	/* 4 */
+#define NFS_MOUNT_NOACL		0x0800	/* 4 */
 #define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
 #define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
 #define NFS_MOUNT_SECFLAVOUR	0x2000	/* 5 */
 #define NFS_MOUNT_SECFLAVOUR	0x2000	/* 5 */
 #define NFS_MOUNT_FLAGMASK	0xFFFF
 #define NFS_MOUNT_FLAGMASK	0xFFFF

+ 24 - 6
include/linux/nfs_page.h

@@ -19,6 +19,12 @@
 
 
 #include <asm/atomic.h>
 #include <asm/atomic.h>
 
 
+/*
+ * Valid flags for the radix tree
+ */
+#define NFS_PAGE_TAG_DIRTY	0
+#define NFS_PAGE_TAG_WRITEBACK	1
+
 /*
 /*
  * Valid flags for a dirty buffer
  * Valid flags for a dirty buffer
  */
  */
@@ -26,6 +32,7 @@
 #define PG_NEED_COMMIT		1
 #define PG_NEED_COMMIT		1
 #define PG_NEED_RESCHED		2
 #define PG_NEED_RESCHED		2
 
 
+struct nfs_inode;
 struct nfs_page {
 struct nfs_page {
 	struct list_head	wb_list,	/* Defines state of page: */
 	struct list_head	wb_list,	/* Defines state of page: */
 				*wb_list_head;	/*      read/write/commit */
 				*wb_list_head;	/*      read/write/commit */
@@ -54,14 +61,17 @@ extern	void nfs_clear_request(struct nfs_page *req);
 extern	void nfs_release_request(struct nfs_page *req);
 extern	void nfs_release_request(struct nfs_page *req);
 
 
 
 
-extern	void nfs_list_add_request(struct nfs_page *, struct list_head *);
-
+extern  int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
+				unsigned long idx_start, unsigned int npages);
 extern	int nfs_scan_list(struct list_head *, struct list_head *,
 extern	int nfs_scan_list(struct list_head *, struct list_head *,
 			  unsigned long, unsigned int);
 			  unsigned long, unsigned int);
 extern	int nfs_coalesce_requests(struct list_head *, struct list_head *,
 extern	int nfs_coalesce_requests(struct list_head *, struct list_head *,
 				  unsigned int);
 				  unsigned int);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	void nfs_unlock_request(struct nfs_page *req);
+extern  int nfs_set_page_writeback_locked(struct nfs_page *req);
+extern  void nfs_clear_page_writeback(struct nfs_page *req);
+
 
 
 /*
 /*
  * Lock the page of an asynchronous request without incrementing the wb_count
  * Lock the page of an asynchronous request without incrementing the wb_count
@@ -86,6 +96,18 @@ nfs_lock_request(struct nfs_page *req)
 	return 1;
 	return 1;
 }
 }
 
 
+/**
+ * nfs_list_add_request - Insert a request into a list
+ * @req: request
+ * @head: head of list into which to insert the request.
+ */
+static inline void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+{
+	list_add_tail(&req->wb_list, head);
+	req->wb_list_head = head;
+}
+
 
 
 /**
 /**
  * nfs_list_remove_request - Remove a request from its wb_list
  * nfs_list_remove_request - Remove a request from its wb_list
@@ -96,10 +118,6 @@ nfs_list_remove_request(struct nfs_page *req)
 {
 {
 	if (list_empty(&req->wb_list))
 	if (list_empty(&req->wb_list))
 		return;
 		return;
-	if (!NFS_WBACK_BUSY(req)) {
-		printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n");
-		BUG();
-	}
 	list_del_init(&req->wb_list);
 	list_del_init(&req->wb_list);
 	req->wb_list_head = NULL;
 	req->wb_list_head = NULL;
 }
 }

+ 43 - 0
include/linux/nfs_xdr.h

@@ -2,6 +2,7 @@
 #define _LINUX_NFS_XDR_H
 #define _LINUX_NFS_XDR_H
 
 
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/xprt.h>
+#include <linux/nfsacl.h>
 
 
 struct nfs4_fsid {
 struct nfs4_fsid {
 	__u64 major;
 	__u64 major;
@@ -326,6 +327,20 @@ struct nfs_setattrargs {
 	const u32 *			bitmask;
 	const u32 *			bitmask;
 };
 };
 
 
+struct nfs_setaclargs {
+	struct nfs_fh *			fh;
+	size_t				acl_len;
+	unsigned int			acl_pgbase;
+	struct page **			acl_pages;
+};
+
+struct nfs_getaclargs {
+	struct nfs_fh *			fh;
+	size_t				acl_len;
+	unsigned int			acl_pgbase;
+	struct page **			acl_pages;
+};
+
 struct nfs_setattrres {
 struct nfs_setattrres {
 	struct nfs_fattr *              fattr;
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
 	const struct nfs_server *	server;
@@ -354,6 +369,20 @@ struct nfs_readdirargs {
 	struct page **		pages;
 	struct page **		pages;
 };
 };
 
 
+struct nfs3_getaclargs {
+	struct nfs_fh *		fh;
+	int			mask;
+	struct page **		pages;
+};
+
+struct nfs3_setaclargs {
+	struct inode *		inode;
+	int			mask;
+	struct posix_acl *	acl_access;
+	struct posix_acl *	acl_default;
+	struct page **		pages;
+};
+
 struct nfs_diropok {
 struct nfs_diropok {
 	struct nfs_fh *		fh;
 	struct nfs_fh *		fh;
 	struct nfs_fattr *	fattr;
 	struct nfs_fattr *	fattr;
@@ -477,6 +506,15 @@ struct nfs3_readdirres {
 	int			plus;
 	int			plus;
 };
 };
 
 
+struct nfs3_getaclres {
+	struct nfs_fattr *	fattr;
+	int			mask;
+	unsigned int		acl_access_count;
+	unsigned int		acl_default_count;
+	struct posix_acl *	acl_access;
+	struct posix_acl *	acl_default;
+};
+
 #ifdef CONFIG_NFS_V4
 #ifdef CONFIG_NFS_V4
 
 
 typedef u64 clientid4;
 typedef u64 clientid4;
@@ -667,6 +705,7 @@ struct nfs_rpc_ops {
 	int	version;		/* Protocol version */
 	int	version;		/* Protocol version */
 	struct dentry_operations *dentry_ops;
 	struct dentry_operations *dentry_ops;
 	struct inode_operations *dir_inode_ops;
 	struct inode_operations *dir_inode_ops;
+	struct inode_operations *file_inode_ops;
 
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
 			    struct nfs_fsinfo *);
@@ -713,6 +752,7 @@ struct nfs_rpc_ops {
 	int	(*file_open)   (struct inode *, struct file *);
 	int	(*file_open)   (struct inode *, struct file *);
 	int	(*file_release) (struct inode *, struct file *);
 	int	(*file_release) (struct inode *, struct file *);
 	int	(*lock)(struct file *, int, struct file_lock *);
 	int	(*lock)(struct file *, int, struct file_lock *);
+	void	(*clear_acl_cache)(struct inode *);
 };
 };
 
 
 /*
 /*
@@ -732,4 +772,7 @@ extern struct rpc_version	nfs_version2;
 extern struct rpc_version	nfs_version3;
 extern struct rpc_version	nfs_version3;
 extern struct rpc_version	nfs_version4;
 extern struct rpc_version	nfs_version4;
 
 
+extern struct rpc_version	nfsacl_version3;
+extern struct rpc_program	nfsacl_program;
+
 #endif
 #endif

+ 58 - 0
include/linux/nfsacl.h

@@ -0,0 +1,58 @@
+/*
+ * File: linux/nfsacl.h
+ *
+ * (C) 2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+#ifndef __LINUX_NFSACL_H
+#define __LINUX_NFSACL_H
+
+#define NFS_ACL_PROGRAM	100227
+
+#define ACLPROC2_GETACL		1
+#define ACLPROC2_SETACL		2
+#define ACLPROC2_GETATTR	3
+#define ACLPROC2_ACCESS		4
+
+#define ACLPROC3_GETACL		1
+#define ACLPROC3_SETACL		2
+
+
+/* Flags for the getacl/setacl mode */
+#define NFS_ACL			0x0001
+#define NFS_ACLCNT		0x0002
+#define NFS_DFACL		0x0004
+#define NFS_DFACLCNT		0x0008
+
+/* Flag for Default ACL entries */
+#define NFS_ACL_DEFAULT		0x1000
+
+#ifdef __KERNEL__
+
+#include <linux/posix_acl.h>
+
+/* Maximum number of ACL entries over NFS */
+#define NFS_ACL_MAX_ENTRIES	1024
+
+#define NFSACL_MAXWORDS		(2*(2+3*NFS_ACL_MAX_ENTRIES))
+#define NFSACL_MAXPAGES		((2*(8+12*NFS_ACL_MAX_ENTRIES) + PAGE_SIZE-1) \
+				 >> PAGE_SHIFT)
+
+static inline unsigned int
+nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default)
+{
+	unsigned int w = 16;
+	w += max(acl_access ? (int)acl_access->a_count : 3, 4) * 12;
+	if (acl_default)
+		w += max((int)acl_default->a_count, 4) * 12;
+	return w;
+}
+
+extern unsigned int
+nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+	      struct posix_acl *acl, int encode_entries, int typeflag);
+extern unsigned int
+nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+	      struct posix_acl **pacl);
+
+#endif /* __KERNEL__ */
+#endif  /* __LINUX_NFSACL_H */

+ 16 - 0
include/linux/nfsd/nfsd.h

@@ -15,6 +15,7 @@
 #include <linux/unistd.h>
 #include <linux/unistd.h>
 #include <linux/dirent.h>
 #include <linux/dirent.h>
 #include <linux/fs.h>
 #include <linux/fs.h>
+#include <linux/posix_acl.h>
 #include <linux/mount.h>
 #include <linux/mount.h>
 
 
 #include <linux/nfsd/debug.h>
 #include <linux/nfsd/debug.h>
@@ -124,6 +125,21 @@ int		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 int		nfsd_notify_change(struct inode *, struct iattr *);
 int		nfsd_notify_change(struct inode *, struct iattr *);
 int		nfsd_permission(struct svc_export *, struct dentry *, int);
 int		nfsd_permission(struct svc_export *, struct dentry *, int);
 
 
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+#ifdef CONFIG_NFSD_V2_ACL
+extern struct svc_version nfsd_acl_version2;
+#else
+#define nfsd_acl_version2 NULL
+#endif
+#ifdef CONFIG_NFSD_V3_ACL
+extern struct svc_version nfsd_acl_version3;
+#else
+#define nfsd_acl_version3 NULL
+#endif
+struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
+int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
+#endif
+
 
 
 /* 
 /* 
  * NFSv4 State
  * NFSv4 State

+ 4 - 0
include/linux/nfsd/xdr.h

@@ -169,4 +169,8 @@ int nfssvc_encode_entry(struct readdir_cd *, const char *name,
 
 
 int nfssvc_release_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *);
 int nfssvc_release_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *);
 
 
+/* Helper functions for NFSv2 ACL code */
+u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp);
+u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp);
+
 #endif /* LINUX_NFSD_H */
 #endif /* LINUX_NFSD_H */

+ 26 - 0
include/linux/nfsd/xdr3.h

@@ -110,6 +110,19 @@ struct nfsd3_commitargs {
 	__u32			count;
 	__u32			count;
 };
 };
 
 
+struct nfsd3_getaclargs {
+	struct svc_fh		fh;
+	int			mask;
+};
+
+struct posix_acl;
+struct nfsd3_setaclargs {
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
 struct nfsd3_attrstat {
 struct nfsd3_attrstat {
 	__u32			status;
 	__u32			status;
 	struct svc_fh		fh;
 	struct svc_fh		fh;
@@ -209,6 +222,14 @@ struct nfsd3_commitres {
 	struct svc_fh		fh;
 	struct svc_fh		fh;
 };
 };
 
 
+struct nfsd3_getaclres {
+	__u32			status;
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
 /* dummy type for release */
 /* dummy type for release */
 struct nfsd3_fhandle_pair {
 struct nfsd3_fhandle_pair {
 	__u32			dummy;
 	__u32			dummy;
@@ -241,6 +262,7 @@ union nfsd3_xdrstore {
 	struct nfsd3_fsinfores		fsinfores;
 	struct nfsd3_fsinfores		fsinfores;
 	struct nfsd3_pathconfres	pathconfres;
 	struct nfsd3_pathconfres	pathconfres;
 	struct nfsd3_commitres		commitres;
 	struct nfsd3_commitres		commitres;
+	struct nfsd3_getaclres		getaclres;
 };
 };
 
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
@@ -316,6 +338,10 @@ int nfs3svc_encode_entry(struct readdir_cd *, const char *name,
 int nfs3svc_encode_entry_plus(struct readdir_cd *, const char *name,
 int nfs3svc_encode_entry_plus(struct readdir_cd *, const char *name,
 				int namlen, loff_t offset, ino_t ino,
 				int namlen, loff_t offset, ino_t ino,
 				unsigned int);
 				unsigned int);
+/* Helper functions for NFSv3 ACL code */
+u32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p,
+				struct svc_fh *fhp);
+u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp);
 
 
 
 
 #endif /* _LINUX_NFSD_XDR3_H */
 #endif /* _LINUX_NFSD_XDR3_H */

+ 6 - 0
include/linux/sunrpc/clnt.h

@@ -111,6 +111,11 @@ struct rpc_procinfo {
 struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
 struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
 				struct rpc_program *info,
 				struct rpc_program *info,
 				u32 version, rpc_authflavor_t authflavor);
 				u32 version, rpc_authflavor_t authflavor);
+struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname,
+				struct rpc_program *info,
+				u32 version, rpc_authflavor_t authflavor);
+struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
+				struct rpc_program *, int);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
@@ -129,6 +134,7 @@ void		rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
 size_t		rpc_max_payload(struct rpc_clnt *);
+int		rpc_ping(struct rpc_clnt *clnt, int flags);
 
 
 static __inline__
 static __inline__
 int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
 int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)

+ 0 - 1
include/linux/sunrpc/sched.h

@@ -31,7 +31,6 @@ struct rpc_wait_queue;
 struct rpc_wait {
 struct rpc_wait {
 	struct list_head	list;		/* wait queue links */
 	struct list_head	list;		/* wait queue links */
 	struct list_head	links;		/* Links to related tasks */
 	struct list_head	links;		/* Links to related tasks */
-	wait_queue_head_t	waitq;		/* sync: sleep on this q */
 	struct rpc_wait_queue *	rpc_waitq;	/* RPC wait queue we're on */
 	struct rpc_wait_queue *	rpc_waitq;	/* RPC wait queue we're on */
 };
 };
 
 

+ 13 - 1
include/linux/sunrpc/svc.h

@@ -185,6 +185,17 @@ xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
 	return vec->iov_len <= PAGE_SIZE;
 	return vec->iov_len <= PAGE_SIZE;
 }
 }
 
 
+static inline struct page *
+svc_take_res_page(struct svc_rqst *rqstp)
+{
+	if (rqstp->rq_arghi <= rqstp->rq_argused)
+		return NULL;
+	rqstp->rq_arghi--;
+	rqstp->rq_respages[rqstp->rq_resused] =
+		rqstp->rq_argpages[rqstp->rq_arghi];
+	return rqstp->rq_respages[rqstp->rq_resused++];
+}
+
 static inline int svc_take_page(struct svc_rqst *rqstp)
 static inline int svc_take_page(struct svc_rqst *rqstp)
 {
 {
 	if (rqstp->rq_arghi <= rqstp->rq_argused)
 	if (rqstp->rq_arghi <= rqstp->rq_argused)
@@ -240,9 +251,10 @@ struct svc_deferred_req {
 };
 };
 
 
 /*
 /*
- * RPC program
+ * List of RPC programs on the same transport endpoint
  */
  */
 struct svc_program {
 struct svc_program {
+	struct svc_program *	pg_next;	/* other programs (same xprt) */
 	u32			pg_prog;	/* program number */
 	u32			pg_prog;	/* program number */
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_hivers;	/* lowest version */
 	unsigned int		pg_hivers;	/* lowest version */

+ 19 - 2
include/linux/sunrpc/xdr.h

@@ -146,7 +146,8 @@ extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int);
 extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int);
 extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int);
-extern int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len);
+extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int);
+extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int);
 
 
 /*
 /*
  * Helper structure for copying from an sk_buff.
  * Helper structure for copying from an sk_buff.
@@ -160,7 +161,7 @@ typedef struct {
 
 
 typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
 typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
 
 
-extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
+extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
 		skb_reader_t *, skb_read_actor_t);
 		skb_reader_t *, skb_read_actor_t);
 
 
 struct socket;
 struct socket;
@@ -168,6 +169,22 @@ struct sockaddr;
 extern int xdr_sendpages(struct socket *, struct sockaddr *, int,
 extern int xdr_sendpages(struct socket *, struct sockaddr *, int,
 		struct xdr_buf *, unsigned int, int);
 		struct xdr_buf *, unsigned int, int);
 
 
+extern int xdr_encode_word(struct xdr_buf *, int, u32);
+extern int xdr_decode_word(struct xdr_buf *, int, u32 *);
+
+struct xdr_array2_desc;
+typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem);
+struct xdr_array2_desc {
+	unsigned int elem_size;
+	unsigned int array_len;
+	xdr_xcode_elem_t xcode;
+};
+
+extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
+                             struct xdr_array2_desc *desc);
+extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+			     struct xdr_array2_desc *desc);
+
 /*
 /*
  * Provide some simple tools for XDR buffer overflow-checking etc.
  * Provide some simple tools for XDR buffer overflow-checking etc.
  */
  */

+ 3 - 3
net/sunrpc/auth.c

@@ -66,10 +66,10 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
 	u32			flavor = pseudoflavor_to_flavor(pseudoflavor);
 	u32			flavor = pseudoflavor_to_flavor(pseudoflavor);
 
 
 	if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
 	if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	auth = ops->create(clnt, pseudoflavor);
 	auth = ops->create(clnt, pseudoflavor);
-	if (!auth)
-		return NULL;
+	if (IS_ERR(auth))
+		return auth;
 	if (clnt->cl_auth)
 	if (clnt->cl_auth)
 		rpcauth_destroy(clnt->cl_auth);
 		rpcauth_destroy(clnt->cl_auth);
 	clnt->cl_auth = auth;
 	clnt->cl_auth = auth;

+ 11 - 7
net/sunrpc/auth_gss/auth_gss.c

@@ -660,14 +660,16 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 {
 {
 	struct gss_auth *gss_auth;
 	struct gss_auth *gss_auth;
 	struct rpc_auth * auth;
 	struct rpc_auth * auth;
+	int err = -ENOMEM; /* XXX? */
 
 
 	dprintk("RPC:      creating GSS authenticator for client %p\n",clnt);
 	dprintk("RPC:      creating GSS authenticator for client %p\n",clnt);
 
 
 	if (!try_module_get(THIS_MODULE))
 	if (!try_module_get(THIS_MODULE))
-		return NULL;
+		return ERR_PTR(err);
 	if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
 	if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
 		goto out_dec;
 		goto out_dec;
 	gss_auth->client = clnt;
 	gss_auth->client = clnt;
+	err = -EINVAL;
 	gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
 	gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
 	if (!gss_auth->mech) {
 	if (!gss_auth->mech) {
 		printk(KERN_WARNING "%s: Pseudoflavor %d not found!",
 		printk(KERN_WARNING "%s: Pseudoflavor %d not found!",
@@ -675,9 +677,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 		goto err_free;
 		goto err_free;
 	}
 	}
 	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
 	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
-	/* FIXME: Will go away once privacy support is merged in */
-	if (gss_auth->service == RPC_GSS_SVC_PRIVACY)
-		gss_auth->service = RPC_GSS_SVC_INTEGRITY;
+	if (gss_auth->service == 0)
+		goto err_put_mech;
 	INIT_LIST_HEAD(&gss_auth->upcalls);
 	INIT_LIST_HEAD(&gss_auth->upcalls);
 	spin_lock_init(&gss_auth->lock);
 	spin_lock_init(&gss_auth->lock);
 	auth = &gss_auth->rpc_auth;
 	auth = &gss_auth->rpc_auth;
@@ -687,15 +688,18 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 	auth->au_flavor = flavor;
 	auth->au_flavor = flavor;
 	atomic_set(&auth->au_count, 1);
 	atomic_set(&auth->au_count, 1);
 
 
-	if (rpcauth_init_credcache(auth, GSS_CRED_EXPIRE) < 0)
+	err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE);
+	if (err)
 		goto err_put_mech;
 		goto err_put_mech;
 
 
 	snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
 	snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
 			clnt->cl_pathname,
 			clnt->cl_pathname,
 			gss_auth->mech->gm_name);
 			gss_auth->mech->gm_name);
 	gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
 	gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
-	if (IS_ERR(gss_auth->dentry))
+	if (IS_ERR(gss_auth->dentry)) {
+		err = PTR_ERR(gss_auth->dentry);
 		goto err_put_mech;
 		goto err_put_mech;
+	}
 
 
 	return auth;
 	return auth;
 err_put_mech:
 err_put_mech:
@@ -704,7 +708,7 @@ err_free:
 	kfree(gss_auth);
 	kfree(gss_auth);
 out_dec:
 out_dec:
 	module_put(THIS_MODULE);
 	module_put(THIS_MODULE);
-	return NULL;
+	return ERR_PTR(err);
 }
 }
 
 
 static void
 static void

+ 157 - 48
net/sunrpc/clnt.c

@@ -97,12 +97,13 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
  * made to sleep too long.
  * made to sleep too long.
  */
  */
 struct rpc_clnt *
 struct rpc_clnt *
-rpc_create_client(struct rpc_xprt *xprt, char *servname,
+rpc_new_client(struct rpc_xprt *xprt, char *servname,
 		  struct rpc_program *program, u32 vers,
 		  struct rpc_program *program, u32 vers,
 		  rpc_authflavor_t flavor)
 		  rpc_authflavor_t flavor)
 {
 {
 	struct rpc_version	*version;
 	struct rpc_version	*version;
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_clnt		*clnt = NULL;
+	struct rpc_auth		*auth;
 	int err;
 	int err;
 	int len;
 	int len;
 
 
@@ -157,10 +158,11 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname,
 	if (err < 0)
 	if (err < 0)
 		goto out_no_path;
 		goto out_no_path;
 
 
-	err = -ENOMEM;
-	if (!rpcauth_create(flavor, clnt)) {
+	auth = rpcauth_create(flavor, clnt);
+	if (IS_ERR(auth)) {
 		printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
 		printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
 				flavor);
 				flavor);
+		err = PTR_ERR(auth);
 		goto out_no_auth;
 		goto out_no_auth;
 	}
 	}
 
 
@@ -178,6 +180,37 @@ out_no_path:
 		kfree(clnt->cl_server);
 		kfree(clnt->cl_server);
 	kfree(clnt);
 	kfree(clnt);
 out_err:
 out_err:
+	xprt_destroy(xprt);
+	return ERR_PTR(err);
+}
+
+/**
+ * Create an RPC client
+ * @xprt - pointer to xprt struct
+ * @servname - name of server
+ * @info - rpc_program
+ * @version - rpc_program version
+ * @authflavor - rpc_auth flavour to use
+ *
+ * Creates an RPC client structure, then pings the server in order to
+ * determine if it is up, and if it supports this program and version.
+ *
+ * This function should never be called by asynchronous tasks such as
+ * the portmapper.
+ */
+struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
+		struct rpc_program *info, u32 version, rpc_authflavor_t authflavor)
+{
+	struct rpc_clnt *clnt;
+	int err;
+	
+	clnt = rpc_new_client(xprt, servname, info, version, authflavor);
+	if (IS_ERR(clnt))
+		return clnt;
+	err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+	if (err == 0)
+		return clnt;
+	rpc_shutdown_client(clnt);
 	return ERR_PTR(err);
 	return ERR_PTR(err);
 }
 }
 
 
@@ -208,6 +241,8 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	if (new->cl_auth)
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
 		atomic_inc(&new->cl_auth->au_count);
+	new->cl_pmap		= &new->cl_pmap_default;
+	rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
 	return new;
 	return new;
 out_no_clnt:
 out_no_clnt:
 	printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
 	printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -296,6 +331,44 @@ rpc_release_client(struct rpc_clnt *clnt)
 		rpc_destroy_client(clnt);
 		rpc_destroy_client(clnt);
 }
 }
 
 
+/**
+ * rpc_bind_new_program - bind a new RPC program to an existing client
+ * @old - old rpc_client
+ * @program - rpc program to set
+ * @vers - rpc program version
+ *
+ * Clones the rpc client and sets up a new RPC program. This is mainly
+ * of use for enabling different RPC programs to share the same transport.
+ * The Sun NFSv2/v3 ACL protocol can do this.
+ */
+struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
+				      struct rpc_program *program,
+				      int vers)
+{
+	struct rpc_clnt *clnt;
+	struct rpc_version *version;
+	int err;
+
+	BUG_ON(vers >= program->nrvers || !program->version[vers]);
+	version = program->version[vers];
+	clnt = rpc_clone_client(old);
+	if (IS_ERR(clnt))
+		goto out;
+	clnt->cl_procinfo = version->procs;
+	clnt->cl_maxproc  = version->nrprocs;
+	clnt->cl_protname = program->name;
+	clnt->cl_prog     = program->number;
+	clnt->cl_vers     = version->number;
+	clnt->cl_stats    = program->stats;
+	err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+	if (err != 0) {
+		rpc_shutdown_client(clnt);
+		clnt = ERR_PTR(err);
+	}
+out:	
+	return clnt;
+}
+
 /*
 /*
  * Default callback for async RPC calls
  * Default callback for async RPC calls
  */
  */
@@ -305,38 +378,41 @@ rpc_default_callback(struct rpc_task *task)
 }
 }
 
 
 /*
 /*
- *	Export the signal mask handling for aysnchronous code that
+ *	Export the signal mask handling for synchronous code that
  *	sleeps on RPC calls
  *	sleeps on RPC calls
  */
  */
+#define RPC_INTR_SIGNALS (sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGKILL))
  
  
+static void rpc_save_sigmask(sigset_t *oldset, int intr)
+{
+	unsigned long	sigallow = 0;
+	sigset_t sigmask;
+
+	/* Block all signals except those listed in sigallow */
+	if (intr)
+		sigallow |= RPC_INTR_SIGNALS;
+	siginitsetinv(&sigmask, sigallow);
+	sigprocmask(SIG_BLOCK, &sigmask, oldset);
+}
+
+static inline void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
+{
+	rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task));
+}
+
+static inline void rpc_restore_sigmask(sigset_t *oldset)
+{
+	sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
 void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
 void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
 {
 {
-	unsigned long	sigallow = sigmask(SIGKILL);
-	unsigned long	irqflags;
-	
-	/* Turn off various signals */
-	if (clnt->cl_intr) {
-		struct k_sigaction *action = current->sighand->action;
-		if (action[SIGINT-1].sa.sa_handler == SIG_DFL)
-			sigallow |= sigmask(SIGINT);
-		if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL)
-			sigallow |= sigmask(SIGQUIT);
-	}
-	spin_lock_irqsave(&current->sighand->siglock, irqflags);
-	*oldset = current->blocked;
-	siginitsetinv(&current->blocked, sigallow & ~oldset->sig[0]);
-	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
+	rpc_save_sigmask(oldset, clnt->cl_intr);
 }
 }
 
 
 void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
 void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
 {
 {
-	unsigned long	irqflags;
-	
-	spin_lock_irqsave(&current->sighand->siglock, irqflags);
-	current->blocked = *oldset;
-	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
+	rpc_restore_sigmask(oldset);
 }
 }
 
 
 /*
 /*
@@ -354,26 +430,26 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 
 
 	BUG_ON(flags & RPC_TASK_ASYNC);
 	BUG_ON(flags & RPC_TASK_ASYNC);
 
 
-	rpc_clnt_sigmask(clnt, &oldset);		
-
 	status = -ENOMEM;
 	status = -ENOMEM;
 	task = rpc_new_task(clnt, NULL, flags);
 	task = rpc_new_task(clnt, NULL, flags);
 	if (task == NULL)
 	if (task == NULL)
 		goto out;
 		goto out;
 
 
+	/* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
+	rpc_task_sigmask(task, &oldset);
+
 	rpc_call_setup(task, msg, 0);
 	rpc_call_setup(task, msg, 0);
 
 
 	/* Set up the call info struct and execute the task */
 	/* Set up the call info struct and execute the task */
-	if (task->tk_status == 0)
+	if (task->tk_status == 0) {
 		status = rpc_execute(task);
 		status = rpc_execute(task);
-	else {
+	} else {
 		status = task->tk_status;
 		status = task->tk_status;
 		rpc_release_task(task);
 		rpc_release_task(task);
 	}
 	}
 
 
+	rpc_restore_sigmask(&oldset);
 out:
 out:
-	rpc_clnt_sigunmask(clnt, &oldset);		
-
 	return status;
 	return status;
 }
 }
 
 
@@ -394,8 +470,6 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 
 
 	flags |= RPC_TASK_ASYNC;
 	flags |= RPC_TASK_ASYNC;
 
 
-	rpc_clnt_sigmask(clnt, &oldset);		
-
 	/* Create/initialize a new RPC task */
 	/* Create/initialize a new RPC task */
 	if (!callback)
 	if (!callback)
 		callback = rpc_default_callback;
 		callback = rpc_default_callback;
@@ -404,6 +478,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 		goto out;
 		goto out;
 	task->tk_calldata = data;
 	task->tk_calldata = data;
 
 
+	/* Mask signals on GSS_AUTH upcalls */
+	rpc_task_sigmask(task, &oldset);		
+
 	rpc_call_setup(task, msg, 0);
 	rpc_call_setup(task, msg, 0);
 
 
 	/* Set up the call info struct and execute the task */
 	/* Set up the call info struct and execute the task */
@@ -413,9 +490,8 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 	else
 	else
 		rpc_release_task(task);
 		rpc_release_task(task);
 
 
+	rpc_restore_sigmask(&oldset);		
 out:
 out:
-	rpc_clnt_sigunmask(clnt, &oldset);		
-
 	return status;
 	return status;
 }
 }
 
 
@@ -593,7 +669,7 @@ call_allocate(struct rpc_task *task)
 		return;
 		return;
 	printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 
 	printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 
 
 
-	if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) {
+	if (RPC_IS_ASYNC(task) || !signalled()) {
 		xprt_release(task);
 		xprt_release(task);
 		task->tk_action = call_reserve;
 		task->tk_action = call_reserve;
 		rpc_delay(task, HZ>>4);
 		rpc_delay(task, HZ>>4);
@@ -957,7 +1033,9 @@ call_header(struct rpc_task *task)
 	*p++ = htonl(clnt->cl_prog);	/* program number */
 	*p++ = htonl(clnt->cl_prog);	/* program number */
 	*p++ = htonl(clnt->cl_vers);	/* program version */
 	*p++ = htonl(clnt->cl_vers);	/* program version */
 	*p++ = htonl(task->tk_msg.rpc_proc->p_proc);	/* procedure */
 	*p++ = htonl(task->tk_msg.rpc_proc->p_proc);	/* procedure */
-	return rpcauth_marshcred(task, p);
+	p = rpcauth_marshcred(task, p);
+	req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p);
+	return p;
 }
 }
 
 
 /*
 /*
@@ -986,10 +1064,11 @@ call_verify(struct rpc_task *task)
 			case RPC_AUTH_ERROR:
 			case RPC_AUTH_ERROR:
 				break;
 				break;
 			case RPC_MISMATCH:
 			case RPC_MISMATCH:
-				printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__);
-				goto out_eio;
+				dprintk("%s: RPC call version mismatch!\n", __FUNCTION__);
+				error = -EPROTONOSUPPORT;
+				goto out_err;
 			default:
 			default:
-				printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
+				dprintk("%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
 				goto out_eio;
 				goto out_eio;
 		}
 		}
 		if (--len < 0)
 		if (--len < 0)
@@ -1040,23 +1119,26 @@ call_verify(struct rpc_task *task)
 	case RPC_SUCCESS:
 	case RPC_SUCCESS:
 		return p;
 		return p;
 	case RPC_PROG_UNAVAIL:
 	case RPC_PROG_UNAVAIL:
-		printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n",
+		dprintk("RPC: call_verify: program %u is unsupported by server %s\n",
 				(unsigned int)task->tk_client->cl_prog,
 				(unsigned int)task->tk_client->cl_prog,
 				task->tk_client->cl_server);
 				task->tk_client->cl_server);
-		goto out_eio;
+		error = -EPFNOSUPPORT;
+		goto out_err;
 	case RPC_PROG_MISMATCH:
 	case RPC_PROG_MISMATCH:
-		printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n",
+		dprintk("RPC: call_verify: program %u, version %u unsupported by server %s\n",
 				(unsigned int)task->tk_client->cl_prog,
 				(unsigned int)task->tk_client->cl_prog,
 				(unsigned int)task->tk_client->cl_vers,
 				(unsigned int)task->tk_client->cl_vers,
 				task->tk_client->cl_server);
 				task->tk_client->cl_server);
-		goto out_eio;
+		error = -EPROTONOSUPPORT;
+		goto out_err;
 	case RPC_PROC_UNAVAIL:
 	case RPC_PROC_UNAVAIL:
-		printk(KERN_WARNING "RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n",
+		dprintk("RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n",
 				task->tk_msg.rpc_proc,
 				task->tk_msg.rpc_proc,
 				task->tk_client->cl_prog,
 				task->tk_client->cl_prog,
 				task->tk_client->cl_vers,
 				task->tk_client->cl_vers,
 				task->tk_client->cl_server);
 				task->tk_client->cl_server);
-		goto out_eio;
+		error = -EOPNOTSUPP;
+		goto out_err;
 	case RPC_GARBAGE_ARGS:
 	case RPC_GARBAGE_ARGS:
 		dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
 		dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
 		break;			/* retry */
 		break;			/* retry */
@@ -1069,7 +1151,7 @@ out_retry:
 	task->tk_client->cl_stats->rpcgarbage++;
 	task->tk_client->cl_stats->rpcgarbage++;
 	if (task->tk_garb_retry) {
 	if (task->tk_garb_retry) {
 		task->tk_garb_retry--;
 		task->tk_garb_retry--;
-		dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
+		dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
 		task->tk_action = call_bind;
 		task->tk_action = call_bind;
 		return NULL;
 		return NULL;
 	}
 	}
@@ -1083,3 +1165,30 @@ out_overflow:
 	printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
 	printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
 	goto out_retry;
 	goto out_retry;
 }
 }
+
+static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj)
+{
+	return 0;
+}
+
+static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj)
+{
+	return 0;
+}
+
+static struct rpc_procinfo rpcproc_null = {
+	.p_encode = rpcproc_encode_null,
+	.p_decode = rpcproc_decode_null,
+};
+
+int rpc_ping(struct rpc_clnt *clnt, int flags)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &rpcproc_null,
+	};
+	int err;
+	msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0);
+	err = rpc_call_sync(clnt, &msg, flags);
+	put_rpccred(msg.rpc_cred);
+	return err;
+}

+ 5 - 4
net/sunrpc/pmap_clnt.c

@@ -53,6 +53,9 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
 			task->tk_pid, clnt->cl_server,
 			task->tk_pid, clnt->cl_server,
 			map->pm_prog, map->pm_vers, map->pm_prot);
 			map->pm_prog, map->pm_vers, map->pm_prot);
 
 
+	/* Autobind on cloned rpc clients is discouraged */
+	BUG_ON(clnt->cl_parent != clnt);
+
 	spin_lock(&pmap_lock);
 	spin_lock(&pmap_lock);
 	if (map->pm_binding) {
 	if (map->pm_binding) {
 		rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL);
 		rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL);
@@ -207,12 +210,10 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto)
 	xprt->addr.sin_port = htons(RPC_PMAP_PORT);
 	xprt->addr.sin_port = htons(RPC_PMAP_PORT);
 
 
 	/* printk("pmap: create clnt\n"); */
 	/* printk("pmap: create clnt\n"); */
-	clnt = rpc_create_client(xprt, hostname,
+	clnt = rpc_new_client(xprt, hostname,
 				&pmap_program, RPC_PMAP_VERSION,
 				&pmap_program, RPC_PMAP_VERSION,
 				RPC_AUTH_UNIX);
 				RPC_AUTH_UNIX);
-	if (IS_ERR(clnt)) {
-		xprt_destroy(xprt);
-	} else {
+	if (!IS_ERR(clnt)) {
 		clnt->cl_softrtry = 1;
 		clnt->cl_softrtry = 1;
 		clnt->cl_chatty   = 1;
 		clnt->cl_chatty   = 1;
 		clnt->cl_oneshot  = 1;
 		clnt->cl_oneshot  = 1;

+ 48 - 36
net/sunrpc/sched.c

@@ -290,7 +290,7 @@ static void rpc_make_runnable(struct rpc_task *task)
 			return;
 			return;
 		}
 		}
 	} else
 	} else
-		wake_up(&task->u.tk_wait.waitq);
+		wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
 }
 }
 
 
 /*
 /*
@@ -554,6 +554,38 @@ __rpc_atrun(struct rpc_task *task)
 	rpc_wake_up_task(task);
 	rpc_wake_up_task(task);
 }
 }
 
 
+/*
+ * Helper that calls task->tk_exit if it exists and then returns
+ * true if we should exit __rpc_execute.
+ */
+static inline int __rpc_do_exit(struct rpc_task *task)
+{
+	if (task->tk_exit != NULL) {
+		lock_kernel();
+		task->tk_exit(task);
+		unlock_kernel();
+		/* If tk_action is non-null, we should restart the call */
+		if (task->tk_action != NULL) {
+			if (!RPC_ASSASSINATED(task)) {
+				/* Release RPC slot and buffer memory */
+				xprt_release(task);
+				rpc_free(task);
+				return 0;
+			}
+			printk(KERN_ERR "RPC: dead task tried to walk away.\n");
+		}
+	}
+	return 1;
+}
+
+static int rpc_wait_bit_interruptible(void *word)
+{
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+	schedule();
+	return 0;
+}
+
 /*
 /*
  * This is the RPC `scheduler' (or rather, the finite state machine).
  * This is the RPC `scheduler' (or rather, the finite state machine).
  */
  */
@@ -566,8 +598,7 @@ static int __rpc_execute(struct rpc_task *task)
 
 
 	BUG_ON(RPC_IS_QUEUED(task));
 	BUG_ON(RPC_IS_QUEUED(task));
 
 
- restarted:
-	while (1) {
+	for (;;) {
 		/*
 		/*
 		 * Garbage collection of pending timers...
 		 * Garbage collection of pending timers...
 		 */
 		 */
@@ -600,11 +631,12 @@ static int __rpc_execute(struct rpc_task *task)
 		 * by someone else.
 		 * by someone else.
 		 */
 		 */
 		if (!RPC_IS_QUEUED(task)) {
 		if (!RPC_IS_QUEUED(task)) {
-			if (!task->tk_action)
+			if (task->tk_action != NULL) {
+				lock_kernel();
+				task->tk_action(task);
+				unlock_kernel();
+			} else if (__rpc_do_exit(task))
 				break;
 				break;
-			lock_kernel();
-			task->tk_action(task);
-			unlock_kernel();
 		}
 		}
 
 
 		/*
 		/*
@@ -624,44 +656,26 @@ static int __rpc_execute(struct rpc_task *task)
 
 
 		/* sync task: sleep here */
 		/* sync task: sleep here */
 		dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
 		dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
-		if (RPC_TASK_UNINTERRUPTIBLE(task)) {
-			__wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
-		} else {
-			__wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
+		/* Note: Caller should be using rpc_clnt_sigmask() */
+		status = out_of_line_wait_on_bit(&task->tk_runstate,
+				RPC_TASK_QUEUED, rpc_wait_bit_interruptible,
+				TASK_INTERRUPTIBLE);
+		if (status == -ERESTARTSYS) {
 			/*
 			/*
 			 * When a sync task receives a signal, it exits with
 			 * When a sync task receives a signal, it exits with
 			 * -ERESTARTSYS. In order to catch any callbacks that
 			 * -ERESTARTSYS. In order to catch any callbacks that
 			 * clean up after sleeping on some queue, we don't
 			 * clean up after sleeping on some queue, we don't
 			 * break the loop here, but go around once more.
 			 * break the loop here, but go around once more.
 			 */
 			 */
-			if (status == -ERESTARTSYS) {
-				dprintk("RPC: %4d got signal\n", task->tk_pid);
-				task->tk_flags |= RPC_TASK_KILLED;
-				rpc_exit(task, -ERESTARTSYS);
-				rpc_wake_up_task(task);
-			}
+			dprintk("RPC: %4d got signal\n", task->tk_pid);
+			task->tk_flags |= RPC_TASK_KILLED;
+			rpc_exit(task, -ERESTARTSYS);
+			rpc_wake_up_task(task);
 		}
 		}
 		rpc_set_running(task);
 		rpc_set_running(task);
 		dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
 		dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
 	}
 	}
 
 
-	if (task->tk_exit) {
-		lock_kernel();
-		task->tk_exit(task);
-		unlock_kernel();
-		/* If tk_action is non-null, the user wants us to restart */
-		if (task->tk_action) {
-			if (!RPC_ASSASSINATED(task)) {
-				/* Release RPC slot and buffer memory */
-				if (task->tk_rqstp)
-					xprt_release(task);
-				rpc_free(task);
-				goto restarted;
-			}
-			printk(KERN_ERR "RPC: dead task tries to walk away.\n");
-		}
-	}
-
 	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
 	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
 	status = task->tk_status;
 	status = task->tk_status;
 
 
@@ -759,8 +773,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
 
 
 	/* Initialize workqueue for async tasks */
 	/* Initialize workqueue for async tasks */
 	task->tk_workqueue = rpciod_workqueue;
 	task->tk_workqueue = rpciod_workqueue;
-	if (!RPC_IS_ASYNC(task))
-		init_waitqueue_head(&task->u.tk_wait.waitq);
 
 
 	if (clnt) {
 	if (clnt) {
 		atomic_inc(&clnt->cl_users);
 		atomic_inc(&clnt->cl_users);

+ 5 - 1
net/sunrpc/sunrpc_syms.c

@@ -42,6 +42,7 @@ EXPORT_SYMBOL(rpc_release_task);
 /* RPC client functions */
 /* RPC client functions */
 EXPORT_SYMBOL(rpc_create_client);
 EXPORT_SYMBOL(rpc_create_client);
 EXPORT_SYMBOL(rpc_clone_client);
 EXPORT_SYMBOL(rpc_clone_client);
+EXPORT_SYMBOL(rpc_bind_new_program);
 EXPORT_SYMBOL(rpc_destroy_client);
 EXPORT_SYMBOL(rpc_destroy_client);
 EXPORT_SYMBOL(rpc_shutdown_client);
 EXPORT_SYMBOL(rpc_shutdown_client);
 EXPORT_SYMBOL(rpc_release_client);
 EXPORT_SYMBOL(rpc_release_client);
@@ -61,7 +62,6 @@ EXPORT_SYMBOL(rpc_mkpipe);
 
 
 /* Client transport */
 /* Client transport */
 EXPORT_SYMBOL(xprt_create_proto);
 EXPORT_SYMBOL(xprt_create_proto);
-EXPORT_SYMBOL(xprt_destroy);
 EXPORT_SYMBOL(xprt_set_timeout);
 EXPORT_SYMBOL(xprt_set_timeout);
 EXPORT_SYMBOL(xprt_udp_slot_table_entries);
 EXPORT_SYMBOL(xprt_udp_slot_table_entries);
 EXPORT_SYMBOL(xprt_tcp_slot_table_entries);
 EXPORT_SYMBOL(xprt_tcp_slot_table_entries);
@@ -129,6 +129,10 @@ EXPORT_SYMBOL(xdr_encode_netobj);
 EXPORT_SYMBOL(xdr_encode_pages);
 EXPORT_SYMBOL(xdr_encode_pages);
 EXPORT_SYMBOL(xdr_inline_pages);
 EXPORT_SYMBOL(xdr_inline_pages);
 EXPORT_SYMBOL(xdr_shift_buf);
 EXPORT_SYMBOL(xdr_shift_buf);
+EXPORT_SYMBOL(xdr_encode_word);
+EXPORT_SYMBOL(xdr_decode_word);
+EXPORT_SYMBOL(xdr_encode_array2);
+EXPORT_SYMBOL(xdr_decode_array2);
 EXPORT_SYMBOL(xdr_buf_from_iov);
 EXPORT_SYMBOL(xdr_buf_from_iov);
 EXPORT_SYMBOL(xdr_buf_subsegment);
 EXPORT_SYMBOL(xdr_buf_subsegment);
 EXPORT_SYMBOL(xdr_buf_read_netobj);
 EXPORT_SYMBOL(xdr_buf_read_netobj);

+ 19 - 17
net/sunrpc/svc.c

@@ -35,20 +35,24 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
 	if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
 	if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
 		return NULL;
 		return NULL;
 	memset(serv, 0, sizeof(*serv));
 	memset(serv, 0, sizeof(*serv));
+	serv->sv_name      = prog->pg_name;
 	serv->sv_program   = prog;
 	serv->sv_program   = prog;
 	serv->sv_nrthreads = 1;
 	serv->sv_nrthreads = 1;
 	serv->sv_stats     = prog->pg_stats;
 	serv->sv_stats     = prog->pg_stats;
 	serv->sv_bufsz	   = bufsize? bufsize : 4096;
 	serv->sv_bufsz	   = bufsize? bufsize : 4096;
-	prog->pg_lovers = prog->pg_nvers-1;
 	xdrsize = 0;
 	xdrsize = 0;
-	for (vers=0; vers<prog->pg_nvers ; vers++)
-		if (prog->pg_vers[vers]) {
-			prog->pg_hivers = vers;
-			if (prog->pg_lovers > vers)
-				prog->pg_lovers = vers;
-			if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
-				xdrsize = prog->pg_vers[vers]->vs_xdrsize;
-		}
+	while (prog) {
+		prog->pg_lovers = prog->pg_nvers-1;
+		for (vers=0; vers<prog->pg_nvers ; vers++)
+			if (prog->pg_vers[vers]) {
+				prog->pg_hivers = vers;
+				if (prog->pg_lovers > vers)
+					prog->pg_lovers = vers;
+				if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
+					xdrsize = prog->pg_vers[vers]->vs_xdrsize;
+			}
+		prog = prog->pg_next;
+	}
 	serv->sv_xdrsize   = xdrsize;
 	serv->sv_xdrsize   = xdrsize;
 	INIT_LIST_HEAD(&serv->sv_threads);
 	INIT_LIST_HEAD(&serv->sv_threads);
 	INIT_LIST_HEAD(&serv->sv_sockets);
 	INIT_LIST_HEAD(&serv->sv_sockets);
@@ -56,8 +60,6 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
 	INIT_LIST_HEAD(&serv->sv_permsocks);
 	INIT_LIST_HEAD(&serv->sv_permsocks);
 	spin_lock_init(&serv->sv_lock);
 	spin_lock_init(&serv->sv_lock);
 
 
-	serv->sv_name      = prog->pg_name;
-
 	/* Remove any stale portmap registrations */
 	/* Remove any stale portmap registrations */
 	svc_register(serv, 0, 0);
 	svc_register(serv, 0, 0);
 
 
@@ -281,6 +283,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 	rqstp->rq_res.len = 0;
 	rqstp->rq_res.len = 0;
 	rqstp->rq_res.page_base = 0;
 	rqstp->rq_res.page_base = 0;
 	rqstp->rq_res.page_len = 0;
 	rqstp->rq_res.page_len = 0;
+	rqstp->rq_res.buflen = PAGE_SIZE;
 	rqstp->rq_res.tail[0].iov_len = 0;
 	rqstp->rq_res.tail[0].iov_len = 0;
 	/* tcp needs a space for the record length... */
 	/* tcp needs a space for the record length... */
 	if (rqstp->rq_prot == IPPROTO_TCP)
 	if (rqstp->rq_prot == IPPROTO_TCP)
@@ -338,7 +341,10 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 		goto sendit;
 		goto sendit;
 	}
 	}
 		
 		
-	if (prog != progp->pg_prog)
+	for (progp = serv->sv_program; progp; progp = progp->pg_next)
+		if (prog == progp->pg_prog)
+			break;
+	if (progp == NULL)
 		goto err_bad_prog;
 		goto err_bad_prog;
 
 
 	if (vers >= progp->pg_nvers ||
 	if (vers >= progp->pg_nvers ||
@@ -451,11 +457,7 @@ err_bad_auth:
 	goto sendit;
 	goto sendit;
 
 
 err_bad_prog:
 err_bad_prog:
-#ifdef RPC_PARANOIA
-	if (prog != 100227 || progp->pg_prog != 100003)
-		printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog);
-	/* else it is just a Solaris client seeing if ACLs are supported */
-#endif
+	dprintk("svc: unknown program %d\n", prog);
 	serv->sv_stats->rpcbadfmt++;
 	serv->sv_stats->rpcbadfmt++;
 	svc_putu32(resv, rpc_prog_unavail);
 	svc_putu32(resv, rpc_prog_unavail);
 	goto sendit;
 	goto sendit;

+ 288 - 10
net/sunrpc/xdr.c

@@ -176,21 +176,23 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
 	xdr->buflen += len;
 	xdr->buflen += len;
 }
 }
 
 
-void
+ssize_t
 xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
 xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
 			  skb_reader_t *desc,
 			  skb_reader_t *desc,
 			  skb_read_actor_t copy_actor)
 			  skb_read_actor_t copy_actor)
 {
 {
 	struct page	**ppage = xdr->pages;
 	struct page	**ppage = xdr->pages;
 	unsigned int	len, pglen = xdr->page_len;
 	unsigned int	len, pglen = xdr->page_len;
+	ssize_t		copied = 0;
 	int		ret;
 	int		ret;
 
 
 	len = xdr->head[0].iov_len;
 	len = xdr->head[0].iov_len;
 	if (base < len) {
 	if (base < len) {
 		len -= base;
 		len -= base;
 		ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
 		ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
+		copied += ret;
 		if (ret != len || !desc->count)
 		if (ret != len || !desc->count)
-			return;
+			goto out;
 		base = 0;
 		base = 0;
 	} else
 	} else
 		base -= len;
 		base -= len;
@@ -210,6 +212,17 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
 	do {
 	do {
 		char *kaddr;
 		char *kaddr;
 
 
+		/* ACL likes to be lazy in allocating pages - ACLs
+		 * are small by default but can get huge. */
+		if (unlikely(*ppage == NULL)) {
+			*ppage = alloc_page(GFP_ATOMIC);
+			if (unlikely(*ppage == NULL)) {
+				if (copied == 0)
+					copied = -ENOMEM;
+				goto out;
+			}
+		}
+
 		len = PAGE_CACHE_SIZE;
 		len = PAGE_CACHE_SIZE;
 		kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
 		kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
 		if (base) {
 		if (base) {
@@ -225,14 +238,17 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
 		}
 		}
 		flush_dcache_page(*ppage);
 		flush_dcache_page(*ppage);
 		kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
 		kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
+		copied += ret;
 		if (ret != len || !desc->count)
 		if (ret != len || !desc->count)
-			return;
+			goto out;
 		ppage++;
 		ppage++;
 	} while ((pglen -= len) != 0);
 	} while ((pglen -= len) != 0);
 copy_tail:
 copy_tail:
 	len = xdr->tail[0].iov_len;
 	len = xdr->tail[0].iov_len;
 	if (base < len)
 	if (base < len)
-		copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
+		copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
+out:
+	return copied;
 }
 }
 
 
 
 
@@ -616,12 +632,24 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
 void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
 void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
 {
 {
 	struct kvec *iov = buf->head;
 	struct kvec *iov = buf->head;
+	int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
 
 
+	BUG_ON(scratch_len < 0);
 	xdr->buf = buf;
 	xdr->buf = buf;
 	xdr->iov = iov;
 	xdr->iov = iov;
-	xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len);
-	buf->len = iov->iov_len = (char *)p - (char *)iov->iov_base;
-	xdr->p = p;
+	xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len);
+	xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len);
+	BUG_ON(iov->iov_len > scratch_len);
+
+	if (p != xdr->p && p != NULL) {
+		size_t len;
+
+		BUG_ON(p < xdr->p || p > xdr->end);
+		len = (char *)p - (char *)xdr->p;
+		xdr->p = p;
+		buf->len += len;
+		iov->iov_len += len;
+	}
 }
 }
 EXPORT_SYMBOL(xdr_init_encode);
 EXPORT_SYMBOL(xdr_init_encode);
 
 
@@ -859,8 +887,34 @@ out:
 	return status;
 	return status;
 }
 }
 
 
-static int
-read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
+/* obj is assumed to point to allocated memory of size at least len: */
+int
+write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
+{
+	struct xdr_buf subbuf;
+	int this_len;
+	int status;
+
+	status = xdr_buf_subsegment(buf, &subbuf, base, len);
+	if (status)
+		goto out;
+	this_len = min(len, (int)subbuf.head[0].iov_len);
+	memcpy(subbuf.head[0].iov_base, obj, this_len);
+	len -= this_len;
+	obj += this_len;
+	this_len = min(len, (int)subbuf.page_len);
+	if (this_len)
+		_copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len);
+	len -= this_len;
+	obj += this_len;
+	this_len = min(len, (int)subbuf.tail[0].iov_len);
+	memcpy(subbuf.tail[0].iov_base, obj, this_len);
+out:
+	return status;
+}
+
+int
+xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
 {
 {
 	u32	raw;
 	u32	raw;
 	int	status;
 	int	status;
@@ -872,6 +926,14 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
 	return 0;
 	return 0;
 }
 }
 
 
+int
+xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
+{
+	u32	raw = htonl(obj);
+
+	return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
+}
+
 /* If the netobj starting offset bytes from the start of xdr_buf is contained
 /* If the netobj starting offset bytes from the start of xdr_buf is contained
  * entirely in the head or the tail, set object to point to it; otherwise
  * entirely in the head or the tail, set object to point to it; otherwise
  * try to find space for it at the end of the tail, copy it there, and
  * try to find space for it at the end of the tail, copy it there, and
@@ -882,7 +944,7 @@ xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
 	u32	tail_offset = buf->head[0].iov_len + buf->page_len;
 	u32	tail_offset = buf->head[0].iov_len + buf->page_len;
 	u32	obj_end_offset;
 	u32	obj_end_offset;
 
 
-	if (read_u32_from_xdr_buf(buf, offset, &obj->len))
+	if (xdr_decode_word(buf, offset, &obj->len))
 		goto out;
 		goto out;
 	obj_end_offset = offset + 4 + obj->len;
 	obj_end_offset = offset + 4 + obj->len;
 
 
@@ -915,3 +977,219 @@ xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
 out:
 out:
 	return -1;
 	return -1;
 }
 }
+
+/* Returns 0 on success, or else a negative error code. */
+static int
+xdr_xcode_array2(struct xdr_buf *buf, unsigned int base,
+		 struct xdr_array2_desc *desc, int encode)
+{
+	char *elem = NULL, *c;
+	unsigned int copied = 0, todo, avail_here;
+	struct page **ppages = NULL;
+	int err;
+
+	if (encode) {
+		if (xdr_encode_word(buf, base, desc->array_len) != 0)
+			return -EINVAL;
+	} else {
+		if (xdr_decode_word(buf, base, &desc->array_len) != 0 ||
+		    (unsigned long) base + 4 + desc->array_len *
+				    desc->elem_size > buf->len)
+			return -EINVAL;
+	}
+	base += 4;
+
+	if (!desc->xcode)
+		return 0;
+
+	todo = desc->array_len * desc->elem_size;
+
+	/* process head */
+	if (todo && base < buf->head->iov_len) {
+		c = buf->head->iov_base + base;
+		avail_here = min_t(unsigned int, todo,
+				   buf->head->iov_len - base);
+		todo -= avail_here;
+
+		while (avail_here >= desc->elem_size) {
+			err = desc->xcode(desc, c);
+			if (err)
+				goto out;
+			c += desc->elem_size;
+			avail_here -= desc->elem_size;
+		}
+		if (avail_here) {
+			if (!elem) {
+				elem = kmalloc(desc->elem_size, GFP_KERNEL);
+				err = -ENOMEM;
+				if (!elem)
+					goto out;
+			}
+			if (encode) {
+				err = desc->xcode(desc, elem);
+				if (err)
+					goto out;
+				memcpy(c, elem, avail_here);
+			} else
+				memcpy(elem, c, avail_here);
+			copied = avail_here;
+		}
+		base = buf->head->iov_len;  /* align to start of pages */
+	}
+
+	/* process pages array */
+	base -= buf->head->iov_len;
+	if (todo && base < buf->page_len) {
+		unsigned int avail_page;
+
+		avail_here = min(todo, buf->page_len - base);
+		todo -= avail_here;
+
+		base += buf->page_base;
+		ppages = buf->pages + (base >> PAGE_CACHE_SHIFT);
+		base &= ~PAGE_CACHE_MASK;
+		avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base,
+					avail_here);
+		c = kmap(*ppages) + base;
+
+		while (avail_here) {
+			avail_here -= avail_page;
+			if (copied || avail_page < desc->elem_size) {
+				unsigned int l = min(avail_page,
+					desc->elem_size - copied);
+				if (!elem) {
+					elem = kmalloc(desc->elem_size,
+						       GFP_KERNEL);
+					err = -ENOMEM;
+					if (!elem)
+						goto out;
+				}
+				if (encode) {
+					if (!copied) {
+						err = desc->xcode(desc, elem);
+						if (err)
+							goto out;
+					}
+					memcpy(c, elem + copied, l);
+					copied += l;
+					if (copied == desc->elem_size)
+						copied = 0;
+				} else {
+					memcpy(elem + copied, c, l);
+					copied += l;
+					if (copied == desc->elem_size) {
+						err = desc->xcode(desc, elem);
+						if (err)
+							goto out;
+						copied = 0;
+					}
+				}
+				avail_page -= l;
+				c += l;
+			}
+			while (avail_page >= desc->elem_size) {
+				err = desc->xcode(desc, c);
+				if (err)
+					goto out;
+				c += desc->elem_size;
+				avail_page -= desc->elem_size;
+			}
+			if (avail_page) {
+				unsigned int l = min(avail_page,
+					    desc->elem_size - copied);
+				if (!elem) {
+					elem = kmalloc(desc->elem_size,
+						       GFP_KERNEL);
+					err = -ENOMEM;
+					if (!elem)
+						goto out;
+				}
+				if (encode) {
+					if (!copied) {
+						err = desc->xcode(desc, elem);
+						if (err)
+							goto out;
+					}
+					memcpy(c, elem + copied, l);
+					copied += l;
+					if (copied == desc->elem_size)
+						copied = 0;
+				} else {
+					memcpy(elem + copied, c, l);
+					copied += l;
+					if (copied == desc->elem_size) {
+						err = desc->xcode(desc, elem);
+						if (err)
+							goto out;
+						copied = 0;
+					}
+				}
+			}
+			if (avail_here) {
+				kunmap(*ppages);
+				ppages++;
+				c = kmap(*ppages);
+			}
+
+			avail_page = min(avail_here,
+				 (unsigned int) PAGE_CACHE_SIZE);
+		}
+		base = buf->page_len;  /* align to start of tail */
+	}
+
+	/* process tail */
+	base -= buf->page_len;
+	if (todo) {
+		c = buf->tail->iov_base + base;
+		if (copied) {
+			unsigned int l = desc->elem_size - copied;
+
+			if (encode)
+				memcpy(c, elem + copied, l);
+			else {
+				memcpy(elem + copied, c, l);
+				err = desc->xcode(desc, elem);
+				if (err)
+					goto out;
+			}
+			todo -= l;
+			c += l;
+		}
+		while (todo) {
+			err = desc->xcode(desc, c);
+			if (err)
+				goto out;
+			c += desc->elem_size;
+			todo -= desc->elem_size;
+		}
+	}
+	err = 0;
+
+out:
+	if (elem)
+		kfree(elem);
+	if (ppages)
+		kunmap(*ppages);
+	return err;
+}
+
+int
+xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
+		  struct xdr_array2_desc *desc)
+{
+	if (base >= buf->len)
+		return -EINVAL;
+
+	return xdr_xcode_array2(buf, base, desc, 0);
+}
+
+int
+xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+		  struct xdr_array2_desc *desc)
+{
+	if ((unsigned long) base + 4 + desc->array_len * desc->elem_size >
+	    buf->head->iov_len + buf->page_len + buf->tail->iov_len)
+		return -EINVAL;
+
+	return xdr_xcode_array2(buf, base, desc, 1);
+}

+ 57 - 14
net/sunrpc/xprt.c

@@ -569,8 +569,11 @@ void xprt_connect(struct rpc_task *task)
 		if (xprt->sock != NULL)
 		if (xprt->sock != NULL)
 			schedule_delayed_work(&xprt->sock_connect,
 			schedule_delayed_work(&xprt->sock_connect,
 					RPC_REESTABLISH_TIMEOUT);
 					RPC_REESTABLISH_TIMEOUT);
-		else
+		else {
 			schedule_work(&xprt->sock_connect);
 			schedule_work(&xprt->sock_connect);
+			if (!RPC_IS_ASYNC(task))
+				flush_scheduled_work();
+		}
 	}
 	}
 	return;
 	return;
  out_write:
  out_write:
@@ -725,7 +728,8 @@ csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 		goto no_checksum;
 		goto no_checksum;
 
 
 	desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
 	desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
-	xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits);
+	if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0)
+		return -1;
 	if (desc.offset != skb->len) {
 	if (desc.offset != skb->len) {
 		unsigned int csum2;
 		unsigned int csum2;
 		csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
 		csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
@@ -737,7 +741,8 @@ csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 		return -1;
 		return -1;
 	return 0;
 	return 0;
 no_checksum:
 no_checksum:
-	xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits);
+	if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0)
+		return -1;
 	if (desc.count)
 	if (desc.count)
 		return -1;
 		return -1;
 	return 0;
 	return 0;
@@ -821,10 +826,15 @@ tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
 {
 {
 	if (len > desc->count)
 	if (len > desc->count)
 		len = desc->count;
 		len = desc->count;
-	if (skb_copy_bits(desc->skb, desc->offset, p, len))
+	if (skb_copy_bits(desc->skb, desc->offset, p, len)) {
+		dprintk("RPC:      failed to copy %zu bytes from skb. %zu bytes remain\n",
+				len, desc->count);
 		return 0;
 		return 0;
+	}
 	desc->offset += len;
 	desc->offset += len;
 	desc->count -= len;
 	desc->count -= len;
+	dprintk("RPC:      copied %zu bytes from skb. %zu bytes remain\n",
+			len, desc->count);
 	return len;
 	return len;
 }
 }
 
 
@@ -863,6 +873,8 @@ tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
 static void
 static void
 tcp_check_recm(struct rpc_xprt *xprt)
 tcp_check_recm(struct rpc_xprt *xprt)
 {
 {
+	dprintk("RPC:      xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n",
+			xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags);
 	if (xprt->tcp_offset == xprt->tcp_reclen) {
 	if (xprt->tcp_offset == xprt->tcp_reclen) {
 		xprt->tcp_flags |= XPRT_COPY_RECM;
 		xprt->tcp_flags |= XPRT_COPY_RECM;
 		xprt->tcp_offset = 0;
 		xprt->tcp_offset = 0;
@@ -907,6 +919,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
 	struct rpc_rqst *req;
 	struct rpc_rqst *req;
 	struct xdr_buf *rcvbuf;
 	struct xdr_buf *rcvbuf;
 	size_t len;
 	size_t len;
+	ssize_t r;
 
 
 	/* Find and lock the request corresponding to this xid */
 	/* Find and lock the request corresponding to this xid */
 	spin_lock(&xprt->sock_lock);
 	spin_lock(&xprt->sock_lock);
@@ -927,15 +940,40 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
 		len = xprt->tcp_reclen - xprt->tcp_offset;
 		len = xprt->tcp_reclen - xprt->tcp_offset;
 		memcpy(&my_desc, desc, sizeof(my_desc));
 		memcpy(&my_desc, desc, sizeof(my_desc));
 		my_desc.count = len;
 		my_desc.count = len;
-		xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
+		r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
 					  &my_desc, tcp_copy_data);
 					  &my_desc, tcp_copy_data);
-		desc->count -= len;
-		desc->offset += len;
+		desc->count -= r;
+		desc->offset += r;
 	} else
 	} else
-		xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
+		r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
 					  desc, tcp_copy_data);
 					  desc, tcp_copy_data);
-	xprt->tcp_copied += len;
-	xprt->tcp_offset += len;
+
+	if (r > 0) {
+		xprt->tcp_copied += r;
+		xprt->tcp_offset += r;
+	}
+	if (r != len) {
+		/* Error when copying to the receive buffer,
+		 * usually because we weren't able to allocate
+		 * additional buffer pages. All we can do now
+		 * is turn off XPRT_COPY_DATA, so the request
+		 * will not receive any additional updates,
+		 * and time out.
+		 * Any remaining data from this record will
+		 * be discarded.
+		 */
+		xprt->tcp_flags &= ~XPRT_COPY_DATA;
+		dprintk("RPC:      XID %08x truncated request\n",
+				ntohl(xprt->tcp_xid));
+		dprintk("RPC:      xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
+				xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
+		goto out;
+	}
+
+	dprintk("RPC:      XID %08x read %u bytes\n",
+			ntohl(xprt->tcp_xid), r);
+	dprintk("RPC:      xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
+			xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
 
 
 	if (xprt->tcp_copied == req->rq_private_buf.buflen)
 	if (xprt->tcp_copied == req->rq_private_buf.buflen)
 		xprt->tcp_flags &= ~XPRT_COPY_DATA;
 		xprt->tcp_flags &= ~XPRT_COPY_DATA;
@@ -944,6 +982,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
 			xprt->tcp_flags &= ~XPRT_COPY_DATA;
 			xprt->tcp_flags &= ~XPRT_COPY_DATA;
 	}
 	}
 
 
+out:
 	if (!(xprt->tcp_flags & XPRT_COPY_DATA)) {
 	if (!(xprt->tcp_flags & XPRT_COPY_DATA)) {
 		dprintk("RPC: %4d received reply complete\n",
 		dprintk("RPC: %4d received reply complete\n",
 				req->rq_task->tk_pid);
 				req->rq_task->tk_pid);
@@ -967,6 +1006,7 @@ tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
 	desc->count -= len;
 	desc->count -= len;
 	desc->offset += len;
 	desc->offset += len;
 	xprt->tcp_offset += len;
 	xprt->tcp_offset += len;
+	dprintk("RPC:      discarded %u bytes\n", len);
 	tcp_check_recm(xprt);
 	tcp_check_recm(xprt);
 }
 }
 
 
@@ -1064,8 +1104,7 @@ tcp_state_change(struct sock *sk)
 	case TCP_SYN_RECV:
 	case TCP_SYN_RECV:
 		break;
 		break;
 	default:
 	default:
-		if (xprt_test_and_clear_connected(xprt))
-			rpc_wake_up_status(&xprt->pending, -ENOTCONN);
+		xprt_disconnect(xprt);
 		break;
 		break;
 	}
 	}
  out:
  out:
@@ -1203,6 +1242,8 @@ xprt_transmit(struct rpc_task *task)
 			list_add_tail(&req->rq_list, &xprt->recv);
 			list_add_tail(&req->rq_list, &xprt->recv);
 			spin_unlock_bh(&xprt->sock_lock);
 			spin_unlock_bh(&xprt->sock_lock);
 			xprt_reset_majortimeo(req);
 			xprt_reset_majortimeo(req);
+			/* Turn off autodisconnect */
+			del_singleshot_timer_sync(&xprt->timer);
 		}
 		}
 	} else if (!req->rq_bytes_sent)
 	} else if (!req->rq_bytes_sent)
 		return;
 		return;
@@ -1333,8 +1374,6 @@ xprt_reserve(struct rpc_task *task)
 		spin_lock(&xprt->xprt_lock);
 		spin_lock(&xprt->xprt_lock);
 		do_xprt_reserve(task);
 		do_xprt_reserve(task);
 		spin_unlock(&xprt->xprt_lock);
 		spin_unlock(&xprt->xprt_lock);
-		if (task->tk_rqstp)
-			del_timer_sync(&xprt->timer);
 	}
 	}
 }
 }
 
 
@@ -1649,6 +1688,10 @@ xprt_shutdown(struct rpc_xprt *xprt)
 	rpc_wake_up(&xprt->backlog);
 	rpc_wake_up(&xprt->backlog);
 	wake_up(&xprt->cong_wait);
 	wake_up(&xprt->cong_wait);
 	del_timer_sync(&xprt->timer);
 	del_timer_sync(&xprt->timer);
+
+	/* synchronously wait for connect worker to finish */
+	cancel_delayed_work(&xprt->sock_connect);
+	flush_scheduled_work();
 }
 }
 
 
 /*
 /*