
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (22 commits)
  ceph: do not carry i_lock for readdir from dcache
  fs/ceph/xattr.c: Use kmemdup
  rbd: passing wrong variable to bvec_kunmap_irq()
  rbd: null vs ERR_PTR
  ceph: fix num_pages_free accounting in pagelist
  ceph: add CEPH_MDS_OP_SETDIRLAYOUT and associated ioctl.
  ceph: don't crash when passed bad mount options
  ceph: fix debugfs warnings
  block: rbd: removing unnecessary test
  block: rbd: fixed may leaks
  ceph: switch from BKL to lock_flocks()
  ceph: preallocate flock state without locks held
  ceph: add pagelist_reserve, pagelist_truncate, pagelist_set_cursor
  ceph: use mapping->nrpages to determine if mapping is empty
  ceph: only invalidate on check_caps if we actually have pages
  ceph: do not hide .snap in root directory
  rbd: introduce rados block device (rbd), based on libceph
  ceph: factor out libceph from Ceph file system
  ceph-rbd: osdc support for osd call and rollback operations
  ceph: messenger and osdc changes for rbd
  ...
Linus Torvalds 14 years ago
parent
commit
2017bd1945
79 files changed with 5288 additions and 1919 deletions
  1. MAINTAINERS  +11 -0
  2. drivers/block/Kconfig  +17 -0
  3. drivers/block/Makefile  +1 -0
  4. drivers/block/rbd.c  +1841 -0
  5. drivers/block/rbd_types.h  +73 -0
  6. fs/ceph/Kconfig  +2 -12
  7. fs/ceph/Makefile  +2 -9
  8. fs/ceph/README  +0 -20
  9. fs/ceph/addr.c  +34 -31
  10. fs/ceph/caps.c  +20 -30
  11. fs/ceph/ceph_frag.c  +2 -1
  12. fs/ceph/debugfs.c  +94 -312
  13. fs/ceph/dir.c  +44 -53
  14. fs/ceph/export.c  +3 -2
  15. fs/ceph/file.c  +26 -181
  16. fs/ceph/inode.c  +10 -9
  17. fs/ceph/ioctl.c  +73 -4
  18. fs/ceph/ioctl.h  +3 -1
  19. fs/ceph/locks.c  +18 -5
  20. fs/ceph/mds_client.c  +81 -48
  21. fs/ceph/mds_client.h  +7 -13
  22. fs/ceph/mdsmap.c  +6 -5
  23. fs/ceph/pagelist.c  +0 -63
  24. fs/ceph/snap.c  +6 -4
  25. fs/ceph/strings.c  +3 -79
  26. fs/ceph/super.c  +380 -555
  27. fs/ceph/super.h  +145 -255
  28. fs/ceph/xattr.c  +10 -8
  29. include/linux/ceph/auth.h  +2 -2
  30. include/linux/ceph/buffer.h  +0 -0
  31. include/linux/ceph/ceph_debug.h  +3 -2
  32. include/linux/ceph/ceph_frag.h  +0 -0
  33. include/linux/ceph/ceph_fs.h  +1 -0
  34. include/linux/ceph/ceph_hash.h  +0 -0
  35. include/linux/ceph/debugfs.h  +33 -0
  36. include/linux/ceph/decode.h  +5 -0
  37. include/linux/ceph/libceph.h  +249 -0
  38. include/linux/ceph/mdsmap.h  +0 -0
  39. include/linux/ceph/messenger.h  +10 -2
  40. include/linux/ceph/mon_client.h  +1 -0
  41. include/linux/ceph/msgpool.h  +0 -0
  42. include/linux/ceph/msgr.h  +0 -0
  43. include/linux/ceph/osd_client.h  +67 -0
  44. include/linux/ceph/osdmap.h  +3 -1
  45. include/linux/ceph/pagelist.h  +22 -1
  46. include/linux/ceph/rados.h  +0 -0
  47. include/linux/ceph/types.h  +0 -0
  48. include/linux/crush/crush.h  +0 -0
  49. include/linux/crush/hash.h  +0 -0
  50. include/linux/crush/mapper.h  +0 -0
  51. net/Kconfig  +1 -0
  52. net/Makefile  +1 -0
  53. net/ceph/Kconfig  +28 -0
  54. net/ceph/Makefile  +37 -0
  55. net/ceph/armor.c  +0 -0
  56. net/ceph/auth.c  +5 -5
  57. net/ceph/auth_none.c  +4 -3
  58. net/ceph/auth_none.h  +1 -2
  59. net/ceph/auth_x.c  +5 -4
  60. net/ceph/auth_x.h  +2 -1
  61. net/ceph/auth_x_protocol.h  +0 -0
  62. net/ceph/buffer.c  +6 -3
  63. net/ceph/ceph_common.c  +529 -0
  64. net/ceph/ceph_fs.c  +4 -1
  65. net/ceph/ceph_hash.c  +1 -1
  66. net/ceph/ceph_strings.c  +84 -0
  67. net/ceph/crush/crush.c  +1 -1
  68. net/ceph/crush/hash.c  +1 -1
  69. net/ceph/crush/mapper.c  +2 -2
  70. net/ceph/crypto.c  +2 -2
  71. net/ceph/crypto.h  +2 -2
  72. net/ceph/debugfs.c  +267 -0
  73. net/ceph/messenger.c  +236 -60
  74. net/ceph/mon_client.c  +41 -32
  75. net/ceph/msgpool.c  +2 -2
  76. net/ceph/osd_client.c  +317 -83
  77. net/ceph/osdmap.c  +24 -6
  78. net/ceph/pagelist.c  +154 -0
  79. net/ceph/pagevec.c  +223 -0

+ 11 - 0
MAINTAINERS

@@ -1527,6 +1527,8 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 S:	Supported
 F:	Documentation/filesystems/ceph.txt
 F:	fs/ceph
+F:	net/ceph
+F:	include/linux/ceph
 
 CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
 M:	David Vrabel <david.vrabel@csr.com>
@@ -4805,6 +4807,15 @@ F:	fs/qnx4/
 F:	include/linux/qnx4_fs.h
 F:	include/linux/qnxtypes.h
 
+RADOS BLOCK DEVICE (RBD)
+M:	Yehuda Sadeh <yehuda@hq.newdream.net>
+M:	Sage Weil <sage@newdream.net>
+M:	ceph-devel@vger.kernel.org
+S:	Supported
+F:	drivers/block/rbd.c
+F:	drivers/block/rbd_types.h
+
 RADEON FRAMEBUFFER DISPLAY DRIVER
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
 L:	linux-fbdev@vger.kernel.org

+ 17 - 0
drivers/block/Kconfig

@@ -488,4 +488,21 @@ config BLK_DEV_HD
 
 	  If unsure, say N.
 
+config BLK_DEV_RBD
+	tristate "Rados block device (RBD)"
+	depends on INET && EXPERIMENTAL && BLOCK
+	select CEPH_LIB
+	select LIBCRC32C
+	select CRYPTO_AES
+	select CRYPTO
+	default n
+	help
+	  Say Y here if you want to include the Rados block device, which stripes
+	  a block device over objects stored in the Ceph distributed object
+	  store.
+
+	  More information at http://ceph.newdream.net/.
+
+	  If unsure, say N.
+
 endif # BLK_DEV

+ 1 - 0
drivers/block/Makefile

@@ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD)	+= hd.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
 obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/
+obj-$(CONFIG_BLK_DEV_RBD)     += rbd.o
 
 swim_mod-objs	:= swim.o swim_asm.o

+ 1841 - 0
drivers/block/rbd.c

@@ -0,0 +1,1841 @@
+/*
+   rbd.c -- Export ceph rados objects as a Linux block device
+
+
+   based on drivers/block/osdblk.c:
+
+   Copyright 2009 Red Hat, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+   Instructions for use
+   --------------------
+
+   1) Map a Linux block device to an existing rbd image.
+
+      Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
+
+      $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add
+
+      The snapshot name can be "-" or omitted to map the image read/write.
+
+   2) List all active blkdev<->object mappings.
+
+      In this example, we have performed step #1 twice, creating two blkdevs,
+      mapped to two separate rados objects in the rados rbd pool.
+
+      $ cat /sys/class/rbd/list
+      #id     major   client_name     pool    name    snap    KB
+      0       254     client4143      rbd     foo     -      1024000
+
+      The columns, in order, are:
+      - blkdev unique id
+      - blkdev assigned major
+      - rados client id
+      - rados pool name
+      - rados block device name
+      - mapped snapshot ("-" if none)
+      - device size in KB
+
+
+   3) Create a snapshot.
+
+      Usage: <blkdev id> <snapname>
+
+      $ echo "0 mysnap" > /sys/class/rbd/snap_create
+
+
+   4) List snapshots.
+
+      $ cat /sys/class/rbd/snaps_list
+      #id     snap    KB
+      0       -       1024000 (*)
+      0       foo     1024000
+
+      The columns, in order, are:
+      - blkdev unique id
+      - snapshot name, '-' means none (active read/write version)
+      - size of device at time of snapshot
+      - the (*) indicates this is the active version
+
+   5) Rollback to snapshot.
+
+      Usage: <blkdev id> <snapname>
+
+      $ echo "0 mysnap" > /sys/class/rbd/snap_rollback
+
+
+   6) Map an image using a snapshot.
+
+      A snapshot mapping is read-only. This is done by passing
+      snap=<snapname> in the options when adding a device.
+
+      $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add
+
+
+   7) Remove an active blkdev<->rbd image mapping.
+
+      In this example, we remove the mapping with blkdev unique id 1.
+
+      $ echo 1 > /sys/class/rbd/remove
+
+
+   NOTE:  The actual creation and deletion of rados objects is outside the scope
+   of this driver.
+
+ */
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/decode.h>
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+
+#include "rbd_types.h"
+
+#define DRV_NAME "rbd"
+#define DRV_NAME_LONG "rbd (rados block device)"
+
+#define RBD_MINORS_PER_MAJOR	256		/* max minors per blkdev */
+
+#define RBD_MAX_MD_NAME_LEN	(96 + sizeof(RBD_SUFFIX))
+#define RBD_MAX_POOL_NAME_LEN	64
+#define RBD_MAX_SNAP_NAME_LEN	32
+#define RBD_MAX_OPT_LEN		1024
+
+#define RBD_SNAP_HEAD_NAME	"-"
+
+#define DEV_NAME_LEN		32
+
+/*
+ * block device image metadata (in-memory version)
+ */
+struct rbd_image_header {
+	u64 image_size;
+	char block_name[32];
+	__u8 obj_order;
+	__u8 crypt_type;
+	__u8 comp_type;
+	struct rw_semaphore snap_rwsem;
+	struct ceph_snap_context *snapc;
+	size_t snap_names_len;
+	u64 snap_seq;
+	u32 total_snaps;
+
+	char *snap_names;
+	u64 *snap_sizes;
+};
+
+/*
+ * an instance of the client.  multiple devices may share a client.
+ */
+struct rbd_client {
+	struct ceph_client	*client;
+	struct kref		kref;
+	struct list_head	node;
+};
+
+/*
+ * a single io request
+ */
+struct rbd_request {
+	struct request		*rq;		/* blk layer request */
+	struct bio		*bio;		/* cloned bio */
+	struct page		**pages;	/* list of used pages */
+	u64			len;
+};
+
+/*
+ * a single device
+ */
+struct rbd_device {
+	int			id;		/* blkdev unique id */
+
+	int			major;		/* blkdev assigned major */
+	struct gendisk		*disk;		/* blkdev's gendisk and rq */
+	struct request_queue	*q;
+
+	struct ceph_client	*client;
+	struct rbd_client	*rbd_client;
+
+	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
+
+	spinlock_t		lock;		/* queue lock */
+
+	struct rbd_image_header	header;
+	char			obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
+	int			obj_len;
+	char			obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
+	char			pool_name[RBD_MAX_POOL_NAME_LEN];
+	int			poolid;
+
+	char                    snap_name[RBD_MAX_SNAP_NAME_LEN];
+	u32 cur_snap;	/* index+1 of current snapshot within snap context
+			   0 - for the head */
+	int read_only;
+
+	struct list_head	node;
+};
+
+static spinlock_t node_lock;      /* protects client get/put */
+
+static struct class *class_rbd;	  /* /sys/class/rbd */
+static DEFINE_MUTEX(ctl_mutex);	  /* Serialize open/close/setup/teardown */
+static LIST_HEAD(rbd_dev_list);    /* devices */
+static LIST_HEAD(rbd_client_list);      /* clients */
+
+
+static int rbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	struct rbd_device *rbd_dev = disk->private_data;
+
+	set_device_ro(bdev, rbd_dev->read_only);
+
+	if ((mode & FMODE_WRITE) && rbd_dev->read_only)
+		return -EROFS;
+
+	return 0;
+}
+
+static const struct block_device_operations rbd_bd_ops = {
+	.owner			= THIS_MODULE,
+	.open			= rbd_open,
+};
+
+/*
+ * Initialize an rbd client instance.
+ * We own *opt.
+ */
+static struct rbd_client *rbd_client_create(struct ceph_options *opt)
+{
+	struct rbd_client *rbdc;
+	int ret = -ENOMEM;
+
+	dout("rbd_client_create\n");
+	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
+	if (!rbdc)
+		goto out_opt;
+
+	kref_init(&rbdc->kref);
+	INIT_LIST_HEAD(&rbdc->node);
+
+	rbdc->client = ceph_create_client(opt, rbdc);
+	if (IS_ERR(rbdc->client))
+		goto out_rbdc;
+	opt = NULL; /* Now rbdc->client is responsible for opt */
+
+	ret = ceph_open_session(rbdc->client);
+	if (ret < 0)
+		goto out_err;
+
+	spin_lock(&node_lock);
+	list_add_tail(&rbdc->node, &rbd_client_list);
+	spin_unlock(&node_lock);
+
+	dout("rbd_client_create created %p\n", rbdc);
+	return rbdc;
+
+out_err:
+	ceph_destroy_client(rbdc->client);
+out_rbdc:
+	kfree(rbdc);
+out_opt:
+	if (opt)
+		ceph_destroy_options(opt);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Find a ceph client with specific addr and configuration.
+ */
+static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
+{
+	struct rbd_client *client_node;
+
+	if (opt->flags & CEPH_OPT_NOSHARE)
+		return NULL;
+
+	list_for_each_entry(client_node, &rbd_client_list, node)
+		if (ceph_compare_options(opt, client_node->client) == 0)
+			return client_node;
+	return NULL;
+}
+
+/*
+ * Get a ceph client with specific addr and configuration, if one does
+ * not exist create it.
+ */
+static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
+			  char *options)
+{
+	struct rbd_client *rbdc;
+	struct ceph_options *opt;
+	int ret;
+
+	ret = ceph_parse_options(&opt, options, mon_addr,
+				 mon_addr + strlen(mon_addr), NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	spin_lock(&node_lock);
+	rbdc = __rbd_client_find(opt);
+	if (rbdc) {
+		ceph_destroy_options(opt);
+
+		/* using an existing client */
+		kref_get(&rbdc->kref);
+		rbd_dev->rbd_client = rbdc;
+		rbd_dev->client = rbdc->client;
+		spin_unlock(&node_lock);
+		return 0;
+	}
+	spin_unlock(&node_lock);
+
+	rbdc = rbd_client_create(opt);
+	if (IS_ERR(rbdc))
+		return PTR_ERR(rbdc);
+
+	rbd_dev->rbd_client = rbdc;
+	rbd_dev->client = rbdc->client;
+	return 0;
+}
+
+/*
+ * Destroy ceph client
+ */
+static void rbd_client_release(struct kref *kref)
+{
+	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
+
+	dout("rbd_release_client %p\n", rbdc);
+	spin_lock(&node_lock);
+	list_del(&rbdc->node);
+	spin_unlock(&node_lock);
+
+	ceph_destroy_client(rbdc->client);
+	kfree(rbdc);
+}
+
+/*
+ * Drop reference to ceph client node. If it's not referenced anymore, release
+ * it.
+ */
+static void rbd_put_client(struct rbd_device *rbd_dev)
+{
+	kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
+	rbd_dev->rbd_client = NULL;
+	rbd_dev->client = NULL;
+}
+
+
+/*
+ * Create a new header structure, translate header format from the on-disk
+ * header.
+ */
+static int rbd_header_from_disk(struct rbd_image_header *header,
+				 struct rbd_image_header_ondisk *ondisk,
+				 int allocated_snaps,
+				 gfp_t gfp_flags)
+{
+	int i;
+	u32 snap_count = le32_to_cpu(ondisk->snap_count);
+	int ret = -ENOMEM;
+
+	init_rwsem(&header->snap_rwsem);
+
+	header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
+	header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
+				snap_count *
+				 sizeof(struct rbd_image_snap_ondisk),
+				gfp_flags);
+	if (!header->snapc)
+		return -ENOMEM;
+	if (snap_count) {
+		header->snap_names = kmalloc(header->snap_names_len,
+					     GFP_KERNEL);
+		if (!header->snap_names)
+			goto err_snapc;
+		header->snap_sizes = kmalloc(snap_count * sizeof(u64),
+					     GFP_KERNEL);
+		if (!header->snap_sizes)
+			goto err_names;
+	} else {
+		header->snap_names = NULL;
+		header->snap_sizes = NULL;
+	}
+	memcpy(header->block_name, ondisk->block_name,
+	       sizeof(ondisk->block_name));
+
+	header->image_size = le64_to_cpu(ondisk->image_size);
+	header->obj_order = ondisk->options.order;
+	header->crypt_type = ondisk->options.crypt_type;
+	header->comp_type = ondisk->options.comp_type;
+
+	atomic_set(&header->snapc->nref, 1);
+	header->snap_seq = le64_to_cpu(ondisk->snap_seq);
+	header->snapc->num_snaps = snap_count;
+	header->total_snaps = snap_count;
+
+	if (snap_count &&
+	    allocated_snaps == snap_count) {
+		for (i = 0; i < snap_count; i++) {
+			header->snapc->snaps[i] =
+				le64_to_cpu(ondisk->snaps[i].id);
+			header->snap_sizes[i] =
+				le64_to_cpu(ondisk->snaps[i].image_size);
+		}
+
+		/* copy snapshot names */
+		memcpy(header->snap_names, &ondisk->snaps[i],
+			header->snap_names_len);
+	}
+
+	return 0;
+
+err_names:
+	kfree(header->snap_names);
+err_snapc:
+	kfree(header->snapc);
+	return ret;
+}
+
+static int snap_index(struct rbd_image_header *header, int snap_num)
+{
+	return header->total_snaps - snap_num;
+}
+
+static u64 cur_snap_id(struct rbd_device *rbd_dev)
+{
+	struct rbd_image_header *header = &rbd_dev->header;
+
+	if (!rbd_dev->cur_snap)
+		return 0;
+
+	return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
+}
+
+static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
+			u64 *seq, u64 *size)
+{
+	int i;
+	char *p = header->snap_names;
+
+	for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
+		if (strcmp(snap_name, p) == 0)
+			break;
+	}
+	if (i == header->total_snaps)
+		return -ENOENT;
+	if (seq)
+		*seq = header->snapc->snaps[i];
+
+	if (size)
+		*size = header->snap_sizes[i];
+
+	return i;
+}
+
+static int rbd_header_set_snap(struct rbd_device *dev,
+			       const char *snap_name,
+			       u64 *size)
+{
+	struct rbd_image_header *header = &dev->header;
+	struct ceph_snap_context *snapc = header->snapc;
+	int ret = -ENOENT;
+
+	down_write(&header->snap_rwsem);
+
+	if (!snap_name ||
+	    !*snap_name ||
+	    strcmp(snap_name, "-") == 0 ||
+	    strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) {
+		if (header->total_snaps)
+			snapc->seq = header->snap_seq;
+		else
+			snapc->seq = 0;
+		dev->cur_snap = 0;
+		dev->read_only = 0;
+		if (size)
+			*size = header->image_size;
+	} else {
+		ret = snap_by_name(header, snap_name, &snapc->seq, size);
+		if (ret < 0)
+			goto done;
+
+		dev->cur_snap = header->total_snaps - ret;
+		dev->read_only = 1;
+	}
+
+	ret = 0;
+done:
+	up_write(&header->snap_rwsem);
+	return ret;
+}
+
+static void rbd_header_free(struct rbd_image_header *header)
+{
+	kfree(header->snapc);
+	kfree(header->snap_names);
+	kfree(header->snap_sizes);
+}
+
+/*
+ * get the actual striped segment name, offset and length
+ */
+static u64 rbd_get_segment(struct rbd_image_header *header,
+			   const char *block_name,
+			   u64 ofs, u64 len,
+			   char *seg_name, u64 *segofs)
+{
+	u64 seg = ofs >> header->obj_order;
+
+	if (seg_name)
+		snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
+			 "%s.%012llx", block_name, seg);
+
+	ofs = ofs & ((1 << header->obj_order) - 1);
+	len = min_t(u64, len, (1 << header->obj_order) - ofs);
+
+	if (segofs)
+		*segofs = ofs;
+
+	return len;
+}
+
+/*
+ * bio helpers
+ */
+
+static void bio_chain_put(struct bio *chain)
+{
+	struct bio *tmp;
+
+	while (chain) {
+		tmp = chain;
+		chain = chain->bi_next;
+		bio_put(tmp);
+	}
+}
+
+/*
+ * zeros a bio chain, starting at specific offset
+ */
+static void zero_bio_chain(struct bio *chain, int start_ofs)
+{
+	struct bio_vec *bv;
+	unsigned long flags;
+	void *buf;
+	int i;
+	int pos = 0;
+
+	while (chain) {
+		bio_for_each_segment(bv, chain, i) {
+			if (pos + bv->bv_len > start_ofs) {
+				int remainder = max(start_ofs - pos, 0);
+				buf = bvec_kmap_irq(bv, &flags);
+				memset(buf + remainder, 0,
+				       bv->bv_len - remainder);
+				bvec_kunmap_irq(buf, &flags);
+			}
+			pos += bv->bv_len;
+		}
+
+		chain = chain->bi_next;
+	}
+}
+
+/*
+ * bio_chain_clone - clone a chain of bios up to a certain length.
+ * might return a bio_pair that will need to be released.
+ */
+static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
+				   struct bio_pair **bp,
+				   int len, gfp_t gfpmask)
+{
+	struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
+	int total = 0;
+
+	if (*bp) {
+		bio_pair_release(*bp);
+		*bp = NULL;
+	}
+
+	while (old_chain && (total < len)) {
+		tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
+		if (!tmp)
+			goto err_out;
+
+		if (total + old_chain->bi_size > len) {
+			struct bio_pair *bp;
+
+			/*
+			 * this split can only happen with a single paged bio,
+			 * bio_split will BUG_ON if this is not the case
+			 */
+			dout("bio_chain_clone split! total=%d remaining=%d"
+			     "bi_size=%d\n",
+			     (int)total, (int)len-total,
+			     (int)old_chain->bi_size);
+
+			/* split the bio. We'll release it either in the next
+			   call, or it will have to be released outside */
+			bp = bio_split(old_chain, (len - total) / 512ULL);
+			if (!bp)
+				goto err_out;
+
+			__bio_clone(tmp, &bp->bio1);
+
+			*next = &bp->bio2;
+		} else {
+			__bio_clone(tmp, old_chain);
+			*next = old_chain->bi_next;
+		}
+
+		tmp->bi_bdev = NULL;
+		gfpmask &= ~__GFP_WAIT;
+		tmp->bi_next = NULL;
+
+		if (!new_chain) {
+			new_chain = tail = tmp;
+		} else {
+			tail->bi_next = tmp;
+			tail = tmp;
+		}
+		old_chain = old_chain->bi_next;
+
+		total += tmp->bi_size;
+	}
+
+	BUG_ON(total < len);
+
+	if (tail)
+		tail->bi_next = NULL;
+
+	*old = old_chain;
+
+	return new_chain;
+
+err_out:
+	dout("bio_chain_clone with err\n");
+	bio_chain_put(new_chain);
+	return NULL;
+}
+
+/*
+ * helpers for osd request op vectors.
+ */
+static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
+			    int num_ops,
+			    int opcode,
+			    u32 payload_len)
+{
+	*ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
+		       GFP_NOIO);
+	if (!*ops)
+		return -ENOMEM;
+	(*ops)[0].op = opcode;
+	/*
+	 * op extent offset and length will be set later on
+	 * in calc_raw_layout()
+	 */
+	(*ops)[0].payload_len = payload_len;
+	return 0;
+}
+
+static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
+{
+	kfree(ops);
+}
+
+/*
+ * Send ceph osd request
+ */
+static int rbd_do_request(struct request *rq,
+			  struct rbd_device *dev,
+			  struct ceph_snap_context *snapc,
+			  u64 snapid,
+			  const char *obj, u64 ofs, u64 len,
+			  struct bio *bio,
+			  struct page **pages,
+			  int num_pages,
+			  int flags,
+			  struct ceph_osd_req_op *ops,
+			  int num_reply,
+			  void (*rbd_cb)(struct ceph_osd_request *req,
+					 struct ceph_msg *msg))
+{
+	struct ceph_osd_request *req;
+	struct ceph_file_layout *layout;
+	int ret;
+	u64 bno;
+	struct timespec mtime = CURRENT_TIME;
+	struct rbd_request *req_data;
+	struct ceph_osd_request_head *reqhead;
+	struct rbd_image_header *header = &dev->header;
+
+	ret = -ENOMEM;
+	req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
+	if (!req_data)
+		goto done;
+
+	dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs);
+
+	down_read(&header->snap_rwsem);
+
+	req = ceph_osdc_alloc_request(&dev->client->osdc, flags,
+				      snapc,
+				      ops,
+				      false,
+				      GFP_NOIO, pages, bio);
+	if (IS_ERR(req)) {
+		up_read(&header->snap_rwsem);
+		ret = PTR_ERR(req);
+		goto done_pages;
+	}
+
+	req->r_callback = rbd_cb;
+
+	req_data->rq = rq;
+	req_data->bio = bio;
+	req_data->pages = pages;
+	req_data->len = len;
+
+	req->r_priv = req_data;
+
+	reqhead = req->r_request->front.iov_base;
+	reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
+
+	strncpy(req->r_oid, obj, sizeof(req->r_oid));
+	req->r_oid_len = strlen(req->r_oid);
+
+	layout = &req->r_file_layout;
+	memset(layout, 0, sizeof(*layout));
+	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+	layout->fl_stripe_count = cpu_to_le32(1);
+	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+	layout->fl_pg_preferred = cpu_to_le32(-1);
+	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
+	ceph_calc_raw_layout(&dev->client->osdc, layout, snapid,
+			     ofs, &len, &bno, req, ops);
+
+	ceph_osdc_build_request(req, ofs, &len,
+				ops,
+				snapc,
+				&mtime,
+				req->r_oid, req->r_oid_len);
+	up_read(&header->snap_rwsem);
+
+	ret = ceph_osdc_start_request(&dev->client->osdc, req, false);
+	if (ret < 0)
+		goto done_err;
+
+	if (!rbd_cb) {
+		ret = ceph_osdc_wait_request(&dev->client->osdc, req);
+		ceph_osdc_put_request(req);
+	}
+	return ret;
+
+done_err:
+	bio_chain_put(req_data->bio);
+	ceph_osdc_put_request(req);
+done_pages:
+	kfree(req_data);
+done:
+	if (rq)
+		blk_end_request(rq, ret, len);
+	return ret;
+}
+
+/*
+ * Ceph osd op callback
+ */
+static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
+{
+	struct rbd_request *req_data = req->r_priv;
+	struct ceph_osd_reply_head *replyhead;
+	struct ceph_osd_op *op;
+	__s32 rc;
+	u64 bytes;
+	int read_op;
+
+	/* parse reply */
+	replyhead = msg->front.iov_base;
+	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
+	op = (void *)(replyhead + 1);
+	rc = le32_to_cpu(replyhead->result);
+	bytes = le64_to_cpu(op->extent.length);
+	read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
+
+	dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
+
+	if (rc == -ENOENT && read_op) {
+		zero_bio_chain(req_data->bio, 0);
+		rc = 0;
+	} else if (rc == 0 && read_op && bytes < req_data->len) {
+		zero_bio_chain(req_data->bio, bytes);
+		bytes = req_data->len;
+	}
+
+	blk_end_request(req_data->rq, rc, bytes);
+
+	if (req_data->bio)
+		bio_chain_put(req_data->bio);
+
+	ceph_osdc_put_request(req);
+	kfree(req_data);
+}
+
+/*
+ * Do a synchronous ceph osd operation
+ */
+static int rbd_req_sync_op(struct rbd_device *dev,
+			   struct ceph_snap_context *snapc,
+			   u64 snapid,
+			   int opcode,
+			   int flags,
+			   struct ceph_osd_req_op *orig_ops,
+			   int num_reply,
+			   const char *obj,
+			   u64 ofs, u64 len,
+			   char *buf)
+{
+	int ret;
+	struct page **pages;
+	int num_pages;
+	struct ceph_osd_req_op *ops = orig_ops;
+	u32 payload_len;
+
+	num_pages = calc_pages_for(ofs, len);
+	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	if (!orig_ops) {
+		payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
+		ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
+		if (ret < 0)
+			goto done;
+
+		if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
+			ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
+			if (ret < 0)
+				goto done_ops;
+		}
+	}
+
+	ret = rbd_do_request(NULL, dev, snapc, snapid,
+			  obj, ofs, len, NULL,
+			  pages, num_pages,
+			  flags,
+			  ops,
+			  2,
+			  NULL);
+	if (ret < 0)
+		goto done_ops;
+
+	if ((flags & CEPH_OSD_FLAG_READ) && buf)
+		ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
+
+done_ops:
+	if (!orig_ops)
+		rbd_destroy_ops(ops);
+done:
+	ceph_release_page_vector(pages, num_pages);
+	return ret;
+}
+
+/*
+ * Do an asynchronous ceph osd operation
+ */
+static int rbd_do_op(struct request *rq,
+		     struct rbd_device *rbd_dev,
+		     struct ceph_snap_context *snapc,
+		     u64 snapid,
+		     int opcode, int flags, int num_reply,
+		     u64 ofs, u64 len,
+		     struct bio *bio)
+{
+	char *seg_name;
+	u64 seg_ofs;
+	u64 seg_len;
+	int ret;
+	struct ceph_osd_req_op *ops;
+	u32 payload_len;
+
+	seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
+	if (!seg_name)
+		return -ENOMEM;
+
+	seg_len = rbd_get_segment(&rbd_dev->header,
+				  rbd_dev->header.block_name,
+				  ofs, len,
+				  seg_name, &seg_ofs);
+
+	payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
+
+	ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
+	if (ret < 0)
+		goto done;
+
+	/* we've taken care of segment sizes earlier when we
+	   cloned the bios. We should never have a segment
+	   truncated at this point */
+	BUG_ON(seg_len < len);
+
+	ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
+			     seg_name, seg_ofs, seg_len,
+			     bio,
+			     NULL, 0,
+			     flags,
+			     ops,
+			     num_reply,
+			     rbd_req_cb);
+done:
+	kfree(seg_name);
+	return ret;
+}
+
+/*
+ * Request async osd write
+ */
+static int rbd_req_write(struct request *rq,
+			 struct rbd_device *rbd_dev,
+			 struct ceph_snap_context *snapc,
+			 u64 ofs, u64 len,
+			 struct bio *bio)
+{
+	return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
+			 CEPH_OSD_OP_WRITE,
+			 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+			 2,
+			 ofs, len, bio);
+}
+
+/*
+ * Request async osd read
+ */
+static int rbd_req_read(struct request *rq,
+			 struct rbd_device *rbd_dev,
+			 u64 snapid,
+			 u64 ofs, u64 len,
+			 struct bio *bio)
+{
+	return rbd_do_op(rq, rbd_dev, NULL,
+			 (snapid ? snapid : CEPH_NOSNAP),
+			 CEPH_OSD_OP_READ,
+			 CEPH_OSD_FLAG_READ,
+			 2,
+			 ofs, len, bio);
+}
+
+/*
+ * Request sync osd read
+ */
+static int rbd_req_sync_read(struct rbd_device *dev,
+			  struct ceph_snap_context *snapc,
+			  u64 snapid,
+			  const char *obj,
+			  u64 ofs, u64 len,
+			  char *buf)
+{
+	return rbd_req_sync_op(dev, NULL,
+			       (snapid ? snapid : CEPH_NOSNAP),
+			       CEPH_OSD_OP_READ,
+			       CEPH_OSD_FLAG_READ,
+			       NULL,
+			       1, obj, ofs, len, buf);
+}
+
+/*
+ * Request sync osd rollback to a snapshot
+ */
+static int rbd_req_sync_rollback_obj(struct rbd_device *dev,
+				     u64 snapid,
+				     const char *obj)
+{
+	struct ceph_osd_req_op *ops;
+	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0);
+	if (ret < 0)
+		return ret;
+
+	ops[0].snap.snapid = snapid;
+
+	ret = rbd_req_sync_op(dev, NULL,
+			       CEPH_NOSNAP,
+			       0,
+			       CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+			       ops,
+			       1, obj, 0, 0, NULL);
+
+	rbd_destroy_ops(ops);
+
+	if (ret < 0)
+		return ret;
+
+	return ret;
+}
+
+/*
+ * Request sync osd class method call (exec)
+ */
+static int rbd_req_sync_exec(struct rbd_device *dev,
+			     const char *obj,
+			     const char *cls,
+			     const char *method,
+			     const char *data,
+			     int len)
+{
+	struct ceph_osd_req_op *ops;
+	int cls_len = strlen(cls);
+	int method_len = strlen(method);
+	int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
+				    cls_len + method_len + len);
+	if (ret < 0)
+		return ret;
+
+	ops[0].cls.class_name = cls;
+	ops[0].cls.class_len = (__u8)cls_len;
+	ops[0].cls.method_name = method;
+	ops[0].cls.method_len = (__u8)method_len;
+	ops[0].cls.argc = 0;
+	ops[0].cls.indata = data;
+	ops[0].cls.indata_len = len;
+
+	ret = rbd_req_sync_op(dev, NULL,
+			       CEPH_NOSNAP,
+			       0,
+			       CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+			       ops,
+			       1, obj, 0, 0, NULL);
+
+	rbd_destroy_ops(ops);
+
+	dout("cls_exec returned %d\n", ret);
+	return ret;
+}
+
+/*
+ * block device queue callback
+ */
+static void rbd_rq_fn(struct request_queue *q)
+{
+	struct rbd_device *rbd_dev = q->queuedata;
+	struct request *rq;
+	struct bio_pair *bp = NULL;
+
+	rq = blk_fetch_request(q);
+
+	while (1) {
+		struct bio *bio;
+		struct bio *rq_bio, *next_bio = NULL;
+		bool do_write;
+		int size, op_size = 0;
+		u64 ofs;
+
+		/* peek at request from block layer */
+		if (!rq)
+			break;
+
+		dout("fetched request\n");
+
+		/* filter out block requests we don't understand */
+		if ((rq->cmd_type != REQ_TYPE_FS)) {
+			__blk_end_request_all(rq, 0);
+			goto next;
+		}
+
+		/* deduce our operation (read, write) */
+		do_write = (rq_data_dir(rq) == WRITE);
+
+		size = blk_rq_bytes(rq);
+		ofs = blk_rq_pos(rq) * 512ULL;
+		rq_bio = rq->bio;
+		if (do_write && rbd_dev->read_only) {
+			__blk_end_request_all(rq, -EROFS);
+			goto next;
+		}
+
+		spin_unlock_irq(q->queue_lock);
+
+		dout("%s 0x%x bytes at 0x%llx\n",
+		     do_write ? "write" : "read",
+		     size, blk_rq_pos(rq) * 512ULL);
+
+		do {
+			/* a bio clone to be passed down to OSD req */
+			dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
+			op_size = rbd_get_segment(&rbd_dev->header,
+						  rbd_dev->header.block_name,
+						  ofs, size,
+						  NULL, NULL);
+			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
+					      op_size, GFP_ATOMIC);
+			if (!bio) {
+				spin_lock_irq(q->queue_lock);
+				__blk_end_request_all(rq, -ENOMEM);
+				goto next;
+			}
+
+			/* init OSD command: write or read */
+			if (do_write)
+				rbd_req_write(rq, rbd_dev,
+					      rbd_dev->header.snapc,
+					      ofs,
+					      op_size, bio);
+			else
+				rbd_req_read(rq, rbd_dev,
+					     cur_snap_id(rbd_dev),
+					     ofs,
+					     op_size, bio);
+
+			size -= op_size;
+			ofs += op_size;
+
+			rq_bio = next_bio;
+		} while (size > 0);
+
+		if (bp)
+			bio_pair_release(bp);
+
+		spin_lock_irq(q->queue_lock);
+next:
+		rq = blk_fetch_request(q);
+	}
+}
+
+/*
+ * a queue callback. Makes sure that we don't create a bio that spans across
+ * multiple osd objects. One exception would be with a single page bios,
+ * which we handle later at bio_chain_clone
+ */
+static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
+			  struct bio_vec *bvec)
+{
+	struct rbd_device *rbd_dev = q->queuedata;
+	unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9);
+	sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
+	unsigned int bio_sectors = bmd->bi_size >> 9;
+	int max;
+
+	max =  (chunk_sectors - ((sector & (chunk_sectors - 1))
+				 + bio_sectors)) << 9;
+	if (max < 0)
+		max = 0; /* bio_add cannot handle a negative return */
+	if (max <= bvec->bv_len && bio_sectors == 0)
+		return bvec->bv_len;
+	return max;
+}
+
+static void rbd_free_disk(struct rbd_device *rbd_dev)
+{
+	struct gendisk *disk = rbd_dev->disk;
+
+	if (!disk)
+		return;
+
+	rbd_header_free(&rbd_dev->header);
+
+	if (disk->flags & GENHD_FL_UP)
+		del_gendisk(disk);
+	if (disk->queue)
+		blk_cleanup_queue(disk->queue);
+	put_disk(disk);
+}
+
+/*
+ * reload the on-disk header
+ */
+static int rbd_read_header(struct rbd_device *rbd_dev,
+			   struct rbd_image_header *header)
+{
+	ssize_t rc;
+	struct rbd_image_header_ondisk *dh;
+	int snap_count = 0;
+	u64 snap_names_len = 0;
+
+	while (1) {
+		int len = sizeof(*dh) +
+			  snap_count * sizeof(struct rbd_image_snap_ondisk) +
+			  snap_names_len;
+
+		rc = -ENOMEM;
+		dh = kmalloc(len, GFP_KERNEL);
+		if (!dh)
+			return -ENOMEM;
+
+		rc = rbd_req_sync_read(rbd_dev,
+				       NULL, CEPH_NOSNAP,
+				       rbd_dev->obj_md_name,
+				       0, len,
+				       (char *)dh);
+		if (rc < 0)
+			goto out_dh;
+
+		rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
+		if (rc < 0)
+			goto out_dh;
+
+		if (snap_count != header->total_snaps) {
+			snap_count = header->total_snaps;
+			snap_names_len = header->snap_names_len;
+			rbd_header_free(header);
+			kfree(dh);
+			continue;
+		}
+		break;
+	}
+
+out_dh:
+	kfree(dh);
+	return rc;
+}
+
+/*
+ * create a snapshot
+ */
+static int rbd_header_add_snap(struct rbd_device *dev,
+			       const char *snap_name,
+			       gfp_t gfp_flags)
+{
+	int name_len = strlen(snap_name);
+	u64 new_snapid;
+	int ret;
+	void *data, *data_start, *data_end;
+
+	/* we should create a snapshot only if we're pointing at the head */
+	if (dev->cur_snap)
+		return -EINVAL;
+
+	ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid,
+				      &new_snapid);
+	dout("created snapid=%lld\n", new_snapid);
+	if (ret < 0)
+		return ret;
+
+	data = kmalloc(name_len + 16, gfp_flags);
+	if (!data)
+		return -ENOMEM;
+
+	data_start = data;
+	data_end = data + name_len + 16;
+
+	ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad);
+	ceph_encode_64_safe(&data, data_end, new_snapid, bad);
+
+	ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
+				data_start, data - data_start);
+
+	kfree(data_start);
+
+	if (ret < 0)
+		return ret;
+
+	dev->header.snapc->seq =  new_snapid;
+
+	return 0;
+bad:
+	return -ERANGE;
+}
+
+/*
+ * only read the first part of the ondisk header, without the snaps info
+ */
+static int rbd_update_snaps(struct rbd_device *rbd_dev)
+{
+	int ret;
+	struct rbd_image_header h;
+	u64 snap_seq;
+
+	ret = rbd_read_header(rbd_dev, &h);
+	if (ret < 0)
+		return ret;
+
+	down_write(&rbd_dev->header.snap_rwsem);
+
+	snap_seq = rbd_dev->header.snapc->seq;
+
+	kfree(rbd_dev->header.snapc);
+	kfree(rbd_dev->header.snap_names);
+	kfree(rbd_dev->header.snap_sizes);
+
+	rbd_dev->header.total_snaps = h.total_snaps;
+	rbd_dev->header.snapc = h.snapc;
+	rbd_dev->header.snap_names = h.snap_names;
+	rbd_dev->header.snap_sizes = h.snap_sizes;
+	rbd_dev->header.snapc->seq = snap_seq;
+
+	up_write(&rbd_dev->header.snap_rwsem);
+
+	return 0;
+}
+
+static int rbd_init_disk(struct rbd_device *rbd_dev)
+{
+	struct gendisk *disk;
+	struct request_queue *q;
+	int rc;
+	u64 total_size = 0;
+
+	/* contact OSD, request size info about the object being mapped */
+	rc = rbd_read_header(rbd_dev, &rbd_dev->header);
+	if (rc)
+		return rc;
+
+	rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size);
+	if (rc)
+		return rc;
+
+	/* create gendisk info */
+	rc = -ENOMEM;
+	disk = alloc_disk(RBD_MINORS_PER_MAJOR);
+	if (!disk)
+		goto out;
+
+	sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id);
+	disk->major = rbd_dev->major;
+	disk->first_minor = 0;
+	disk->fops = &rbd_bd_ops;
+	disk->private_data = rbd_dev;
+
+	/* init rq */
+	rc = -ENOMEM;
+	q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
+	if (!q)
+		goto out_disk;
+	blk_queue_merge_bvec(q, rbd_merge_bvec);
+	disk->queue = q;
+
+	q->queuedata = rbd_dev;
+
+	rbd_dev->disk = disk;
+	rbd_dev->q = q;
+
+	/* finally, announce the disk to the world */
+	set_capacity(disk, total_size / 512ULL);
+	add_disk(disk);
+
+	pr_info("%s: added with size 0x%llx\n",
+		disk->disk_name, (unsigned long long)total_size);
+	return 0;
+
+out_disk:
+	put_disk(disk);
+out:
+	return rc;
+}
+
+/********************************************************************
+ * /sys/class/rbd/
+ *                   add	map rados objects to blkdev
+ *                   remove	unmap rados objects
+ *                   list	show mappings
+ *******************************************************************/
+
+static void class_rbd_release(struct class *cls)
+{
+	kfree(cls);
+}
+
+static ssize_t class_rbd_list(struct class *c,
+			      struct class_attribute *attr,
+			      char *data)
+{
+	int n = 0;
+	struct list_head *tmp;
+	int max = PAGE_SIZE;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	n += snprintf(data, max,
+		      "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n");
+
+	list_for_each(tmp, &rbd_dev_list) {
+		struct rbd_device *rbd_dev;
+
+		rbd_dev = list_entry(tmp, struct rbd_device, node);
+		n += snprintf(data+n, max-n,
+			      "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n",
+			      rbd_dev->id,
+			      rbd_dev->major,
+			      ceph_client_id(rbd_dev->client),
+			      rbd_dev->pool_name,
+			      rbd_dev->obj, rbd_dev->snap_name,
+			      rbd_dev->header.image_size >> 10);
+		if (n == max)
+			break;
+	}
+
+	mutex_unlock(&ctl_mutex);
+	return n;
+}
+
+static ssize_t class_rbd_add(struct class *c,
+			     struct class_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct ceph_osd_client *osdc;
+	struct rbd_device *rbd_dev;
+	ssize_t rc = -ENOMEM;
+	int irc, new_id = 0;
+	struct list_head *tmp;
+	char *mon_dev_name;
+	char *options;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
+	if (!mon_dev_name)
+		goto err_out_mod;
+
+	options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
+	if (!options)
+		goto err_mon_dev;
+
+	/* new rbd_device object */
+	rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
+	if (!rbd_dev)
+		goto err_out_opt;
+
+	/* static rbd_device initialization */
+	spin_lock_init(&rbd_dev->lock);
+	INIT_LIST_HEAD(&rbd_dev->node);
+
+	/* generate unique id: find highest unique id, add one */
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	list_for_each(tmp, &rbd_dev_list) {
+		struct rbd_device *rbd_dev;
+
+		rbd_dev = list_entry(tmp, struct rbd_device, node);
+		if (rbd_dev->id >= new_id)
+			new_id = rbd_dev->id + 1;
+	}
+
+	rbd_dev->id = new_id;
+
+	/* add to global list */
+	list_add_tail(&rbd_dev->node, &rbd_dev_list);
+
+	/* parse add command */
+	if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s "
+		   "%" __stringify(RBD_MAX_OPT_LEN) "s "
+		   "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s "
+		   "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s"
+		   "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
+		   mon_dev_name, options, rbd_dev->pool_name,
+		   rbd_dev->obj, rbd_dev->snap_name) < 4) {
+		rc = -EINVAL;
+		goto err_out_slot;
+	}
+
+	if (rbd_dev->snap_name[0] == 0)
+		rbd_dev->snap_name[0] = '-';
+
+	rbd_dev->obj_len = strlen(rbd_dev->obj);
+	snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s",
+		 rbd_dev->obj, RBD_SUFFIX);
+
+	/* initialize rest of new object */
+	snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id);
+	rc = rbd_get_client(rbd_dev, mon_dev_name, options);
+	if (rc < 0)
+		goto err_out_slot;
+
+	mutex_unlock(&ctl_mutex);
+
+	/* pick the pool */
+	osdc = &rbd_dev->client->osdc;
+	rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
+	if (rc < 0)
+		goto err_out_client;
+	rbd_dev->poolid = rc;
+
+	/* register our block device */
+	irc = register_blkdev(0, rbd_dev->name);
+	if (irc < 0) {
+		rc = irc;
+		goto err_out_client;
+	}
+	rbd_dev->major = irc;
+
+	/* set up and announce blkdev mapping */
+	rc = rbd_init_disk(rbd_dev);
+	if (rc)
+		goto err_out_blkdev;
+
+	return count;
+
+err_out_blkdev:
+	unregister_blkdev(rbd_dev->major, rbd_dev->name);
+err_out_client:
+	rbd_put_client(rbd_dev);
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+err_out_slot:
+	list_del_init(&rbd_dev->node);
+	mutex_unlock(&ctl_mutex);
+
+	kfree(rbd_dev);
+err_out_opt:
+	kfree(options);
+err_mon_dev:
+	kfree(mon_dev_name);
+err_out_mod:
+	dout("Error adding device %s\n", buf);
+	module_put(THIS_MODULE);
+	return rc;
+}
+
+static struct rbd_device *__rbd_get_dev(unsigned long id)
+{
+	struct list_head *tmp;
+	struct rbd_device *rbd_dev;
+
+	list_for_each(tmp, &rbd_dev_list) {
+		rbd_dev = list_entry(tmp, struct rbd_device, node);
+		if (rbd_dev->id == id)
+			return rbd_dev;
+	}
+	return NULL;
+}
+
+static ssize_t class_rbd_remove(struct class *c,
+				struct class_attribute *attr,
+				const char *buf,
+				size_t count)
+{
+	struct rbd_device *rbd_dev = NULL;
+	int target_id, rc;
+	unsigned long ul;
+
+	rc = strict_strtoul(buf, 10, &ul);
+	if (rc)
+		return rc;
+
+	/* convert to int; abort if we lost anything in the conversion */
+	target_id = (int) ul;
+	if (target_id != ul)
+		return -EINVAL;
+
+	/* remove object from list immediately */
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	rbd_dev = __rbd_get_dev(target_id);
+	if (rbd_dev)
+		list_del_init(&rbd_dev->node);
+
+	mutex_unlock(&ctl_mutex);
+
+	if (!rbd_dev)
+		return -ENOENT;
+
+	rbd_put_client(rbd_dev);
+
+	/* clean up and free blkdev */
+	rbd_free_disk(rbd_dev);
+	unregister_blkdev(rbd_dev->major, rbd_dev->name);
+	kfree(rbd_dev);
+
+	/* release module ref */
+	module_put(THIS_MODULE);
+
+	return count;
+}
+
+static ssize_t class_rbd_snaps_list(struct class *c,
+			      struct class_attribute *attr,
+			      char *data)
+{
+	struct rbd_device *rbd_dev = NULL;
+	struct list_head *tmp;
+	struct rbd_image_header *header;
+	int i, n = 0, max = PAGE_SIZE;
+	int ret;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	n += snprintf(data, max, "#id\tsnap\tKB\n");
+
+	list_for_each(tmp, &rbd_dev_list) {
+		char *names, *p;
+		struct ceph_snap_context *snapc;
+
+		rbd_dev = list_entry(tmp, struct rbd_device, node);
+		header = &rbd_dev->header;
+
+		down_read(&header->snap_rwsem);
+
+		names = header->snap_names;
+		snapc = header->snapc;
+
+		n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
+			      rbd_dev->id, RBD_SNAP_HEAD_NAME,
+			      header->image_size >> 10,
+			      (!rbd_dev->cur_snap ? " (*)" : ""));
+		if (n == max)
+			break;
+
+		p = names;
+		for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
+			n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
+			      rbd_dev->id, p, header->snap_sizes[i] >> 10,
+			      (rbd_dev->cur_snap &&
+			       (snap_index(header, i) == rbd_dev->cur_snap) ?
+			       " (*)" : ""));
+			if (n == max)
+				break;
+		}
+
+		up_read(&header->snap_rwsem);
+	}
+
+
+	ret = n;
+	mutex_unlock(&ctl_mutex);
+	return ret;
+}
+
+static ssize_t class_rbd_snaps_refresh(struct class *c,
+				struct class_attribute *attr,
+				const char *buf,
+				size_t count)
+{
+	struct rbd_device *rbd_dev = NULL;
+	int target_id, rc;
+	unsigned long ul;
+	int ret = count;
+
+	rc = strict_strtoul(buf, 10, &ul);
+	if (rc)
+		return rc;
+
+	/* convert to int; abort if we lost anything in the conversion */
+	target_id = (int) ul;
+	if (target_id != ul)
+		return -EINVAL;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	rbd_dev = __rbd_get_dev(target_id);
+	if (!rbd_dev) {
+		ret = -ENOENT;
+		goto done;
+	}
+
+	rc = rbd_update_snaps(rbd_dev);
+	if (rc < 0)
+		ret = rc;
+
+done:
+	mutex_unlock(&ctl_mutex);
+	return ret;
+}
+
+static ssize_t class_rbd_snap_create(struct class *c,
+				struct class_attribute *attr,
+				const char *buf,
+				size_t count)
+{
+	struct rbd_device *rbd_dev = NULL;
+	int target_id, ret;
+	char *name;
+
+	name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	/* parse snaps add command */
+	if (sscanf(buf, "%d "
+		   "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
+		   &target_id,
+		   name) != 2) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	rbd_dev = __rbd_get_dev(target_id);
+	if (!rbd_dev) {
+		ret = -ENOENT;
+		goto done_unlock;
+	}
+
+	ret = rbd_header_add_snap(rbd_dev,
+				  name, GFP_KERNEL);
+	if (ret < 0)
+		goto done_unlock;
+
+	ret = rbd_update_snaps(rbd_dev);
+	if (ret < 0)
+		goto done_unlock;
+
+	ret = count;
+done_unlock:
+	mutex_unlock(&ctl_mutex);
+done:
+	kfree(name);
+	return ret;
+}
+
+static ssize_t class_rbd_rollback(struct class *c,
+				struct class_attribute *attr,
+				const char *buf,
+				size_t count)
+{
+	struct rbd_device *rbd_dev = NULL;
+	int target_id, ret;
+	u64 snapid;
+	char snap_name[RBD_MAX_SNAP_NAME_LEN];
+	u64 cur_ofs;
+	char *seg_name;
+
+	/* parse snaps add command */
+	if (sscanf(buf, "%d "
+		   "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
+		   &target_id,
+		   snap_name) != 2) {
+		return -EINVAL;
+	}
+
+	ret = -ENOMEM;
+	seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
+	if (!seg_name)
+		return ret;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	rbd_dev = __rbd_get_dev(target_id);
+	if (!rbd_dev) {
+		ret = -ENOENT;
+		goto done_unlock;
+	}
+
+	ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
+	if (ret < 0)
+		goto done_unlock;
+
+	dout("snapid=%lld\n", snapid);
+
+	cur_ofs = 0;
+	while (cur_ofs < rbd_dev->header.image_size) {
+		cur_ofs += rbd_get_segment(&rbd_dev->header,
+					   rbd_dev->obj,
+					   cur_ofs, (u64)-1,
+					   seg_name, NULL);
+		dout("seg_name=%s\n", seg_name);
+
+		ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name);
+		if (ret < 0)
+			pr_warning("could not roll back obj %s err=%d\n",
+				   seg_name, ret);
+	}
+
+	ret = rbd_update_snaps(rbd_dev);
+	if (ret < 0)
+		goto done_unlock;
+
+	ret = count;
+
+done_unlock:
+	mutex_unlock(&ctl_mutex);
+	kfree(seg_name);
+
+	return ret;
+}
+
+static struct class_attribute class_rbd_attrs[] = {
+	__ATTR(add,		0200, NULL, class_rbd_add),
+	__ATTR(remove,		0200, NULL, class_rbd_remove),
+	__ATTR(list,		0444, class_rbd_list, NULL),
+	__ATTR(snaps_refresh,	0200, NULL, class_rbd_snaps_refresh),
+	__ATTR(snap_create,	0200, NULL, class_rbd_snap_create),
+	__ATTR(snaps_list,	0444, class_rbd_snaps_list, NULL),
+	__ATTR(snap_rollback,	0200, NULL, class_rbd_rollback),
+	__ATTR_NULL
+};
+
+/*
+ * create control files in sysfs
+ * /sys/class/rbd/...
+ */
+static int rbd_sysfs_init(void)
+{
+	int ret = -ENOMEM;
+
+	class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL);
+	if (!class_rbd)
+		goto out;
+
+	class_rbd->name = DRV_NAME;
+	class_rbd->owner = THIS_MODULE;
+	class_rbd->class_release = class_rbd_release;
+	class_rbd->class_attrs = class_rbd_attrs;
+
+	ret = class_register(class_rbd);
+	if (ret)
+		goto out_class;
+	return 0;
+
+out_class:
+	kfree(class_rbd);
+	class_rbd = NULL;
+	pr_err(DRV_NAME ": failed to create class rbd\n");
+out:
+	return ret;
+}
+
+static void rbd_sysfs_cleanup(void)
+{
+	if (class_rbd)
+		class_destroy(class_rbd);
+	class_rbd = NULL;
+}
+
+int __init rbd_init(void)
+{
+	int rc;
+
+	rc = rbd_sysfs_init();
+	if (rc)
+		return rc;
+	spin_lock_init(&node_lock);
+	pr_info("loaded " DRV_NAME_LONG "\n");
+	return 0;
+}
+
+void __exit rbd_exit(void)
+{
+	rbd_sysfs_cleanup();
+}
+
+module_init(rbd_init);
+module_exit(rbd_exit);
+
+MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
+MODULE_DESCRIPTION("rados block device");
+
+/* following authorship retained from original osdblk.c */
+MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
+
+MODULE_LICENSE("GPL");
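
The heart of the I/O path above is rbd_get_segment(): a linear image offset is split into an object index (ofs >> obj_order), an offset within that object, and a length clipped at the object boundary, and the object name is the image's block_name plus a zero-padded hex index. The following is a minimal standalone userspace sketch of that arithmetic, for illustration only; demo_get_segment() is a hypothetical name, and the driver's in-kernel version is in the diff above.

    #include <stdio.h>
    #include <stdint.h>

    /* Mirror of the driver's striping math (cf. rbd_get_segment).
     * obj_order = 22 means 4 MB objects (RBD_DEFAULT_OBJ_ORDER). */
    static uint64_t demo_get_segment(const char *block_name, int obj_order,
                                     uint64_t ofs, uint64_t len,
                                     char *seg_name, size_t seg_name_len,
                                     uint64_t *seg_ofs)
    {
            uint64_t seg = ofs >> obj_order;          /* which object */
            uint64_t seg_size = 1ULL << obj_order;
            uint64_t in_seg = ofs & (seg_size - 1);   /* offset inside it */

            /* same "%s.%012llx" naming the driver uses: foo.000000000000 */
            snprintf(seg_name, seg_name_len, "%s.%012llx",
                     block_name, (unsigned long long)seg);

            if (seg_ofs)
                    *seg_ofs = in_seg;

            /* clip the request at the object boundary */
            if (len > seg_size - in_seg)
                    len = seg_size - in_seg;
            return len;
    }

    int main(void)
    {
            char name[128];
            uint64_t seg_ofs;

            /* an 8 MB request starting 1 MB into an image with 4 MB objects */
            uint64_t n = demo_get_segment("foo", 22, 1ULL << 20, 8ULL << 20,
                                          name, sizeof(name), &seg_ofs);
            printf("%s ofs=%llu len=%llu\n",
                   name, (unsigned long long)seg_ofs, (unsigned long long)n);
            /* -> foo.000000000000 ofs=1048576 len=3145728 */
            return 0;
    }

This is why rbd_rq_fn() loops, issuing one OSD request per segment: a block-layer request may span several objects, but each cloned bio chain must stop at a segment boundary.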

+ 73 - 0
drivers/block/rbd_types.h

@@ -0,0 +1,73 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_TYPES_H
+#define CEPH_RBD_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * rbd image 'foo' consists of objects
+ *   foo.rbd      - image metadata
+ *   foo.00000000
+ *   foo.00000001
+ *   ...          - data
+ */
+
+#define RBD_SUFFIX		".rbd"
+#define RBD_DIRECTORY           "rbd_directory"
+#define RBD_INFO                "rbd_info"
+
+#define RBD_DEFAULT_OBJ_ORDER	22   /* 4MB */
+#define RBD_MIN_OBJ_ORDER       16
+#define RBD_MAX_OBJ_ORDER       30
+
+#define RBD_MAX_OBJ_NAME_LEN	96
+#define RBD_MAX_SEG_NAME_LEN	128
+
+#define RBD_COMP_NONE		0
+#define RBD_CRYPT_NONE		0
+
+#define RBD_HEADER_TEXT		"<<< Rados Block Device Image >>>\n"
+#define RBD_HEADER_SIGNATURE	"RBD"
+#define RBD_HEADER_VERSION	"001.005"
+
+struct rbd_info {
+	__le64 max_id;
+} __attribute__ ((packed));
+
+struct rbd_image_snap_ondisk {
+	__le64 id;
+	__le64 image_size;
+} __attribute__((packed));
+
+struct rbd_image_header_ondisk {
+	char text[40];
+	char block_name[24];
+	char signature[4];
+	char version[8];
+	struct {
+		__u8 order;
+		__u8 crypt_type;
+		__u8 comp_type;
+		__u8 unused;
+	} __attribute__((packed)) options;
+	__le64 image_size;
+	__le64 snap_seq;
+	__le32 snap_count;
+	__le32 reserved;
+	__le64 snap_names_len;
+	struct rbd_image_snap_ondisk snaps[0];
+} __attribute__((packed));
+
+
+#endif
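
The on-disk header above begins with fixed magic fields (text, signature, version) followed by a variable-length snapshot table; rbd_header_from_disk() in rbd.c reads snap_count and snap_names_len from it. This commit does not itself validate the magic fields, so the sketch below is only a hypothetical reader-side check showing how those fields line up; rbd_check_header() and struct rbd_header_prefix are illustrative names, not driver API.

    #include <stdint.h>
    #include <string.h>

    #define RBD_HEADER_TEXT         "<<< Rados Block Device Image >>>\n"
    #define RBD_HEADER_SIGNATURE    "RBD"
    #define RBD_HEADER_VERSION      "001.005"

    /* first, fixed-size part of struct rbd_image_header_ondisk */
    struct rbd_header_prefix {
            char text[40];
            char block_name[24];
            char signature[4];
            char version[8];
    } __attribute__((packed));

    /* Hypothetical validator: returns 0 if the buffer carries the magic
     * text, signature and version this format revision expects. */
    static int rbd_check_header(const void *buf, size_t len)
    {
            const struct rbd_header_prefix *h = buf;

            if (len < sizeof(*h))
                    return -1;
            if (memcmp(h->text, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT) - 1))
                    return -1;                /* not an rbd image header */
            if (memcmp(h->signature, RBD_HEADER_SIGNATURE,
                       sizeof(RBD_HEADER_SIGNATURE) - 1))
                    return -1;
            if (memcmp(h->version, RBD_HEADER_VERSION,
                       sizeof(RBD_HEADER_VERSION) - 1))
                    return -1;                /* unknown format revision */
            return 0;
    }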

+ 2 - 12
fs/ceph/Kconfig

@@ -1,9 +1,11 @@
 config CEPH_FS
         tristate "Ceph distributed file system (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
+	select CEPH_LIB
 	select LIBCRC32C
 	select CRYPTO_AES
 	select CRYPTO
+	default n
 	help
 	  Choose Y or M here to include support for mounting the
 	  experimental Ceph distributed file system.  Ceph is an extremely
@@ -14,15 +16,3 @@ config CEPH_FS
 
 	  If unsure, say N.
 
-config CEPH_FS_PRETTYDEBUG
-	bool "Include file:line in ceph debug output"
-	depends on CEPH_FS
-	default n
-	help
-	  If you say Y here, debug output will include a filename and
-	  line to aid debugging.  This icnreases kernel size and slows
-	  execution slightly when debug call sites are enabled (e.g.,
-	  via CONFIG_DYNAMIC_DEBUG).
-
-	  If unsure, say N.
-

+ 2 - 9
fs/ceph/Makefile

@@ -8,15 +8,8 @@ obj-$(CONFIG_CEPH_FS) += ceph.o
 
 ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
 	export.o caps.o snap.o xattr.o \
-	messenger.o msgpool.o buffer.o pagelist.o \
-	mds_client.o mdsmap.o \
-	mon_client.o \
-	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
-	debugfs.o \
-	auth.o auth_none.o \
-	crypto.o armor.o \
-	auth_x.o \
-	ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o
+	mds_client.o mdsmap.o strings.o ceph_frag.o \
+	debugfs.o
 
 else
 #Otherwise we were called directly from the command

+ 0 - 20
fs/ceph/README

@@ -1,20 +0,0 @@
-#
-# The following files are shared by (and manually synchronized
-# between) the Ceph userland and kernel client.
-#
-# userland                  kernel
-src/include/ceph_fs.h	    fs/ceph/ceph_fs.h
-src/include/ceph_fs.cc	    fs/ceph/ceph_fs.c
-src/include/msgr.h	    fs/ceph/msgr.h
-src/include/rados.h	    fs/ceph/rados.h
-src/include/ceph_strings.cc fs/ceph/ceph_strings.c
-src/include/ceph_frag.h	    fs/ceph/ceph_frag.h
-src/include/ceph_frag.cc    fs/ceph/ceph_frag.c
-src/include/ceph_hash.h	    fs/ceph/ceph_hash.h
-src/include/ceph_hash.cc    fs/ceph/ceph_hash.c
-src/crush/crush.c	    fs/ceph/crush/crush.c
-src/crush/crush.h	    fs/ceph/crush/crush.h
-src/crush/mapper.c	    fs/ceph/crush/mapper.c
-src/crush/mapper.h	    fs/ceph/crush/mapper.h
-src/crush/hash.h	    fs/ceph/crush/hash.h
-src/crush/hash.c	    fs/ceph/crush/hash.c

+ 34 - 31
fs/ceph/addr.c

@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/backing-dev.h>
 #include <linux/fs.h>
@@ -10,7 +10,8 @@
 #include <linux/task_io_accounting_ops.h>
 
 #include "super.h"
-#include "osd_client.h"
+#include "mds_client.h"
+#include <linux/ceph/osd_client.h>
 
 /*
  * Ceph address space ops.
@@ -193,7 +194,8 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
+	struct ceph_osd_client *osdc = 
+		&ceph_inode_to_client(inode)->client->osdc;
 	int err = 0;
 	u64 len = PAGE_CACHE_SIZE;
 
@@ -265,7 +267,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
+	struct ceph_osd_client *osdc =
+		&ceph_inode_to_client(inode)->client->osdc;
 	int rc = 0;
 	struct page **pages;
 	loff_t offset;
@@ -365,7 +368,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 {
 	struct inode *inode;
 	struct ceph_inode_info *ci;
-	struct ceph_client *client;
+	struct ceph_fs_client *fsc;
 	struct ceph_osd_client *osdc;
 	loff_t page_off = page->index << PAGE_CACHE_SHIFT;
 	int len = PAGE_CACHE_SIZE;
@@ -383,8 +386,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	}
 	inode = page->mapping->host;
 	ci = ceph_inode(inode);
-	client = ceph_inode_to_client(inode);
-	osdc = &client->osdc;
+	fsc = ceph_inode_to_client(inode);
+	osdc = &fsc->client->osdc;
 
 	/* verify this is a writeable snap context */
 	snapc = (void *)page->private;
@@ -414,10 +417,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
 	     inode, page, page->index, page_off, len, snapc);
 
-	writeback_stat = atomic_long_inc_return(&client->writeback_count);
+	writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
 	if (writeback_stat >
-	    CONGESTION_ON_THRESH(client->mount_args->congestion_kb))
-		set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC);
+	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
+		set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
 
 	set_page_writeback(page);
 	err = ceph_osdc_writepages(osdc, ceph_vino(inode),
@@ -496,7 +499,7 @@ static void writepages_finish(struct ceph_osd_request *req,
 	struct address_space *mapping = inode->i_mapping;
 	__s32 rc = -EIO;
 	u64 bytes = 0;
-	struct ceph_client *client = ceph_inode_to_client(inode);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	long writeback_stat;
 	unsigned issued = ceph_caps_issued(ci);
 
@@ -529,10 +532,10 @@ static void writepages_finish(struct ceph_osd_request *req,
 		WARN_ON(!PageUptodate(page));
 
 		writeback_stat =
-			atomic_long_dec_return(&client->writeback_count);
+			atomic_long_dec_return(&fsc->writeback_count);
 		if (writeback_stat <
-		    CONGESTION_OFF_THRESH(client->mount_args->congestion_kb))
-			clear_bdi_congested(&client->backing_dev_info,
+		    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
+			clear_bdi_congested(&fsc->backing_dev_info,
 					    BLK_RW_ASYNC);
 
 		ceph_put_snap_context((void *)page->private);
@@ -569,13 +572,13 @@ static void writepages_finish(struct ceph_osd_request *req,
  * mempool.  we avoid the mempool if we can because req->r_num_pages
  * may be less than the maximum write size.
  */
-static void alloc_page_vec(struct ceph_client *client,
+static void alloc_page_vec(struct ceph_fs_client *fsc,
 			   struct ceph_osd_request *req)
 {
 	req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages,
 			       GFP_NOFS);
 	if (!req->r_pages) {
-		req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS);
+		req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS);
 		req->r_pages_from_pool = 1;
 		WARN_ON(!req->r_pages);
 	}
@@ -590,7 +593,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	struct inode *inode = mapping->host;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_client *client;
+	struct ceph_fs_client *fsc;
 	pgoff_t index, start, end;
 	int range_whole = 0;
 	int should_loop = 1;
@@ -617,13 +620,13 @@ static int ceph_writepages_start(struct address_space *mapping,
 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
 	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-	client = ceph_inode_to_client(inode);
-	if (client->mount_state == CEPH_MOUNT_SHUTDOWN) {
+	fsc = ceph_inode_to_client(inode);
+	if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
 		pr_warning("writepage_start %p on forced umount\n", inode);
 		return -EIO; /* we're in a forced umount, don't write! */
 	}
-	if (client->mount_args->wsize && client->mount_args->wsize < wsize)
-		wsize = client->mount_args->wsize;
+	if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
+		wsize = fsc->mount_options->wsize;
 	if (wsize < PAGE_CACHE_SIZE)
 		wsize = PAGE_CACHE_SIZE;
 	max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
@@ -769,7 +772,7 @@ get_more_pages:
 				offset = (unsigned long long)page->index
 					<< PAGE_CACHE_SHIFT;
 				len = wsize;
-				req = ceph_osdc_new_request(&client->osdc,
+				req = ceph_osdc_new_request(&fsc->client->osdc,
 					    &ci->i_layout,
 					    ceph_vino(inode),
 					    offset, &len,
@@ -782,7 +785,7 @@ get_more_pages:
 					    &inode->i_mtime, true, 1);
 				max_pages = req->r_num_pages;
 
-				alloc_page_vec(client, req);
+				alloc_page_vec(fsc, req);
 				req->r_callback = writepages_finish;
 				req->r_inode = inode;
 			}
@@ -794,10 +797,10 @@ get_more_pages:
 			     inode, page, page->index);
 
 			writeback_stat =
-			       atomic_long_inc_return(&client->writeback_count);
+			       atomic_long_inc_return(&fsc->writeback_count);
 			if (writeback_stat > CONGESTION_ON_THRESH(
-				    client->mount_args->congestion_kb)) {
-				set_bdi_congested(&client->backing_dev_info,
+				    fsc->mount_options->congestion_kb)) {
+				set_bdi_congested(&fsc->backing_dev_info,
 						  BLK_RW_ASYNC);
 			}
 
@@ -846,7 +849,7 @@ get_more_pages:
 		op->payload_len = cpu_to_le32(len);
 		req->r_request->hdr.data_len = cpu_to_le32(len);
 
-		ceph_osdc_start_request(&client->osdc, req, true);
+		ceph_osdc_start_request(&fsc->client->osdc, req, true);
 		req = NULL;
 
 		/* continue? */
@@ -915,7 +918,7 @@ static int ceph_update_writeable_page(struct file *file,
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	loff_t page_off = pos & PAGE_CACHE_MASK;
 	int pos_in_page = pos & ~PAGE_CACHE_MASK;
 	int end_in_page = pos_in_page + len;
@@ -1053,8 +1056,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
 			  struct page *page, void *fsdata)
 {
 	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_client *client = ceph_inode_to_client(inode);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
 	int check_cap = 0;
 
@@ -1123,7 +1126,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct inode *inode = vma->vm_file->f_dentry->d_inode;
 	struct page *page = vmf->page;
-	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	loff_t off = page->index << PAGE_CACHE_SHIFT;
 	loff_t size, len;
 	int ret;
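
Throughout addr.c the writeback congestion accounting now hangs off the new struct ceph_fs_client, reading fsc->mount_options->congestion_kb. The ON/OFF thresholds derive from that mount option with a 25% hysteresis gap so the bdi does not flap between congested and uncongested. A minimal userspace model of the arithmetic (the macro bodies mirror the CONGESTION_*_THRESH definitions in fs/ceph/super.h; the 4 KiB page size is an assumption for illustration):

/* cc -o congestion congestion.c */
#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages */

/* congested once more than this many pages are under writeback */
#define CONGESTION_ON_THRESH(kb)	((kb) >> (PAGE_SHIFT - 10))
/* clear congestion 25% below the ON threshold to add hysteresis */
#define CONGESTION_OFF_THRESH(kb) \
	(CONGESTION_ON_THRESH(kb) - (CONGESTION_ON_THRESH(kb) >> 2))

int main(void)
{
	int congestion_kb = 8192;	/* illustrative mount option value */

	printf("set_bdi_congested above %d pages\n",
	       CONGESTION_ON_THRESH(congestion_kb));
	printf("clear_bdi_congested below %d pages\n",
	       CONGESTION_OFF_THRESH(congestion_kb));
	return 0;
}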

+ 20 - 30
fs/ceph/caps.c

@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -9,8 +9,9 @@
 #include <linux/writeback.h>
 
 #include "super.h"
-#include "decode.h"
-#include "messenger.h"
+#include "mds_client.h"
+#include <linux/ceph/decode.h>
+#include <linux/ceph/messenger.h>
 
 /*
  * Capability management
@@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
 	spin_unlock(&mdsc->caps_list_lock);
 }
 
-void ceph_reservation_status(struct ceph_client *client,
+void ceph_reservation_status(struct ceph_fs_client *fsc,
 			     int *total, int *avail, int *used, int *reserved,
 			     int *min)
 {
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 
 	if (total)
 		*total = mdsc->caps_total_count;
@@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
 static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 			       struct ceph_inode_info *ci)
 {
-	struct ceph_mount_args *ma = mdsc->client->mount_args;
+	struct ceph_mount_options *ma = mdsc->fsc->mount_options;
 
 	ci->i_hold_caps_min = round_jiffies(jiffies +
 					    ma->caps_wanted_delay_min * HZ);
@@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode,
 		 unsigned seq, unsigned mseq, u64 realmino, int flags,
 		 struct ceph_cap_reservation *caps_reservation)
 {
-	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_cap *new_cap = NULL;
 	struct ceph_cap *cap;
@@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
 	struct ceph_mds_client *mdsc =
-		&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
 	int removed = 0;
 
 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
@@ -1210,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
 	int mds;
 	struct ceph_cap_snap *capsnap;
 	u32 mseq;
-	struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
 						    session->s_mutex */
 	u64 next_follows = 0;  /* keep track of how far we've gotten through the
@@ -1336,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 {
 	struct ceph_mds_client *mdsc =
-		&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
 	struct inode *inode = &ci->vfs_inode;
 	int was = ci->i_dirty_caps;
 	int dirty = 0;
@@ -1378,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 static int __mark_caps_flushing(struct inode *inode,
 				 struct ceph_mds_session *session)
 {
-	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int flushing;
 
@@ -1416,17 +1417,6 @@ static int __mark_caps_flushing(struct inode *inode,
 /*
  * try to invalidate mapping pages without blocking.
  */
-static int mapping_is_empty(struct address_space *mapping)
-{
-	struct page *page = find_get_page(mapping, 0);
-
-	if (!page)
-		return 1;
-
-	put_page(page);
-	return 0;
-}
-
 static int try_nonblocking_invalidate(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1436,7 +1426,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
 	invalidate_mapping_pages(&inode->i_data, 0, -1);
 	spin_lock(&inode->i_lock);
 
-	if (mapping_is_empty(&inode->i_data) &&
+	if (inode->i_data.nrpages == 0 &&
 	    invalidating_gen == ci->i_rdcache_gen) {
 		/* success. */
 		dout("try_nonblocking_invalidate %p success\n", inode);
@@ -1462,8 +1452,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
 void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		     struct ceph_mds_session *session)
 {
-	struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct inode *inode = &ci->vfs_inode;
 	struct ceph_cap *cap;
 	int file_wanted, used;
@@ -1533,7 +1523,7 @@ retry_locked:
 	 */
 	if ((!is_delayed || mdsc->stopping) &&
 	    ci->i_wrbuffer_ref == 0 &&               /* no dirty pages... */
-	    ci->i_rdcache_gen &&                     /* may have cached pages */
+	    inode->i_data.nrpages &&                 /* have cached pages */
 	    (file_wanted == 0 ||                     /* no open files */
 	     (revoking & (CEPH_CAP_FILE_CACHE|
 			  CEPH_CAP_FILE_LAZYIO))) && /*  or revoking cache */
@@ -1706,7 +1696,7 @@ ack:
 static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
 			  unsigned *flush_tid)
 {
-	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int unlock_session = session ? 0 : 1;
 	int flushing = 0;
@@ -1872,7 +1862,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
 				       caps_are_flushed(inode, flush_tid));
 	} else {
 		struct ceph_mds_client *mdsc =
-			&ceph_sb_to_client(inode->i_sb)->mdsc;
+			ceph_sb_to_client(inode->i_sb)->mdsc;
 
 		spin_lock(&inode->i_lock);
 		if (__ceph_caps_dirty(ci))
@@ -2465,7 +2455,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 	__releases(inode->i_lock)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	unsigned seq = le32_to_cpu(m->seq);
 	int dirty = le32_to_cpu(m->dirty);
 	int cleaned = 0;
@@ -2713,7 +2703,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		      struct ceph_msg *msg)
 {
 	struct ceph_mds_client *mdsc = session->s_mdsc;
-	struct super_block *sb = mdsc->client->sb;
+	struct super_block *sb = mdsc->fsc->sb;
 	struct inode *inode;
 	struct ceph_cap *cap;
 	struct ceph_mds_caps *h;
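
The pattern repeated across caps.c, &ceph_sb_to_client(sb)->mdsc becoming ceph_sb_to_client(sb)->mdsc, is the mechanical consequence of the libceph split: the MDS client is no longer embedded in the client struct but allocated separately and linked from the new ceph_fs_client (see ceph_mdsc_init in the mds_client.c hunk below). A simplified userspace model of that layout change; the struct names here are stand-ins, not the kernel definitions:

/* cc -o fsc fsc.c */
#include <stdlib.h>

struct mds_client { int stopping; };

struct fs_client {
	struct mds_client *mdsc;	/* was: struct mds_client mdsc; */
};

/* callers now take the pointer directly instead of &fsc->mdsc */
static struct mds_client *mdsc_of(struct fs_client *fsc)
{
	return fsc->mdsc;
}

int main(void)
{
	struct fs_client fsc;

	fsc.mdsc = calloc(1, sizeof(*fsc.mdsc));	/* allocated at mount */
	if (!fsc.mdsc)
		return 1;
	mdsc_of(&fsc)->stopping = 0;
	free(fsc.mdsc);
	return 0;
}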

+ 2 - 1
fs/ceph/ceph_frag.c

@@ -1,7 +1,8 @@
 /*
  * Ceph 'frag' type
  */
-#include "types.h"
+#include <linux/module.h>
+#include <linux/ceph/types.h>
 
 int ceph_frag_compare(__u32 a, __u32 b)
 {

+ 94 - 312
fs/ceph/debugfs.c

@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/device.h>
 #include <linux/slab.h>
@@ -7,143 +7,49 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
 #include "super.h"
-#include "mds_client.h"
-#include "mon_client.h"
-#include "auth.h"
 
 #ifdef CONFIG_DEBUG_FS
 
-/*
- * Implement /sys/kernel/debug/ceph fun
- *
- * /sys/kernel/debug/ceph/client*  - an instance of the ceph client
- *      .../osdmap      - current osdmap
- *      .../mdsmap      - current mdsmap
- *      .../monmap      - current monmap
- *      .../osdc        - active osd requests
- *      .../mdsc        - active mds requests
- *      .../monc        - mon client state
- *      .../dentry_lru  - dump contents of dentry lru
- *      .../caps        - expose cap (reservation) stats
- *      .../bdi         - symlink to ../../bdi/something
- */
-
-static struct dentry *ceph_debugfs_dir;
-
-static int monmap_show(struct seq_file *s, void *p)
-{
-	int i;
-	struct ceph_client *client = s->private;
-
-	if (client->monc.monmap == NULL)
-		return 0;
-
-	seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
-	for (i = 0; i < client->monc.monmap->num_mon; i++) {
-		struct ceph_entity_inst *inst =
-			&client->monc.monmap->mon_inst[i];
-
-		seq_printf(s, "\t%s%lld\t%s\n",
-			   ENTITY_NAME(inst->name),
-			   pr_addr(&inst->addr.in_addr));
-	}
-	return 0;
-}
+#include "mds_client.h"
 
 static int mdsmap_show(struct seq_file *s, void *p)
 {
 	int i;
-	struct ceph_client *client = s->private;
+	struct ceph_fs_client *fsc = s->private;
 
-	if (client->mdsc.mdsmap == NULL)
+	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
 		return 0;
-	seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch);
-	seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root);
+	seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
+	seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
 	seq_printf(s, "session_timeout %d\n",
-		       client->mdsc.mdsmap->m_session_timeout);
+		       fsc->mdsc->mdsmap->m_session_timeout);
 	seq_printf(s, "session_autoclose %d\n",
-		       client->mdsc.mdsmap->m_session_autoclose);
-	for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) {
+		       fsc->mdsc->mdsmap->m_session_autoclose);
+	for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
 		struct ceph_entity_addr *addr =
-			&client->mdsc.mdsmap->m_info[i].addr;
-		int state = client->mdsc.mdsmap->m_info[i].state;
+			&fsc->mdsc->mdsmap->m_info[i].addr;
+		int state = fsc->mdsc->mdsmap->m_info[i].state;
 
-		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr),
+		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
+			       ceph_pr_addr(&addr->in_addr),
 			       ceph_mds_state_name(state));
 	}
 	return 0;
 }
 
-static int osdmap_show(struct seq_file *s, void *p)
-{
-	int i;
-	struct ceph_client *client = s->private;
-	struct rb_node *n;
-
-	if (client->osdc.osdmap == NULL)
-		return 0;
-	seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
-	seq_printf(s, "flags%s%s\n",
-		   (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
-		   " NEARFULL" : "",
-		   (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
-		   " FULL" : "");
-	for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
-		struct ceph_pg_pool_info *pool =
-			rb_entry(n, struct ceph_pg_pool_info, node);
-		seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
-			   pool->id, pool->v.pg_num, pool->pg_num_mask,
-			   pool->v.lpg_num, pool->lpg_num_mask);
-	}
-	for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
-		struct ceph_entity_addr *addr =
-			&client->osdc.osdmap->osd_addr[i];
-		int state = client->osdc.osdmap->osd_state[i];
-		char sb[64];
-
-		seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
-			   i, pr_addr(&addr->in_addr),
-			   ((client->osdc.osdmap->osd_weight[i]*100) >> 16),
-			   ceph_osdmap_state_str(sb, sizeof(sb), state));
-	}
-	return 0;
-}
-
-static int monc_show(struct seq_file *s, void *p)
-{
-	struct ceph_client *client = s->private;
-	struct ceph_mon_generic_request *req;
-	struct ceph_mon_client *monc = &client->monc;
-	struct rb_node *rp;
-
-	mutex_lock(&monc->mutex);
-
-	if (monc->have_mdsmap)
-		seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
-	if (monc->have_osdmap)
-		seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
-	if (monc->want_next_osdmap)
-		seq_printf(s, "want next osdmap\n");
-
-	for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
-		__u16 op;
-		req = rb_entry(rp, struct ceph_mon_generic_request, node);
-		op = le16_to_cpu(req->request->hdr.type);
-		if (op == CEPH_MSG_STATFS)
-			seq_printf(s, "%lld statfs\n", req->tid);
-		else
-			seq_printf(s, "%lld unknown\n", req->tid);
-	}
-
-	mutex_unlock(&monc->mutex);
-	return 0;
-}
-
+/*
+ * mdsc debugfs
+ */
 static int mdsc_show(struct seq_file *s, void *p)
 {
-	struct ceph_client *client = s->private;
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	struct rb_node *rp;
 	int pathlen;
@@ -214,61 +120,12 @@ static int mdsc_show(struct seq_file *s, void *p)
 	return 0;
 }
 
-static int osdc_show(struct seq_file *s, void *pp)
-{
-	struct ceph_client *client = s->private;
-	struct ceph_osd_client *osdc = &client->osdc;
-	struct rb_node *p;
-
-	mutex_lock(&osdc->request_mutex);
-	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
-		struct ceph_osd_request *req;
-		struct ceph_osd_request_head *head;
-		struct ceph_osd_op *op;
-		int num_ops;
-		int opcode, olen;
-		int i;
-
-		req = rb_entry(p, struct ceph_osd_request, r_node);
-
-		seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
-			   req->r_osd ? req->r_osd->o_osd : -1,
-			   le32_to_cpu(req->r_pgid.pool),
-			   le16_to_cpu(req->r_pgid.ps));
-
-		head = req->r_request->front.iov_base;
-		op = (void *)(head + 1);
-
-		num_ops = le16_to_cpu(head->num_ops);
-		olen = le32_to_cpu(head->object_len);
-		seq_printf(s, "%.*s", olen,
-			   (const char *)(head->ops + num_ops));
-
-		if (req->r_reassert_version.epoch)
-			seq_printf(s, "\t%u'%llu",
-			   (unsigned)le32_to_cpu(req->r_reassert_version.epoch),
-			   le64_to_cpu(req->r_reassert_version.version));
-		else
-			seq_printf(s, "\t");
-
-		for (i = 0; i < num_ops; i++) {
-			opcode = le16_to_cpu(op->op);
-			seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
-			op++;
-		}
-
-		seq_printf(s, "\n");
-	}
-	mutex_unlock(&osdc->request_mutex);
-	return 0;
-}
-
 static int caps_show(struct seq_file *s, void *p)
 {
-	struct ceph_client *client = s->private;
+	struct ceph_fs_client *fsc = s->private;
 	int total, avail, used, reserved, min;
 
-	ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
+	ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
 	seq_printf(s, "total\t\t%d\n"
 		   "avail\t\t%d\n"
 		   "used\t\t%d\n"
@@ -280,8 +137,8 @@ static int caps_show(struct seq_file *s, void *p)
 
 static int dentry_lru_show(struct seq_file *s, void *ptr)
 {
-	struct ceph_client *client = s->private;
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_dentry_info *di;
 
 	spin_lock(&mdsc->dentry_lru_lock);
@@ -295,199 +152,124 @@ static int dentry_lru_show(struct seq_file *s, void *ptr)
 	return 0;
 }
 
-#define DEFINE_SHOW_FUNC(name)						\
-static int name##_open(struct inode *inode, struct file *file)		\
-{									\
-	struct seq_file *sf;						\
-	int ret;							\
-									\
-	ret = single_open(file, name, NULL);				\
-	sf = file->private_data;					\
-	sf->private = inode->i_private;					\
-	return ret;							\
-}									\
-									\
-static const struct file_operations name##_fops = {			\
-	.open		= name##_open,					\
-	.read		= seq_read,					\
-	.llseek		= seq_lseek,					\
-	.release	= single_release,				\
-};
-
-DEFINE_SHOW_FUNC(monmap_show)
-DEFINE_SHOW_FUNC(mdsmap_show)
-DEFINE_SHOW_FUNC(osdmap_show)
-DEFINE_SHOW_FUNC(monc_show)
-DEFINE_SHOW_FUNC(mdsc_show)
-DEFINE_SHOW_FUNC(osdc_show)
-DEFINE_SHOW_FUNC(dentry_lru_show)
-DEFINE_SHOW_FUNC(caps_show)
+CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
+CEPH_DEFINE_SHOW_FUNC(mdsc_show)
+CEPH_DEFINE_SHOW_FUNC(caps_show)
+CEPH_DEFINE_SHOW_FUNC(dentry_lru_show)
+
 
+/*
+ * debugfs
+ */
 static int congestion_kb_set(void *data, u64 val)
 {
-	struct ceph_client *client = (struct ceph_client *)data;
-
-	if (client)
-		client->mount_args->congestion_kb = (int)val;
+	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
 
+	fsc->mount_options->congestion_kb = (int)val;
 	return 0;
 }
 
 static int congestion_kb_get(void *data, u64 *val)
 {
-	struct ceph_client *client = (struct ceph_client *)data;
-
-	if (client)
-		*val = (u64)client->mount_args->congestion_kb;
+	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
 
+	*val = (u64)fsc->mount_options->congestion_kb;
 	return 0;
 }
 
-
 DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
 			congestion_kb_set, "%llu\n");
 
-int __init ceph_debugfs_init(void)
-{
-	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
-	if (!ceph_debugfs_dir)
-		return -ENOMEM;
-	return 0;
-}
 
-void ceph_debugfs_cleanup(void)
+void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 {
-	debugfs_remove(ceph_debugfs_dir);
+	dout("ceph_fs_debugfs_cleanup\n");
+	debugfs_remove(fsc->debugfs_bdi);
+	debugfs_remove(fsc->debugfs_congestion_kb);
+	debugfs_remove(fsc->debugfs_mdsmap);
+	debugfs_remove(fsc->debugfs_caps);
+	debugfs_remove(fsc->debugfs_mdsc);
+	debugfs_remove(fsc->debugfs_dentry_lru);
 }
 
-int ceph_debugfs_client_init(struct ceph_client *client)
+int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
-	int ret = 0;
-	char name[80];
-
-	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
-		 client->monc.auth->global_id);
+	char name[100];
+	int err = -ENOMEM;
 
-	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
-	if (!client->debugfs_dir)
-		goto out;
-
-	client->monc.debugfs_file = debugfs_create_file("monc",
-						      0600,
-						      client->debugfs_dir,
-						      client,
-						      &monc_show_fops);
-	if (!client->monc.debugfs_file)
+	dout("ceph_fs_debugfs_init\n");
+	fsc->debugfs_congestion_kb =
+		debugfs_create_file("writeback_congestion_kb",
+				    0600,
+				    fsc->client->debugfs_dir,
+				    fsc,
+				    &congestion_kb_fops);
+	if (!fsc->debugfs_congestion_kb)
 		goto out;
 
-	client->mdsc.debugfs_file = debugfs_create_file("mdsc",
-						      0600,
-						      client->debugfs_dir,
-						      client,
-						      &mdsc_show_fops);
-	if (!client->mdsc.debugfs_file)
-		goto out;
+	dout("a\n");
 
-	client->osdc.debugfs_file = debugfs_create_file("osdc",
-						      0600,
-						      client->debugfs_dir,
-						      client,
-						      &osdc_show_fops);
-	if (!client->osdc.debugfs_file)
+	snprintf(name, sizeof(name), "../../bdi/%s",
+		 dev_name(fsc->backing_dev_info.dev));
+	fsc->debugfs_bdi =
+		debugfs_create_symlink("bdi",
+				       fsc->client->debugfs_dir,
+				       name);
+	if (!fsc->debugfs_bdi)
 		goto out;
 
-	client->debugfs_monmap = debugfs_create_file("monmap",
+	dout("b\n");
+	fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
 					0600,
-					client->debugfs_dir,
-					client,
-					&monmap_show_fops);
-	if (!client->debugfs_monmap)
-		goto out;
-
-	client->debugfs_mdsmap = debugfs_create_file("mdsmap",
-					0600,
-					client->debugfs_dir,
-					client,
+					fsc->client->debugfs_dir,
+					fsc,
 					&mdsmap_show_fops);
-	if (!client->debugfs_mdsmap)
-		goto out;
-
-	client->debugfs_osdmap = debugfs_create_file("osdmap",
-					0600,
-					client->debugfs_dir,
-					client,
-					&osdmap_show_fops);
-	if (!client->debugfs_osdmap)
+	if (!fsc->debugfs_mdsmap)
 		goto out;
 
-	client->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
-					0600,
-					client->debugfs_dir,
-					client,
-					&dentry_lru_show_fops);
-	if (!client->debugfs_dentry_lru)
+	dout("ca\n");
+	fsc->debugfs_mdsc = debugfs_create_file("mdsc",
+						0600,
+						fsc->client->debugfs_dir,
+						fsc,
+						&mdsc_show_fops);
+	if (!fsc->debugfs_mdsc)
 		goto out;
 
-	client->debugfs_caps = debugfs_create_file("caps",
+	dout("da\n");
+	fsc->debugfs_caps = debugfs_create_file("caps",
 						   0400,
-						   client->debugfs_dir,
-						   client,
+						   fsc->client->debugfs_dir,
+						   fsc,
 						   &caps_show_fops);
-	if (!client->debugfs_caps)
+	if (!fsc->debugfs_caps)
 		goto out;
 
-	client->debugfs_congestion_kb =
-		debugfs_create_file("writeback_congestion_kb",
-				    0600,
-				    client->debugfs_dir,
-				    client,
-				    &congestion_kb_fops);
-	if (!client->debugfs_congestion_kb)
+	dout("ea\n");
+	fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
+					0600,
+					fsc->client->debugfs_dir,
+					fsc,
+					&dentry_lru_show_fops);
+	if (!fsc->debugfs_dentry_lru)
 		goto out;
 
-	sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev));
-	client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir,
-						     name);
-
 	return 0;
 
 out:
-	ceph_debugfs_client_cleanup(client);
-	return ret;
+	ceph_fs_debugfs_cleanup(fsc);
+	return err;
 }
 
-void ceph_debugfs_client_cleanup(struct ceph_client *client)
-{
-	debugfs_remove(client->debugfs_bdi);
-	debugfs_remove(client->debugfs_caps);
-	debugfs_remove(client->debugfs_dentry_lru);
-	debugfs_remove(client->debugfs_osdmap);
-	debugfs_remove(client->debugfs_mdsmap);
-	debugfs_remove(client->debugfs_monmap);
-	debugfs_remove(client->osdc.debugfs_file);
-	debugfs_remove(client->mdsc.debugfs_file);
-	debugfs_remove(client->monc.debugfs_file);
-	debugfs_remove(client->debugfs_congestion_kb);
-	debugfs_remove(client->debugfs_dir);
-}
 
 #else  /* CONFIG_DEBUG_FS */
 
-int __init ceph_debugfs_init(void)
-{
-	return 0;
-}
-
-void ceph_debugfs_cleanup(void)
-{
-}
-
-int ceph_debugfs_client_init(struct ceph_client *client)
+int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 {
 	return 0;
 }
 
-void ceph_debugfs_client_cleanup(struct ceph_client *client)
+void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 {
 }
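
The removed DEFINE_SHOW_FUNC boilerplate now lives behind CEPH_DEFINE_SHOW_FUNC in the shared <linux/ceph/debugfs.h>, so both libceph and the filesystem can stamp out debugfs files from a single seq_file "show" callback. A sketch of the expanded pattern, assuming the shared macro wires up single_open() the same way the deleted local macro did:

#include <linux/fs.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *s, void *p)
{
	seq_printf(s, "state for %p\n", s->private);
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	/* pass the debugfs file's i_private pointer through to the seq_file */
	return single_open(file, example_show, inode->i_private);
}

static const struct file_operations example_fops = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};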
 

+ 44 - 53
fs/ceph/dir.c

@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/spinlock.h>
 #include <linux/fs_struct.h>
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 
 #include "super.h"
+#include "mds_client.h"
 
 /*
  * Directory operations: readdir, lookup, create, link, unlink,
@@ -94,10 +95,7 @@ static unsigned fpos_off(loff_t p)
  */
 static int __dcache_readdir(struct file *filp,
 			    void *dirent, filldir_t filldir)
-		__releases(inode->i_lock)
-		__acquires(inode->i_lock)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
 	struct ceph_file_info *fi = filp->private_data;
 	struct dentry *parent = filp->f_dentry;
 	struct inode *dir = parent->d_inode;
@@ -153,7 +151,6 @@ more:
 
 	atomic_inc(&dentry->d_count);
 	spin_unlock(&dcache_lock);
-	spin_unlock(&inode->i_lock);
 
 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
 	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -171,35 +168,30 @@ more:
 		} else {
 			dput(last);
 		}
-		last = NULL;
 	}
-
-	spin_lock(&inode->i_lock);
-	spin_lock(&dcache_lock);
-
 	last = dentry;
 
 	if (err < 0)
-		goto out_unlock;
+		goto out;
 
-	p = p->prev;
 	filp->f_pos++;
 
 	/* make sure a dentry wasn't dropped while we didn't have dcache_lock */
-	if ((ceph_inode(dir)->i_ceph_flags & CEPH_I_COMPLETE))
-		goto more;
-	dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
-	err = -EAGAIN;
+	if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
+		dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
+		err = -EAGAIN;
+		goto out;
+	}
+
+	spin_lock(&dcache_lock);
+	p = p->prev;	/* advance to next dentry */
+	goto more;
 
 out_unlock:
 	spin_unlock(&dcache_lock);
-
-	if (last) {
-		spin_unlock(&inode->i_lock);
+out:
+	if (last)
 		dput(last);
-		spin_lock(&inode->i_lock);
-	}
-
 	return err;
 }
 
@@ -227,15 +219,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct ceph_file_info *fi = filp->private_data;
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_client *client = ceph_inode_to_client(inode);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	unsigned frag = fpos_frag(filp->f_pos);
 	int off = fpos_off(filp->f_pos);
 	int err;
 	u32 ftype;
 	struct ceph_mds_reply_info_parsed *rinfo;
-	const int max_entries = client->mount_args->max_readdir;
-	const int max_bytes = client->mount_args->max_readdir_bytes;
+	const int max_entries = fsc->mount_options->max_readdir;
+	const int max_bytes = fsc->mount_options->max_readdir_bytes;
 
 	dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
 	if (fi->at_end)
@@ -267,17 +259,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	/* can we use the dcache? */
 	spin_lock(&inode->i_lock);
 	if ((filp->f_pos == 2 || fi->dentry) &&
-	    !ceph_test_opt(client, NOASYNCREADDIR) &&
+	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
+		spin_unlock(&inode->i_lock);
 		err = __dcache_readdir(filp, dirent, filldir);
-		if (err != -EAGAIN) {
-			spin_unlock(&inode->i_lock);
+		if (err != -EAGAIN)
 			return err;
-		}
+	} else {
+		spin_unlock(&inode->i_lock);
 	}
-	spin_unlock(&inode->i_lock);
 	if (fi->dentry) {
 		err = note_last_dentry(fi, fi->dentry->d_name.name,
 				       fi->dentry->d_name.len);
@@ -487,14 +479,13 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 				  struct dentry *dentry, int err)
 {
-	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 	struct inode *parent = dentry->d_parent->d_inode;
 
 	/* .snap dir? */
 	if (err == -ENOENT &&
-	    ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */
 	    strcmp(dentry->d_name.name,
-		   client->mount_args->snapdir_name) == 0) {
+		   fsc->mount_options->snapdir_name) == 0) {
 		struct inode *inode = ceph_get_snapdir(parent);
 		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
 		     dentry, dentry->d_name.len, dentry->d_name.name, inode);
@@ -539,8 +530,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 				  struct nameidata *nd)
 {
-	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int op;
 	int err;
@@ -572,7 +563,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 		spin_lock(&dir->i_lock);
 		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
 		if (strncmp(dentry->d_name.name,
-			    client->mount_args->snapdir_name,
+			    fsc->mount_options->snapdir_name,
 			    dentry->d_name.len) &&
 		    !is_root_ceph_dentry(dir, dentry) &&
 		    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
@@ -629,8 +620,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 		      int mode, dev_t rdev)
 {
-	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -685,8 +676,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode,
 static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 			    const char *dest)
 {
-	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -716,8 +707,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 
 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int err = -EROFS;
 	int op;
@@ -758,8 +749,8 @@ out:
 static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 		     struct dentry *dentry)
 {
-	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -813,8 +804,8 @@ static int drop_caps_for_unlink(struct inode *inode)
  */
 static int ceph_unlink(struct inode *dir, struct dentry *dentry)
 {
-	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct inode *inode = dentry->d_inode;
 	struct ceph_mds_request *req;
 	int err = -EROFS;
@@ -854,8 +845,8 @@ out:
 static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		       struct inode *new_dir, struct dentry *new_dentry)
 {
-	struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -1076,7 +1067,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int left;
 
-	if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
+	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
 		return -EISDIR;
 
 	if (!cf->dir_info) {
@@ -1177,7 +1168,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
 	dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
 	     dn->d_name.len, dn->d_name.name);
 	if (di) {
-		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
+		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 		spin_lock(&mdsc->dentry_lru_lock);
 		list_add_tail(&di->lru, &mdsc->dentry_lru);
 		mdsc->num_dentry++;
@@ -1193,7 +1184,7 @@ void ceph_dentry_lru_touch(struct dentry *dn)
 	dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
 	     dn->d_name.len, dn->d_name.name, di->offset);
 	if (di) {
-		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
+		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 		spin_lock(&mdsc->dentry_lru_lock);
 		list_move_tail(&di->lru, &mdsc->dentry_lru);
 		spin_unlock(&mdsc->dentry_lru_lock);
@@ -1208,7 +1199,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
 	dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
 	     dn->d_name.len, dn->d_name.name);
 	if (di) {
-		mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
+		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 		spin_lock(&mdsc->dentry_lru_lock);
 		list_del_init(&di->lru);
 		mdsc->num_dentry--;
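
ceph_readdir() splits filp->f_pos into a directory fragment (high 32 bits) and an entry offset within that fragment (low 32 bits) via fpos_frag()/fpos_off(), which is why the filp->f_pos++ in __dcache_readdir only advances the offset half. A userspace model of that packing (the helper bodies are assumed from their use in this file):

/* cc -o fpos fpos.c */
#include <stdio.h>
#include <stdint.h>

static unsigned fpos_frag(int64_t p) { return (unsigned)(p >> 32); }
static unsigned fpos_off(int64_t p)  { return (unsigned)p; }

int main(void)
{
	/* fragment 0x2a, seventh entry within it */
	int64_t pos = ((int64_t)0x2a << 32) | 7;

	printf("frag %#x off %u\n", fpos_frag(pos), fpos_off(pos));
	return 0;
}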

+ 3 - 2
fs/ceph/export.c

@@ -1,10 +1,11 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/exportfs.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
 #include "super.h"
+#include "mds_client.h"
 
 /*
  * NFS export support
@@ -120,7 +121,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 static struct dentry *__cfh_to_dentry(struct super_block *sb,
 				      struct ceph_nfs_confh *cfh)
 {
-	struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 	struct inode *inode;
 	struct dentry *dentry;
 	struct ceph_vino vino;

+ 26 - 181
fs/ceph/file.c

@@ -1,5 +1,6 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/file.h>
@@ -38,8 +39,8 @@
 static struct ceph_mds_request *
 prepare_open_request(struct super_block *sb, int flags, int create_mode)
 {
-	struct ceph_client *client = ceph_sb_to_client(sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int want_auth = USE_ANY_MDS;
 	int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
@@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 int ceph_open(struct inode *inode, struct file *file)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	struct ceph_file_info *cf = file->private_data;
 	struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
@@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 				struct nameidata *nd, int mode,
 				int locked_dir)
 {
-	struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct file *file = nd->intent.open.file;
 	struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
 	struct ceph_mds_request *req;
@@ -269,163 +270,6 @@ int ceph_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-/*
- * build a vector of user pages
- */
-static struct page **get_direct_page_vector(const char __user *data,
-					    int num_pages,
-					    loff_t off, size_t len)
-{
-	struct page **pages;
-	int rc;
-
-	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-
-	down_read(&current->mm->mmap_sem);
-	rc = get_user_pages(current, current->mm, (unsigned long)data,
-			    num_pages, 0, 0, pages, NULL);
-	up_read(&current->mm->mmap_sem);
-	if (rc < 0)
-		goto fail;
-	return pages;
-
-fail:
-	kfree(pages);
-	return ERR_PTR(rc);
-}
-
-static void put_page_vector(struct page **pages, int num_pages)
-{
-	int i;
-
-	for (i = 0; i < num_pages; i++)
-		put_page(pages[i]);
-	kfree(pages);
-}
-
-void ceph_release_page_vector(struct page **pages, int num_pages)
-{
-	int i;
-
-	for (i = 0; i < num_pages; i++)
-		__free_pages(pages[i], 0);
-	kfree(pages);
-}
-
-/*
- * allocate a vector new pages
- */
-static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
-{
-	struct page **pages;
-	int i;
-
-	pages = kmalloc(sizeof(*pages) * num_pages, flags);
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-	for (i = 0; i < num_pages; i++) {
-		pages[i] = __page_cache_alloc(flags);
-		if (pages[i] == NULL) {
-			ceph_release_page_vector(pages, i);
-			return ERR_PTR(-ENOMEM);
-		}
-	}
-	return pages;
-}
-
-/*
- * copy user data into a page vector
- */
-static int copy_user_to_page_vector(struct page **pages,
-				    const char __user *data,
-				    loff_t off, size_t len)
-{
-	int i = 0;
-	int po = off & ~PAGE_CACHE_MASK;
-	int left = len;
-	int l, bad;
-
-	while (left > 0) {
-		l = min_t(int, PAGE_CACHE_SIZE-po, left);
-		bad = copy_from_user(page_address(pages[i]) + po, data, l);
-		if (bad == l)
-			return -EFAULT;
-		data += l - bad;
-		left -= l - bad;
-		po += l - bad;
-		if (po == PAGE_CACHE_SIZE) {
-			po = 0;
-			i++;
-		}
-	}
-	return len;
-}
-
-/*
- * copy user data from a page vector into a user pointer
- */
-static int copy_page_vector_to_user(struct page **pages, char __user *data,
-				    loff_t off, size_t len)
-{
-	int i = 0;
-	int po = off & ~PAGE_CACHE_MASK;
-	int left = len;
-	int l, bad;
-
-	while (left > 0) {
-		l = min_t(int, left, PAGE_CACHE_SIZE-po);
-		bad = copy_to_user(data, page_address(pages[i]) + po, l);
-		if (bad == l)
-			return -EFAULT;
-		data += l - bad;
-		left -= l - bad;
-		if (po) {
-			po += l - bad;
-			if (po == PAGE_CACHE_SIZE)
-				po = 0;
-		}
-		i++;
-	}
-	return len;
-}
-
-/*
- * Zero an extent within a page vector.  Offset is relative to the
- * start of the first page.
- */
-static void zero_page_vector_range(int off, int len, struct page **pages)
-{
-	int i = off >> PAGE_CACHE_SHIFT;
-
-	off &= ~PAGE_CACHE_MASK;
-
-	dout("zero_page_vector_page %u~%u\n", off, len);
-
-	/* leading partial page? */
-	if (off) {
-		int end = min((int)PAGE_CACHE_SIZE, off + len);
-		dout("zeroing %d %p head from %d\n", i, pages[i],
-		     (int)off);
-		zero_user_segment(pages[i], off, end);
-		len -= (end - off);
-		i++;
-	}
-	while (len >= PAGE_CACHE_SIZE) {
-		dout("zeroing %d %p len=%d\n", i, pages[i], len);
-		zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
-		len -= PAGE_CACHE_SIZE;
-		i++;
-	}
-	/* trailing partial page? */
-	if (len) {
-		dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
-		zero_user_segment(pages[i], 0, len);
-	}
-}
-
-
 /*
  * Read a range of bytes striped over one or more objects.  Iterate over
  * objects we stripe over.  (That's not atomic, but good enough for now.)
@@ -438,7 +282,7 @@ static int striped_read(struct inode *inode,
 			struct page **pages, int num_pages,
 			int *checkeof)
 {
-	struct ceph_client *client = ceph_inode_to_client(inode);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 pos, this_len;
 	int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
@@ -459,7 +303,7 @@ static int striped_read(struct inode *inode,
 
 more:
 	this_len = left;
-	ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode),
+	ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
 				  &ci->i_layout, pos, &this_len,
 				  ci->i_truncate_seq,
 				  ci->i_truncate_size,
@@ -477,8 +321,8 @@ more:
 
 		if (read < pos - off) {
 			dout(" zero gap %llu to %llu\n", off + read, pos);
-			zero_page_vector_range(page_off + read,
-					       pos - off - read, pages);
+			ceph_zero_page_vector_range(page_off + read,
+						    pos - off - read, pages);
 		}
 		pos += ret;
 		read = pos - off;
@@ -495,8 +339,8 @@ more:
 		/* was original extent fully inside i_size? */
 		if (pos + left <= inode->i_size) {
 			dout("zero tail\n");
-			zero_page_vector_range(page_off + read, len - read,
-					       pages);
+			ceph_zero_page_vector_range(page_off + read, len - read,
+						    pages);
 			read = len;
 			goto out;
 		}
@@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
 	if (file->f_flags & O_DIRECT) {
-		pages = get_direct_page_vector(data, num_pages, off, len);
+		pages = ceph_get_direct_page_vector(data, num_pages, off, len);
 
 		/*
 		 * flush any page cache pages in this range.  this
@@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 	ret = striped_read(inode, off, len, pages, num_pages, checkeof);
 
 	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
-		ret = copy_page_vector_to_user(pages, data, off, ret);
+		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
 	if (ret >= 0)
 		*poff = off + ret;
 
 done:
 	if (file->f_flags & O_DIRECT)
-		put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages);
 	else
 		ceph_release_page_vector(pages, num_pages);
 	dout("sync_read result %d\n", ret);
@@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_client *client = ceph_inode_to_client(inode);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_osd_request *req;
 	struct page **pages;
 	int num_pages;
@@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	 */
 more:
 	len = left;
-	req = ceph_osdc_new_request(&client->osdc, &ci->i_layout,
+	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
 				    ceph_vino(inode), pos, &len,
 				    CEPH_OSD_OP_WRITE, flags,
 				    ci->i_snap_realm->cached_context,
@@ -655,7 +499,7 @@ more:
 	num_pages = calc_pages_for(pos, len);
 
 	if (file->f_flags & O_DIRECT) {
-		pages = get_direct_page_vector(data, num_pages, pos, len);
+		pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
 		if (IS_ERR(pages)) {
 			ret = PTR_ERR(pages);
 			goto out;
@@ -673,7 +517,7 @@ more:
 			ret = PTR_ERR(pages);
 			goto out;
 		}
-		ret = copy_user_to_page_vector(pages, data, pos, len);
+		ret = ceph_copy_user_to_page_vector(pages, data, pos, len);
 		if (ret < 0) {
 			ceph_release_page_vector(pages, num_pages);
 			goto out;
@@ -689,7 +533,7 @@ more:
 	req->r_num_pages = num_pages;
 	req->r_inode = inode;
 
-	ret = ceph_osdc_start_request(&client->osdc, req, false);
+	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
 	if (!ret) {
 		if (req->r_safe_callback) {
 			/*
@@ -701,11 +545,11 @@ more:
 			spin_unlock(&ci->i_unsafe_lock);
 			ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
 		}
-		ret = ceph_osdc_wait_request(&client->osdc, req);
+		ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
 	}
 
 	if (file->f_flags & O_DIRECT)
-		put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages);
 	else if (file->f_flags & O_SYNC)
 		ceph_release_page_vector(pages, num_pages);
 
@@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
+	struct ceph_osd_client *osdc =
+		&ceph_sb_to_client(inode->i_sb)->client->osdc;
 	loff_t endoff = pos + iov->iov_len;
 	int want, got = 0;
 	int ret, err;
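
The static page-vector helpers deleted above reappear in libceph under a ceph_ prefix, so the filesystem and rbd can share them. A sketch of the usual call sequence for a buffered sync write, assuming the prototypes exported through <linux/ceph/libceph.h> match the moved code:

#include <linux/err.h>
#include <linux/ceph/libceph.h>

static int fill_page_vector(const char __user *data, loff_t off, size_t len)
{
	int num_pages = calc_pages_for(off, len);
	struct page **pages;
	int ret;

	pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	ret = ceph_copy_user_to_page_vector(pages, data, off, len);
	/* ... hand pages to ceph_osdc_writepages() or similar ... */
	ceph_release_page_vector(pages, num_pages);
	return ret < 0 ? ret : 0;
}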

+ 10 - 9
fs/ceph/inode.c

@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/module.h>
 #include <linux/fs.h>
@@ -13,7 +13,8 @@
 #include <linux/pagevec.h>
 
 #include "super.h"
-#include "decode.h"
+#include "mds_client.h"
+#include <linux/ceph/decode.h>
 
 /*
  * Ceph inode operations
@@ -384,7 +385,7 @@ void ceph_destroy_inode(struct inode *inode)
 	 */
 	if (ci->i_snap_realm) {
 		struct ceph_mds_client *mdsc =
-			&ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+			ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
 		struct ceph_snap_realm *realm = ci->i_snap_realm;
 
 		dout(" dropping residual ref to snap realm %p\n", realm);
@@ -685,7 +686,7 @@ static int fill_inode(struct inode *inode,
 		}
 
 		/* it may be better to set st_size in getattr instead? */
-		if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
+		if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
 			inode->i_size = ci->i_rbytes;
 		break;
 	default:
@@ -901,7 +902,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 	struct inode *in = NULL;
 	struct ceph_mds_reply_inode *ininfo;
 	struct ceph_vino vino;
-	struct ceph_client *client = ceph_sb_to_client(sb);
+	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 	int i = 0;
 	int err = 0;
 
@@ -965,7 +966,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 	 */
 	if (rinfo->head->is_dentry && !req->r_aborted &&
 	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
-					       client->mount_args->snapdir_name,
+					       fsc->mount_options->snapdir_name,
 					       req->r_dentry->d_name.len))) {
 		/*
 		 * lookup link rename   : null -> possibly existing inode
@@ -1533,7 +1534,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 	struct inode *parent_inode = dentry->d_parent->d_inode;
 	const unsigned int ia_valid = attr->ia_valid;
 	struct ceph_mds_request *req;
-	struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
 	int issued;
 	int release = 0, dirtied = 0;
 	int mask = 0;
@@ -1728,8 +1729,8 @@ out:
  */
 int ceph_do_getattr(struct inode *inode, int mask)
 {
-	struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 

+ 73 - 4
fs/ceph/ioctl.c

@@ -1,8 +1,10 @@
 #include <linux/in.h>
 
-#include "ioctl.h"
 #include "super.h"
-#include "ceph_debug.h"
+#include "mds_client.h"
+#include <linux/ceph/ceph_debug.h>
+
+#include "ioctl.h"
 
 
 /*
@@ -37,7 +39,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
-	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	struct ceph_ioctl_layout l;
 	int err, i;
@@ -89,6 +91,68 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 	return err;
 }
 
+/*
+ * Set a layout policy on a directory inode. All items in the tree
+ * rooted at this inode will inherit this layout on creation
+ * (it does not apply retroactively), unless a subdirectory has
+ * its own layout policy.
+ */
+static long ceph_ioctl_set_layout_policy(struct file *file, void __user *arg)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct ceph_mds_request *req;
+	struct ceph_ioctl_layout l;
+	int err, i;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+
+	/* copy and validate */
+	if (copy_from_user(&l, arg, sizeof(l)))
+		return -EFAULT;
+
+	if ((l.object_size & ~PAGE_MASK) ||
+	    (l.stripe_unit & ~PAGE_MASK) ||
+	    !l.stripe_unit ||
+	    (l.object_size &&
+	        (unsigned)l.object_size % (unsigned)l.stripe_unit))
+		return -EINVAL;
+
+	/* make sure it's a valid data pool */
+	if (l.data_pool > 0) {
+		mutex_lock(&mdsc->mutex);
+		err = -EINVAL;
+		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
+			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
+				err = 0;
+				break;
+			}
+		mutex_unlock(&mdsc->mutex);
+		if (err)
+			return err;
+	}
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT,
+				       USE_AUTH_MDS);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	req->r_inode = igrab(inode);
+
+	req->r_args.setlayout.layout.fl_stripe_unit =
+			cpu_to_le32(l.stripe_unit);
+	req->r_args.setlayout.layout.fl_stripe_count =
+			cpu_to_le32(l.stripe_count);
+	req->r_args.setlayout.layout.fl_object_size =
+			cpu_to_le32(l.object_size);
+	req->r_args.setlayout.layout.fl_pg_pool =
+			cpu_to_le32(l.data_pool);
+	req->r_args.setlayout.layout.fl_pg_preferred =
+			cpu_to_le32(l.preferred_osd);
+
+	err = ceph_mdsc_do_request(mdsc, inode, req);
+	ceph_mdsc_put_request(req);
+	return err;
+}
+
 /*
  * Return object name, size/offset information, and location (OSD
  * number, network address) for a given file offset.
@@ -98,7 +162,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 	struct ceph_ioctl_dataloc dl;
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
+	struct ceph_osd_client *osdc =
+		&ceph_sb_to_client(inode->i_sb)->client->osdc;
 	u64 len = 1, olen;
 	u64 tmp;
 	struct ceph_object_layout ol;
@@ -174,11 +239,15 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case CEPH_IOC_SET_LAYOUT:
 		return ceph_ioctl_set_layout(file, (void __user *)arg);
 
+	case CEPH_IOC_SET_LAYOUT_POLICY:
+		return ceph_ioctl_set_layout_policy(file, (void __user *)arg);
+
 	case CEPH_IOC_GET_DATALOC:
 		return ceph_ioctl_get_dataloc(file, (void __user *)arg);
 
 	case CEPH_IOC_LAZYIO:
 		return ceph_ioctl_lazyio(file);
 	}
+
 	return -ENOTTY;
 }
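
The sanity checks in ceph_ioctl_set_layout_policy require the stripe unit and object size to be page-aligned, the stripe unit to be non-zero, and the object size to be a whole number of stripe units. A userspace model of that predicate with a worked pair of examples (4 KiB page size assumed):

/* cc -o layout layout.c */
#include <stdio.h>

#define PAGE_MASK (~4095UL)	/* assume 4 KiB pages */

static int layout_valid(unsigned long object_size, unsigned long stripe_unit)
{
	if ((object_size & ~PAGE_MASK) || (stripe_unit & ~PAGE_MASK))
		return 0;	/* not page-aligned */
	if (!stripe_unit)
		return 0;	/* stripe unit must be non-zero */
	if (object_size && object_size % stripe_unit)
		return 0;	/* object must hold whole stripe units */
	return 1;
}

int main(void)
{
	printf("%d\n", layout_valid(4UL << 20, 64UL << 10)); /* 1: 4M / 64K ok */
	printf("%d\n", layout_valid(4UL << 20, 96UL << 10)); /* 0: 4M not a multiple of 96K */
	return 0;
}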

+ 3 - 1
fs/ceph/ioctl.h

@@ -4,7 +4,7 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
-#define CEPH_IOCTL_MAGIC 0x97
+#define CEPH_IOCTL_MAGIC 0x98
 
 /* just use u64 to align sanely on all archs */
 struct ceph_ioctl_layout {
@@ -17,6 +17,8 @@ struct ceph_ioctl_layout {
 				   struct ceph_ioctl_layout)
 #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2,		\
 				   struct ceph_ioctl_layout)
+#define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5,	\
+				   struct ceph_ioctl_layout)
 
 /*
  * Extract identity, address of the OSD and object storing a given
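
A hypothetical userspace caller for the new ioctl, setting a default layout policy on a directory fd; the struct and ioctl number are as defined above, while the field values (including preferred_osd = -1 for "no preference") are illustrative assumptions:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "ioctl.h"	/* the header above, copied next to this example */

int main(int argc, char **argv)
{
	struct ceph_ioctl_layout l = {
		.stripe_unit   = 64 * 1024,	 /* page-aligned */
		.stripe_count  = 4,
		.object_size   = 4 * 1024 * 1024, /* multiple of stripe_unit */
		.preferred_osd = -1,		 /* assumed: no preferred osd */
	};
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY | O_DIRECTORY);
	if (fd < 0 || ioctl(fd, CEPH_IOC_SET_LAYOUT_POLICY, &l) < 0) {
		perror("CEPH_IOC_SET_LAYOUT_POLICY");
		return 1;
	}
	close(fd);
	return 0;
}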

+ 18 - 5
fs/ceph/locks.c

@@ -1,11 +1,11 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/file.h>
 #include <linux/namei.h>
 
 #include "super.h"
 #include "mds_client.h"
-#include "pagelist.h"
+#include <linux/ceph/pagelist.h>
 
 /**
  * Implement fcntl and flock locking functions.
@@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_mds_client *mdsc =
-		&ceph_sb_to_client(inode->i_sb)->mdsc;
+		ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 
@@ -181,8 +181,9 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
  * Encode the flock and fcntl locks for the given inode into the pagelist.
  * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
  * sequential flock locks.
- * Must be called with BLK already held, and the lock numbers should have
- * been gathered under the same lock holding window.
+ * Must be called with lock_flocks() already held.
+ * If we encounter more of a specific lock type than expected,
+ * we return -ENOSPC.
  */
 int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 		      int num_fcntl_locks, int num_flock_locks)
@@ -190,6 +191,8 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 	struct file_lock *lock;
 	struct ceph_filelock cephlock;
 	int err = 0;
+	int seen_fcntl = 0;
+	int seen_flock = 0;
 
 	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
 	     num_fcntl_locks);
@@ -198,6 +201,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 		goto fail;
 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
 		if (lock->fl_flags & FL_POSIX) {
+			++seen_fcntl;
+			if (seen_fcntl > num_fcntl_locks) {
+				err = -ENOSPC;
+				goto fail;
+			}
 			err = lock_to_ceph_filelock(lock, &cephlock);
 			if (err)
 				goto fail;
@@ -213,6 +221,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
 		goto fail;
 	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
 		if (lock->fl_flags & FL_FLOCK) {
+			++seen_flock;
+			if (seen_flock > num_flock_locks) {
+				err = -ENOSPC;
+				goto fail;
+			}
 			err = lock_to_ceph_filelock(lock, &cephlock);
 			if (err)
 				goto fail;
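
The seen_fcntl/seen_flock guards exist because the lock list can change between ceph_count_locks() and ceph_encode_locks(): the caller (encode_caps_cb, in the mds_client.c hunks below) counts under lock_flocks(), drops the lock to preallocate pagelist space, and retries when encoding reports -ENOSPC. A userspace model of that protocol, with all names as stand-ins:

/* cc -o relock relock.c */
#include <stdio.h>
#include <errno.h>

static int nlocks = 3;			/* stand-in for inode->i_flock */

static int encode_locks(int expected)
{
	int seen = 0, i;

	for (i = 0; i < nlocks; i++) {
		if (++seen > expected)
			return -ENOSPC;	/* list grew since we counted */
		/* ...append one ceph_filelock to the pagelist... */
	}
	return 0;
}

int main(void)
{
	int expected = nlocks;	/* counted under lock_flocks() */
	int err;

	nlocks = 4;		/* a lock raced in while unlocked */
	do {
		err = encode_locks(expected);
		if (err == -ENOSPC)
			expected = nlocks;	/* re-count and retry */
	} while (err == -ENOSPC);
	printf("encoded %d locks (err %d)\n", expected, err);
	return 0;
}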

+ 81 - 48
fs/ceph/mds_client.c

@@ -1,17 +1,21 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/fs.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 #include <linux/smp_lock.h>
 
-#include "mds_client.h"
-#include "mon_client.h"
 #include "super.h"
-#include "messenger.h"
-#include "decode.h"
-#include "auth.h"
-#include "pagelist.h"
+#include "mds_client.h"
+
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/pagelist.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
 
 /*
  * A cluster of MDS (metadata server) daemons is responsible for
@@ -286,8 +290,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
 	     atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
 	if (atomic_dec_and_test(&s->s_ref)) {
 		if (s->s_authorizer)
-			s->s_mdsc->client->monc.auth->ops->destroy_authorizer(
-				s->s_mdsc->client->monc.auth, s->s_authorizer);
+		     s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
+			     s->s_mdsc->fsc->client->monc.auth,
+			     s->s_authorizer);
 		kfree(s);
 	}
 }
@@ -344,7 +349,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	s->s_seq = 0;
 	mutex_init(&s->s_mutex);
 
-	ceph_con_init(mdsc->client->msgr, &s->s_con);
+	ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
 	s->s_con.private = s;
 	s->s_con.ops = &mds_con_ops;
 	s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
@@ -599,7 +604,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
 	} else if (req->r_dentry) {
 		struct inode *dir = req->r_dentry->d_parent->d_inode;
 
-		if (dir->i_sb != mdsc->client->sb) {
+		if (dir->i_sb != mdsc->fsc->sb) {
 			/* not this fs! */
 			inode = req->r_dentry->d_inode;
 		} else if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -884,7 +889,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	__ceph_remove_cap(cap);
 	if (!__ceph_is_any_real_caps(ci)) {
 		struct ceph_mds_client *mdsc =
-			&ceph_sb_to_client(inode->i_sb)->mdsc;
+			ceph_sb_to_client(inode->i_sb)->mdsc;
 
 		spin_lock(&mdsc->cap_dirty_lock);
 		if (!list_empty(&ci->i_dirty_item)) {
@@ -1146,7 +1151,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
 	struct ceph_msg *msg, *partial = NULL;
 	struct ceph_mds_cap_release *head;
 	int err = -ENOMEM;
-	int extra = mdsc->client->mount_args->cap_release_safety;
+	int extra = mdsc->fsc->mount_options->cap_release_safety;
 	int num;
 
 	dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
@@ -2085,7 +2090,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 
 	/* insert trace into our cache */
 	mutex_lock(&req->r_fill_mutex);
-	err = ceph_fill_trace(mdsc->client->sb, req, req->r_session);
+	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
 	if (err == 0) {
 		if (result == 0 && rinfo->dir_nr)
 			ceph_readdir_prepopulate(req, req->r_session);
@@ -2361,19 +2366,35 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 
 	if (recon_state->flock) {
 		int num_fcntl_locks, num_flock_locks;
-
-		lock_kernel();
-		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
-		rec.v2.flock_len = (2*sizeof(u32) +
-				    (num_fcntl_locks+num_flock_locks) *
-				    sizeof(struct ceph_filelock));
-
-		err = ceph_pagelist_append(pagelist, &rec, reclen);
-		if (!err)
-			err = ceph_encode_locks(inode, pagelist,
-						num_fcntl_locks,
-						num_flock_locks);
-		unlock_kernel();
+		struct ceph_pagelist_cursor trunc_point;
+
+		ceph_pagelist_set_cursor(pagelist, &trunc_point);
+		do {
+			lock_flocks();
+			ceph_count_locks(inode, &num_fcntl_locks,
+					 &num_flock_locks);
+			rec.v2.flock_len = (2*sizeof(u32) +
+					    (num_fcntl_locks+num_flock_locks) *
+					    sizeof(struct ceph_filelock));
+			unlock_flocks();
+
+			/* pre-alloc pagelist */
+			ceph_pagelist_truncate(pagelist, &trunc_point);
+			err = ceph_pagelist_append(pagelist, &rec, reclen);
+			if (!err)
+				err = ceph_pagelist_reserve(pagelist,
+							    rec.v2.flock_len);
+
+			/* encode locks */
+			if (!err) {
+				lock_flocks();
+				err = ceph_encode_locks(inode,
+							pagelist,
+							num_fcntl_locks,
+							num_flock_locks);
+				unlock_flocks();
+			}
+		} while (err == -ENOSPC);
 	} else {
 		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}
@@ -2613,7 +2634,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 			 struct ceph_mds_session *session,
 			 struct ceph_msg *msg)
 {
-	struct super_block *sb = mdsc->client->sb;
+	struct super_block *sb = mdsc->fsc->sb;
 	struct inode *inode;
 	struct ceph_inode_info *ci;
 	struct dentry *parent, *dentry;
@@ -2891,10 +2912,16 @@ static void delayed_work(struct work_struct *work)
 	schedule_delayed(mdsc);
 }
 
-int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
+int ceph_mdsc_init(struct ceph_fs_client *fsc)
 {
-	mdsc->client = client;
+	struct ceph_mds_client *mdsc;
+
+	mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
+	if (!mdsc)
+		return -ENOMEM;
+	mdsc->fsc = fsc;
+	fsc->mdsc = mdsc;
 	mutex_init(&mdsc->mutex);
 	mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
 	if (mdsc->mdsmap == NULL)
@@ -2927,7 +2954,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
 	INIT_LIST_HEAD(&mdsc->dentry_lru);
 
 	ceph_caps_init(mdsc);
-	ceph_adjust_min_caps(mdsc, client->min_caps);
+	ceph_adjust_min_caps(mdsc, fsc->min_caps);
 
 	return 0;
 }
@@ -2939,7 +2966,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
 static void wait_requests(struct ceph_mds_client *mdsc)
 {
 	struct ceph_mds_request *req;
-	struct ceph_client *client = mdsc->client;
+	struct ceph_fs_client *fsc = mdsc->fsc;
 
 	mutex_lock(&mdsc->mutex);
 	if (__get_oldest_req(mdsc)) {
@@ -2947,7 +2974,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
 
 		dout("wait_requests waiting for requests\n");
 		wait_for_completion_timeout(&mdsc->safe_umount_waiters,
-				    client->mount_args->mount_timeout * HZ);
+				    fsc->client->options->mount_timeout * HZ);
 
 		/* tear down remaining requests */
 		mutex_lock(&mdsc->mutex);
@@ -3030,7 +3057,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 {
 	u64 want_tid, want_flush;
 
-	if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
+	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
 		return;
 
 	dout("sync\n");
@@ -3053,7 +3080,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc)
 {
 	int i, n = 0;
 
-	if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
+	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
 		return true;
 
 	mutex_lock(&mdsc->mutex);
@@ -3071,8 +3098,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 {
 	struct ceph_mds_session *session;
 	int i;
-	struct ceph_client *client = mdsc->client;
-	unsigned long timeout = client->mount_args->mount_timeout * HZ;
+	struct ceph_fs_client *fsc = mdsc->fsc;
+	unsigned long timeout = fsc->client->options->mount_timeout * HZ;
 
 	dout("close_sessions\n");
 
@@ -3119,7 +3146,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
 	dout("stopped\n");
 }
 
-void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
+static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
 {
 	dout("stop\n");
 	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
@@ -3129,6 +3156,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
 	ceph_caps_finalize(mdsc);
 }
 
+void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
+{
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	ceph_mdsc_stop(mdsc);
+	fsc->mdsc = NULL;
+	kfree(mdsc);
+}
+
 
 /*
  * handle mds map update.
@@ -3145,14 +3181,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 
 	ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
 	ceph_decode_copy(&p, &fsid, sizeof(fsid));
-	if (ceph_check_fsid(mdsc->client, &fsid) < 0)
+	if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
 		return;
 	epoch = ceph_decode_32(&p);
 	maplen = ceph_decode_32(&p);
 	dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
 
 	/* do we need it? */
-	ceph_monc_got_mdsmap(&mdsc->client->monc, epoch);
+	ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
 	mutex_lock(&mdsc->mutex);
 	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
 		dout("handle_map epoch %u <= our %u\n",
@@ -3176,7 +3212,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 	} else {
 		mdsc->mdsmap = newmap;  /* first mds map */
 	}
-	mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
+	mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
 
 	__wake_requests(mdsc, &mdsc->waiting_for_map);
 
@@ -3277,7 +3313,7 @@ static int get_authorizer(struct ceph_connection *con,
 {
 	struct ceph_mds_session *s = con->private;
 	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->client->monc.auth;
+	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 	int ret = 0;
 
 	if (force_new && s->s_authorizer) {
@@ -3311,7 +3347,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
 {
 	struct ceph_mds_session *s = con->private;
 	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->client->monc.auth;
+	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 
 	return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
 }
@@ -3320,12 +3356,12 @@ static int invalidate_authorizer(struct ceph_connection *con)
 {
 	struct ceph_mds_session *s = con->private;
 	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->client->monc.auth;
+	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 
 	if (ac->ops->invalidate_authorizer)
 		ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
 
-	return ceph_monc_validate_auth(&mdsc->client->monc);
+	return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
 }
 
 static const struct ceph_connection_operations mds_con_ops = {
@@ -3338,7 +3374,4 @@ static const struct ceph_connection_operations mds_con_ops = {
 	.peer_reset = peer_reset,
 };
 
-
-
-
 /* eof */

+ 7 - 13
fs/ceph/mds_client.h

@@ -8,9 +8,9 @@
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 
-#include "types.h"
-#include "messenger.h"
-#include "mdsmap.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/mdsmap.h>
 
 /*
  * Some lock dependencies:
@@ -26,7 +26,7 @@
  *
  */
 
-struct ceph_client;
+struct ceph_fs_client;
 struct ceph_cap;
 
 /*
@@ -230,7 +230,7 @@ struct ceph_mds_request {
  * mds client state
  */
 struct ceph_mds_client {
-	struct ceph_client      *client;
+	struct ceph_fs_client  *fsc;
 	struct mutex            mutex;         /* all nested structures */
 
 	struct ceph_mdsmap      *mdsmap;
@@ -289,11 +289,6 @@ struct ceph_mds_client {
 	int		caps_avail_count;    /* unused, unreserved */
 	int		caps_min_count;      /* keep at least this many
 						(unreserved) */
-
-#ifdef CONFIG_DEBUG_FS
-	struct dentry 	  *debugfs_file;
-#endif
-
 	spinlock_t	  dentry_lru_lock;
 	struct list_head  dentry_lru;
 	int		  num_dentry;
@@ -316,10 +311,9 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s);
 extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
 			     struct ceph_msg *msg, int mds);
 
-extern int ceph_mdsc_init(struct ceph_mds_client *mdsc,
-			   struct ceph_client *client);
+extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
 extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
-extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc);
+extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
 
 extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
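
With the mds client now reached through a pointer from struct ceph_fs_client, setup and teardown become an init/destroy pair keyed on the fs client instead of init/stop on an embedded struct. A hedged sketch of the implied usage, built only from the prototypes above (the function names are illustrative):

	static int example_mount(struct ceph_fs_client *fsc)
	{
		int err = ceph_mdsc_init(fsc);	/* allocates and links fsc->mdsc */

		if (err)
			return err;
		/* ... rest of mount ... */
		return 0;
	}

	static void example_umount(struct ceph_fs_client *fsc)
	{
		ceph_mdsc_close_sessions(fsc->mdsc);
		ceph_mdsc_destroy(fsc);	/* stops the mdsc, then frees it */
	}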
 

+ 6 - 5
fs/ceph/mdsmap.c

@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/bug.h>
 #include <linux/err.h>
@@ -6,9 +6,9 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#include "mdsmap.h"
-#include "messenger.h"
-#include "decode.h"
+#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
 
 #include "super.h"
 
@@ -117,7 +117,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		}
 
 		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
-		     i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr),
+		     i+1, n, global_id, mds, inc,
+		     ceph_pr_addr(&addr.in_addr),
 		     ceph_mds_state_name(state));
 		if (mds >= 0 && mds < m->m_max_mds && state > 0) {
 			m->m_info[mds].global_id = global_id;

+ 0 - 63
fs/ceph/pagelist.c

@@ -1,63 +0,0 @@
-
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-
-#include "pagelist.h"
-
-static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
-{
-	struct page *page = list_entry(pl->head.prev, struct page,
-				       lru);
-	kunmap(page);
-}
-
-int ceph_pagelist_release(struct ceph_pagelist *pl)
-{
-	if (pl->mapped_tail)
-		ceph_pagelist_unmap_tail(pl);
-
-	while (!list_empty(&pl->head)) {
-		struct page *page = list_first_entry(&pl->head, struct page,
-						     lru);
-		list_del(&page->lru);
-		__free_page(page);
-	}
-	return 0;
-}
-
-static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
-{
-	struct page *page = __page_cache_alloc(GFP_NOFS);
-	if (!page)
-		return -ENOMEM;
-	pl->room += PAGE_SIZE;
-	list_add_tail(&page->lru, &pl->head);
-	if (pl->mapped_tail)
-		ceph_pagelist_unmap_tail(pl);
-	pl->mapped_tail = kmap(page);
-	return 0;
-}
-
-int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len)
-{
-	while (pl->room < len) {
-		size_t bit = pl->room;
-		int ret;
-
-		memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK),
-		       buf, bit);
-		pl->length += bit;
-		pl->room -= bit;
-		buf += bit;
-		len -= bit;
-		ret = ceph_pagelist_addpage(pl);
-		if (ret)
-			return ret;
-	}
-
-	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len);
-	pl->length += len;
-	pl->room -= len;
-	return 0;
-}

+ 6 - 4
fs/ceph/snap.c

@@ -1,10 +1,12 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/sort.h>
 #include <linux/slab.h>
 
 #include "super.h"
-#include "decode.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
 
 /*
  * Snapshots in ceph are driven in large part by cooperation from the
@@ -526,7 +528,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 			    struct ceph_cap_snap *capsnap)
 {
 	struct inode *inode = &ci->vfs_inode;
-	struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 
 	BUG_ON(capsnap->writing);
 	capsnap->size = inode->i_size;
@@ -747,7 +749,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 		      struct ceph_mds_session *session,
 		      struct ceph_msg *msg)
 {
-	struct super_block *sb = mdsc->client->sb;
+	struct super_block *sb = mdsc->fsc->sb;
 	int mds = session->s_mds;
 	u64 split;
 	int op;

+ 3 - 79
fs/ceph/ceph_strings.c → fs/ceph/strings.c

@@ -1,71 +1,9 @@
 /*
- * Ceph string constants
+ * Ceph fs string constants
  */
-#include "types.h"
+#include <linux/module.h>
+#include <linux/ceph/types.h>
 
-const char *ceph_entity_type_name(int type)
-{
-	switch (type) {
-	case CEPH_ENTITY_TYPE_MDS: return "mds";
-	case CEPH_ENTITY_TYPE_OSD: return "osd";
-	case CEPH_ENTITY_TYPE_MON: return "mon";
-	case CEPH_ENTITY_TYPE_CLIENT: return "client";
-	case CEPH_ENTITY_TYPE_AUTH: return "auth";
-	default: return "unknown";
-	}
-}
-
-const char *ceph_osd_op_name(int op)
-{
-	switch (op) {
-	case CEPH_OSD_OP_READ: return "read";
-	case CEPH_OSD_OP_STAT: return "stat";
-
-	case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
-
-	case CEPH_OSD_OP_WRITE: return "write";
-	case CEPH_OSD_OP_DELETE: return "delete";
-	case CEPH_OSD_OP_TRUNCATE: return "truncate";
-	case CEPH_OSD_OP_ZERO: return "zero";
-	case CEPH_OSD_OP_WRITEFULL: return "writefull";
-	case CEPH_OSD_OP_ROLLBACK: return "rollback";
-
-	case CEPH_OSD_OP_APPEND: return "append";
-	case CEPH_OSD_OP_STARTSYNC: return "startsync";
-	case CEPH_OSD_OP_SETTRUNC: return "settrunc";
-	case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
-
-	case CEPH_OSD_OP_TMAPUP: return "tmapup";
-	case CEPH_OSD_OP_TMAPGET: return "tmapget";
-	case CEPH_OSD_OP_TMAPPUT: return "tmapput";
-
-	case CEPH_OSD_OP_GETXATTR: return "getxattr";
-	case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
-	case CEPH_OSD_OP_SETXATTR: return "setxattr";
-	case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
-	case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
-	case CEPH_OSD_OP_RMXATTR: return "rmxattr";
-	case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
-
-	case CEPH_OSD_OP_PULL: return "pull";
-	case CEPH_OSD_OP_PUSH: return "push";
-	case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
-	case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
-	case CEPH_OSD_OP_SCRUB: return "scrub";
-
-	case CEPH_OSD_OP_WRLOCK: return "wrlock";
-	case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
-	case CEPH_OSD_OP_RDLOCK: return "rdlock";
-	case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
-	case CEPH_OSD_OP_UPLOCK: return "uplock";
-	case CEPH_OSD_OP_DNLOCK: return "dnlock";
-
-	case CEPH_OSD_OP_CALL: return "call";
-
-	case CEPH_OSD_OP_PGLS: return "pgls";
-	}
-	return "???";
-}
 
 const char *ceph_mds_state_name(int s)
 {
@@ -177,17 +115,3 @@ const char *ceph_snap_op_name(int o)
 	}
 	return "???";
 }
-
-const char *ceph_pool_op_name(int op)
-{
-	switch (op) {
-	case POOL_OP_CREATE: return "create";
-	case POOL_OP_DELETE: return "delete";
-	case POOL_OP_AUID_CHANGE: return "auid change";
-	case POOL_OP_CREATE_SNAP: return "create snap";
-	case POOL_OP_DELETE_SNAP: return "delete snap";
-	case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
-	case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
-	}
-	return "???";
-}

These changes were omitted because the diff is too large
+ 380 - 555
fs/ceph/super.c


+ 145 - 255
fs/ceph/super.h

@@ -1,7 +1,7 @@
 #ifndef _FS_CEPH_SUPER_H
 #define _FS_CEPH_SUPER_H
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <asm/unaligned.h>
 #include <linux/backing-dev.h>
@@ -14,13 +14,7 @@
 #include <linux/writeback.h>
 #include <linux/slab.h>
 
-#include "types.h"
-#include "messenger.h"
-#include "msgpool.h"
-#include "mon_client.h"
-#include "mds_client.h"
-#include "osd_client.h"
-#include "ceph_fs.h"
+#include <linux/ceph/libceph.h>
 
 /* f_type in struct statfs */
 #define CEPH_SUPER_MAGIC 0x00c36400
@@ -30,42 +24,25 @@
 #define CEPH_BLOCK_SHIFT   20  /* 1 MB */
 #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
 
-/*
- * Supported features
- */
-#define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK
-#define CEPH_FEATURE_REQUIRED  CEPH_FEATURE_NOSRCADDR
+#define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
+#define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
+#define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
 
-/*
- * mount options
- */
-#define CEPH_OPT_FSID             (1<<0)
-#define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
-#define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
-#define CEPH_OPT_DIRSTAT          (1<<4) /* funky `cat dirname` for stats */
-#define CEPH_OPT_RBYTES           (1<<5) /* dir st_bytes = rbytes */
-#define CEPH_OPT_NOCRC            (1<<6) /* no data crc on writes */
-#define CEPH_OPT_NOASYNCREADDIR   (1<<7) /* no dcache readdir */
+#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
 
-#define CEPH_OPT_DEFAULT   (CEPH_OPT_RBYTES)
+#define ceph_set_mount_opt(fsc, opt) \
+	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
+#define ceph_test_mount_opt(fsc, opt) \
+	(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
 
-#define ceph_set_opt(client, opt) \
-	(client)->mount_args->flags |= CEPH_OPT_##opt;
-#define ceph_test_opt(client, opt) \
-	(!!((client)->mount_args->flags & CEPH_OPT_##opt))
+#define CEPH_MAX_READDIR_DEFAULT        1024
+#define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
+#define CEPH_SNAPDIRNAME_DEFAULT        ".snap"
 
-
-struct ceph_mount_args {
-	int sb_flags;
+struct ceph_mount_options {
 	int flags;
-	struct ceph_fsid fsid;
-	struct ceph_entity_addr my_addr;
-	int num_mon;
-	struct ceph_entity_addr *mon_addr;
-	int mount_timeout;
-	int osd_idle_ttl;
-	int osd_timeout;
-	int osd_keepalive_timeout;
+	int sb_flags;
+
 	int wsize;
 	int rsize;            /* max readahead */
 	int congestion_kb;    /* max writeback in flight */
@@ -73,82 +50,25 @@ struct ceph_mount_args {
 	int cap_release_safety;
 	int max_readdir;       /* max readdir result (entries) */
 	int max_readdir_bytes; /* max readdir result (bytes) */
-	char *snapdir_name;   /* default ".snap" */
-	char *name;
-	char *secret;
-};
 
-/*
- * defaults
- */
-#define CEPH_MOUNT_TIMEOUT_DEFAULT  60
-#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
-#define CEPH_OSD_KEEPALIVE_DEFAULT  5
-#define CEPH_OSD_IDLE_TTL_DEFAULT    60
-#define CEPH_MOUNT_RSIZE_DEFAULT    (512*1024) /* readahead */
-#define CEPH_MAX_READDIR_DEFAULT    1024
-#define CEPH_MAX_READDIR_BYTES_DEFAULT    (512*1024)
-
-#define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
-#define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024)
-
-#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
-#define CEPH_AUTH_NAME_DEFAULT   "guest"
-/*
- * Delay telling the MDS we no longer want caps, in case we reopen
- * the file.  Delay a minimum amount of time, even if we send a cap
- * message for some other reason.  Otherwise, take the oppotunity to
- * update the mds to avoid sending another message later.
- */
-#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
-#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
-
-#define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4)
-
-/* mount state */
-enum {
-	CEPH_MOUNT_MOUNTING,
-	CEPH_MOUNT_MOUNTED,
-	CEPH_MOUNT_UNMOUNTING,
-	CEPH_MOUNT_UNMOUNTED,
-	CEPH_MOUNT_SHUTDOWN,
-};
-
-/*
- * subtract jiffies
- */
-static inline unsigned long time_sub(unsigned long a, unsigned long b)
-{
-	BUG_ON(time_after(b, a));
-	return (long)a - (long)b;
-}
-
-/*
- * per-filesystem client state
- *
- * possibly shared by multiple mount points, if they are
- * mounting the same ceph filesystem/cluster.
- */
-struct ceph_client {
-	struct ceph_fsid fsid;
-	bool have_fsid;
+	/*
+	 * everything above this point can be memcmp'd; everything below
+	 * is handled in compare_mount_options()
+	 */
 
-	struct mutex mount_mutex;       /* serialize mount attempts */
-	struct ceph_mount_args *mount_args;
+	char *snapdir_name;   /* default ".snap" */
+};
 
+struct ceph_fs_client {
 	struct super_block *sb;
 
-	unsigned long mount_state;
-	wait_queue_head_t auth_wq;
-
-	int auth_err;
+	struct ceph_mount_options *mount_options;
+	struct ceph_client *client;
 
+	unsigned long mount_state;
 	int min_caps;                  /* min caps i added */
 
-	struct ceph_messenger *msgr;   /* messenger instance */
-	struct ceph_mon_client monc;
-	struct ceph_mds_client mdsc;
-	struct ceph_osd_client osdc;
+	struct ceph_mds_client *mdsc;
 
 	/* writeback */
 	mempool_t *wb_pagevec_pool;
@@ -160,14 +80,14 @@ struct ceph_client {
 	struct backing_dev_info backing_dev_info;
 
 #ifdef CONFIG_DEBUG_FS
-	struct dentry *debugfs_monmap;
-	struct dentry *debugfs_mdsmap, *debugfs_osdmap;
-	struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps;
+	struct dentry *debugfs_dentry_lru, *debugfs_caps;
 	struct dentry *debugfs_congestion_kb;
 	struct dentry *debugfs_bdi;
+	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
 #endif
 };
 
+
 /*
  * File i/o capability.  This tracks shared state with the metadata
  * server that allows us to cache or writeback attributes or to read
@@ -275,6 +195,20 @@ struct ceph_inode_xattr {
 	int should_free_val;
 };
 
+/*
+ * Ceph dentry state
+ */
+struct ceph_dentry_info {
+	struct ceph_mds_session *lease_session;
+	u32 lease_gen, lease_shared_gen;
+	u32 lease_seq;
+	unsigned long lease_renew_after, lease_renew_from;
+	struct list_head lru;
+	struct dentry *dentry;
+	u64 time;
+	u64 offset;
+};
+
 struct ceph_inode_xattrs_info {
 	/*
 	 * (still encoded) xattr blob. we avoid the overhead of parsing
@@ -296,11 +230,6 @@ struct ceph_inode_xattrs_info {
 /*
  * Ceph inode.
  */
-#define CEPH_I_COMPLETE  1  /* we have complete directory cached */
-#define CEPH_I_NODELAY   4  /* do not delay cap release */
-#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
-
 struct ceph_inode_info {
 	struct ceph_vino i_vino;   /* ceph ino + snap */
 
@@ -391,6 +320,63 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
 	return container_of(inode, struct ceph_inode_info, vfs_inode);
 }
 
+static inline struct ceph_vino ceph_vino(struct inode *inode)
+{
+	return ceph_inode(inode)->i_vino;
+}
+
+/*
+ * ino_t is <64 bits on many architectures, blech.
+ *
+ * don't include snap in ino hash, at least for now.
+ */
+static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
+{
+	ino_t ino = (ino_t)vino.ino;  /* ^ (vino.snap << 20); */
+#if BITS_PER_LONG == 32
+	ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
+	if (!ino)
+		ino = 1;
+#endif
+	return ino;
+}
+
+/* for printf-style formatting */
+#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
+
+static inline u64 ceph_ino(struct inode *inode)
+{
+	return ceph_inode(inode)->i_vino.ino;
+}
+static inline u64 ceph_snap(struct inode *inode)
+{
+	return ceph_inode(inode)->i_vino.snap;
+}
+
+static inline int ceph_ino_compare(struct inode *inode, void *data)
+{
+	struct ceph_vino *pvino = (struct ceph_vino *)data;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	return ci->i_vino.ino == pvino->ino &&
+		ci->i_vino.snap == pvino->snap;
+}
+
+static inline struct inode *ceph_find_inode(struct super_block *sb,
+					    struct ceph_vino vino)
+{
+	ino_t t = ceph_vino_to_ino(vino);
+	return ilookup5(sb, t, ceph_ino_compare, &vino);
+}
+
+
+/*
+ * Ceph inode.
+ */
+#define CEPH_I_COMPLETE  1  /* we have complete directory cached */
+#define CEPH_I_NODELAY   4  /* do not delay cap release */
+#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
+#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
+
 static inline void ceph_i_clear(struct inode *inode, unsigned mask)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -414,8 +400,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	bool r;
 
-	smp_mb();
+	spin_lock(&inode->i_lock);
 	r = (ci->i_ceph_flags & mask) == mask;
+	spin_unlock(&inode->i_lock);
 	return r;
 }
 
@@ -432,20 +419,6 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
 			    struct ceph_inode_frag *pfrag,
 			    int *found);
 
-/*
- * Ceph dentry state
- */
-struct ceph_dentry_info {
-	struct ceph_mds_session *lease_session;
-	u32 lease_gen, lease_shared_gen;
-	u32 lease_seq;
-	unsigned long lease_renew_after, lease_renew_from;
-	struct list_head lru;
-	struct dentry *dentry;
-	u64 time;
-	u64 offset;
-};
-
 static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
 {
 	return (struct ceph_dentry_info *)dentry->d_fsdata;
@@ -456,22 +429,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
 	return ((loff_t)frag << 32) | (loff_t)off;
 }
 
-/*
- * ino_t is <64 bits on many architectures, blech.
- *
- * don't include snap in ino hash, at least for now.
- */
-static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
-{
-	ino_t ino = (ino_t)vino.ino;  /* ^ (vino.snap << 20); */
-#if BITS_PER_LONG == 32
-	ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8;
-	if (!ino)
-		ino = 1;
-#endif
-	return ino;
-}
-
 static inline int ceph_set_ino_cb(struct inode *inode, void *data)
 {
 	ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
@@ -479,39 +436,6 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data)
 	return 0;
 }
 
-static inline struct ceph_vino ceph_vino(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino;
-}
-
-/* for printf-style formatting */
-#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
-
-static inline u64 ceph_ino(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino.ino;
-}
-static inline u64 ceph_snap(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino.snap;
-}
-
-static inline int ceph_ino_compare(struct inode *inode, void *data)
-{
-	struct ceph_vino *pvino = (struct ceph_vino *)data;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	return ci->i_vino.ino == pvino->ino &&
-		ci->i_vino.snap == pvino->snap;
-}
-
-static inline struct inode *ceph_find_inode(struct super_block *sb,
-					    struct ceph_vino vino)
-{
-	ino_t t = ceph_vino_to_ino(vino);
-	return ilookup5(sb, t, ceph_ino_compare, &vino);
-}
-
-
 /*
  * caps helpers
  */
@@ -576,18 +500,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 			     struct ceph_cap_reservation *ctx, int need);
 extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
 			       struct ceph_cap_reservation *ctx);
-extern void ceph_reservation_status(struct ceph_client *client,
+extern void ceph_reservation_status(struct ceph_fs_client *client,
 				    int *total, int *avail, int *used,
 				    int *reserved, int *min);
 
-static inline struct ceph_client *ceph_inode_to_client(struct inode *inode)
+static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
 {
-	return (struct ceph_client *)inode->i_sb->s_fs_info;
+	return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
 }
 
-static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb)
+static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
 {
-	return (struct ceph_client *)sb->s_fs_info;
+	return (struct ceph_fs_client *)sb->s_fs_info;
 }
 
 
@@ -616,51 +540,6 @@ struct ceph_file_info {
 
 
 
-/*
- * snapshots
- */
-
-/*
- * A "snap context" is the set of existing snapshots when we
- * write data.  It is used by the OSD to guide its COW behavior.
- *
- * The ceph_snap_context is refcounted, and attached to each dirty
- * page, indicating which context the dirty data belonged when it was
- * dirtied.
- */
-struct ceph_snap_context {
-	atomic_t nref;
-	u64 seq;
-	int num_snaps;
-	u64 snaps[];
-};
-
-static inline struct ceph_snap_context *
-ceph_get_snap_context(struct ceph_snap_context *sc)
-{
-	/*
-	printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
-	       atomic_read(&sc->nref)+1);
-	*/
-	if (sc)
-		atomic_inc(&sc->nref);
-	return sc;
-}
-
-static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
-{
-	if (!sc)
-		return;
-	/*
-	printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
-	       atomic_read(&sc->nref)-1);
-	*/
-	if (atomic_dec_and_test(&sc->nref)) {
-		/*printk(" deleting snap_context %p\n", sc);*/
-		kfree(sc);
-	}
-}
-
 /*
  * A "snap realm" describes a subset of the file hierarchy sharing
  * the same set of snapshots that apply to it.  The realms themselves
@@ -699,16 +578,33 @@ struct ceph_snap_realm {
 	spinlock_t inodes_with_caps_lock;
 };
 
-
-
-/*
- * calculate the number of pages a given length and offset map onto,
- * if we align the data.
- */
-static inline int calc_pages_for(u64 off, u64 len)
+static inline int default_congestion_kb(void)
 {
-	return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
-		(off >> PAGE_CACHE_SHIFT);
+	int congestion_kb;
+
+	/*
+	 * Copied from NFS
+	 *
+	 * congestion size, scale with available memory.
+	 *
+	 *  64MB:    8192k
+	 * 128MB:   11585k
+	 * 256MB:   16384k
+	 * 512MB:   23170k
+	 *   1GB:   32768k
+	 *   2GB:   46340k
+	 *   4GB:   65536k
+	 *   8GB:   92681k
+	 *  16GB:  131072k
+	 *
+	 * This allows larger machines to have larger/more transfers.
+	 * Limit the default to 256M
+	 */
+	congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+	if (congestion_kb > 256*1024)
+		congestion_kb = 256*1024;
+
+	return congestion_kb;
 }
 
 
@@ -741,16 +637,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
 			   ci_item)->writing;
 }
 
-
-/* super.c */
-extern struct kmem_cache *ceph_inode_cachep;
-extern struct kmem_cache *ceph_cap_cachep;
-extern struct kmem_cache *ceph_dentry_cachep;
-extern struct kmem_cache *ceph_file_cachep;
-
-extern const char *ceph_msg_type_name(int type);
-extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
-
 /* inode.c */
 extern const struct inode_operations ceph_file_iops;
 
@@ -857,12 +743,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 /* file.c */
 extern const struct file_operations ceph_file_fops;
 extern const struct address_space_operations ceph_aops;
+extern int ceph_copy_to_page_vector(struct page **pages,
+				    const char *data,
+				    loff_t off, size_t len);
+extern int ceph_copy_from_page_vector(struct page **pages,
+				    char *data,
+				    loff_t off, size_t len);
+extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
 				       struct nameidata *nd, int mode,
 				       int locked_dir);
 extern int ceph_release(struct inode *inode, struct file *filp);
-extern void ceph_release_page_vector(struct page **pages, int num_pages);
 
 /* dir.c */
 extern const struct file_operations ceph_dir_fops;
@@ -892,12 +784,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 /* export.c */
 extern const struct export_operations ceph_export_ops;
 
-/* debugfs.c */
-extern int ceph_debugfs_init(void);
-extern void ceph_debugfs_cleanup(void);
-extern int ceph_debugfs_client_init(struct ceph_client *client);
-extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
-
 /* locks.c */
 extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
 extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
@@ -914,4 +800,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry)
 	return NULL;
 }
 
+/* debugfs.c */
+extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
+extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
+
 #endif /* _FS_CEPH_SUPER_H */
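
The fs-only mount flags keep their old bit values but move behind fsc->mount_options, and the set/test macros are renamed to match. A minimal sketch, assuming a struct ceph_fs_client *fsc (the helper itself is hypothetical):

	/* true when the dcache readdir path is allowed */
	static bool example_use_dcache_readdir(struct ceph_fs_client *fsc)
	{
		/* expands to a test of CEPH_MOUNT_OPT_NOASYNCREADDIR in
		 * fsc->mount_options->flags */
		return !ceph_test_mount_opt(fsc, NOASYNCREADDIR);
	}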

+ 10 - 8
fs/ceph/xattr.c

@@ -1,6 +1,9 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
+
 #include "super.h"
-#include "decode.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
 
 #include <linux/xattr.h>
 #include <linux/slab.h>
@@ -620,12 +623,12 @@ out:
 static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 			      const char *value, size_t size, int flags)
 {
-	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 	struct inode *inode = dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct inode *parent_inode = dentry->d_parent->d_inode;
 	struct ceph_mds_request *req;
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	int err;
 	int i, nr_pages;
 	struct page **pages = NULL;
@@ -713,10 +716,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
 
 	/* preallocate memory for xattr name, value, index node */
 	err = -ENOMEM;
-	newname = kmalloc(name_len + 1, GFP_NOFS);
+	newname = kmemdup(name, name_len + 1, GFP_NOFS);
 	if (!newname)
 		goto out;
-	memcpy(newname, name, name_len + 1);
 
 	if (val_len) {
 		newval = kmalloc(val_len + 1, GFP_NOFS);
@@ -777,8 +779,8 @@ out:
 
 static int ceph_send_removexattr(struct dentry *dentry, const char *name)
 {
-	struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
-	struct ceph_mds_client *mdsc = &client->mdsc;
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct inode *inode = dentry->d_inode;
 	struct inode *parent_inode = dentry->d_parent->d_inode;
 	struct ceph_mds_request *req;

+ 2 - 2
fs/ceph/auth.h → include/linux/ceph/auth.h

@@ -1,8 +1,8 @@
 #ifndef _FS_CEPH_AUTH_H
 #define _FS_CEPH_AUTH_H
 
-#include "types.h"
-#include "buffer.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
 
 /*
  * Abstract interface for communicating with the authenticate module.

+ 0 - 0
fs/ceph/buffer.h → include/linux/ceph/buffer.h


+ 3 - 2
fs/ceph/ceph_debug.h → include/linux/ceph/ceph_debug.h

@@ -3,7 +3,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#ifdef CONFIG_CEPH_FS_PRETTYDEBUG
+#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
 
 /*
  * wrap pr_debug to include a filename:lineno prefix on each line.
@@ -14,7 +14,8 @@
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
 extern const char *ceph_file_part(const char *s, int len);
 #  define dout(fmt, ...)						\
-	pr_debug(" %12.12s:%-4d : " fmt,				\
+	pr_debug("%.*s %12.12s:%-4d : " fmt,				\
+		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\
 		 ceph_file_part(__FILE__, sizeof(__FILE__)),		\
 		 __LINE__, ##__VA_ARGS__)
 # else
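
The added "%.*s" pads short module names so that "ceph" and "libceph" debug lines align on the filename column. Worked through the format above:

	/*
	 * precision = 8 - (int)sizeof(KBUILD_MODNAME), printed from "    ":
	 *   "libceph": 8 - sizeof("libceph") = 8 - 8 = 0 pad characters
	 *   "ceph":    8 - sizeof("ceph")    = 8 - 5 = 3 pad characters
	 */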

+ 0 - 0
fs/ceph/ceph_frag.h → include/linux/ceph/ceph_frag.h


+ 1 - 0
fs/ceph/ceph_fs.h → include/linux/ceph/ceph_fs.h

@@ -299,6 +299,7 @@ enum {
 	CEPH_MDS_OP_SETATTR    = 0x01108,
 	CEPH_MDS_OP_SETFILELOCK= 0x01109,
 	CEPH_MDS_OP_GETFILELOCK= 0x00110,
+	CEPH_MDS_OP_SETDIRLAYOUT=0x0110a,
 
 	CEPH_MDS_OP_MKNOD      = 0x01201,
 	CEPH_MDS_OP_LINK       = 0x01202,

+ 0 - 0
fs/ceph/ceph_hash.h → include/linux/ceph/ceph_hash.h


+ 33 - 0
include/linux/ceph/debugfs.h

@@ -0,0 +1,33 @@
+#ifndef _FS_CEPH_DEBUGFS_H
+#define _FS_CEPH_DEBUGFS_H
+
+#include "ceph_debug.h"
+#include "types.h"
+
+#define CEPH_DEFINE_SHOW_FUNC(name)					\
+static int name##_open(struct inode *inode, struct file *file)		\
+{									\
+	struct seq_file *sf;						\
+	int ret;							\
+									\
+	ret = single_open(file, name, NULL);				\
+	sf = file->private_data;					\
+	sf->private = inode->i_private;					\
+	return ret;							\
+}									\
+									\
+static const struct file_operations name##_fops = {			\
+	.open		= name##_open,					\
+	.read		= seq_read,					\
+	.llseek		= seq_lseek,					\
+	.release	= single_release,				\
+};
+
+/* debugfs.c */
+extern int ceph_debugfs_init(void);
+extern void ceph_debugfs_cleanup(void);
+extern int ceph_debugfs_client_init(struct ceph_client *client);
+extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
+
+#endif
+
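
CEPH_DEFINE_SHOW_FUNC wraps a seq_file show routine in the single_open() boilerplate and stashes inode->i_private as the seq_file's private pointer, so callers hand their object to debugfs as the file's data. A minimal sketch; the file name and show body are illustrative:

	static int example_show(struct seq_file *s, void *p)
	{
		struct ceph_client *client = s->private;  /* was inode->i_private */

		seq_printf(s, "fsid %pU\n", &client->fsid);
		return 0;
	}

	CEPH_DEFINE_SHOW_FUNC(example_show)	/* emits example_show_fops */

	/* hypothetical registration, e.g. in ceph_debugfs_client_init() */
	debugfs_create_file("example", 0400, client->debugfs_dir,
			    client, &example_show_fops);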

+ 5 - 0
fs/ceph/decode.h → include/linux/ceph/decode.h

@@ -191,6 +191,11 @@ static inline void ceph_encode_string(void **p, void *end,
 		ceph_encode_need(p, end, n, bad);		\
 		ceph_encode_copy(p, pv, n);			\
 	} while (0)
+#define ceph_encode_string_safe(p, end, s, n, bad)		\
+	do {							\
+		ceph_encode_need(p, end, n, bad);		\
+		ceph_encode_string(p, end, s, n);		\
+	} while (0)
 
 
 #endif
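
Like the other *_safe helpers in this header, the new string variant bounds-checks before writing and jumps to a caller-supplied label when the buffer is too small. A hedged sketch (the packing function is hypothetical):

	static int example_pack(void *buf, size_t len, const char *name, u32 nlen)
	{
		void *p = buf;
		void *end = buf + len;

		ceph_encode_string_safe(&p, end, name, nlen, bad);
		return 0;
	bad:
		return -ERANGE;		/* not enough room for the string */
	}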

+ 249 - 0
include/linux/ceph/libceph.h

@@ -0,0 +1,249 @@
+#ifndef _FS_CEPH_LIBCEPH_H
+#define _FS_CEPH_LIBCEPH_H
+
+#include "ceph_debug.h"
+
+#include <asm/unaligned.h>
+#include <linux/backing-dev.h>
+#include <linux/completion.h>
+#include <linux/exportfs.h>
+#include <linux/fs.h>
+#include <linux/mempool.h>
+#include <linux/pagemap.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+#include <linux/slab.h>
+
+#include "types.h"
+#include "messenger.h"
+#include "msgpool.h"
+#include "mon_client.h"
+#include "osd_client.h"
+#include "ceph_fs.h"
+
+/*
+ * Supported features
+ */
+#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR
+#define CEPH_FEATURE_REQUIRED_DEFAULT  CEPH_FEATURE_NOSRCADDR
+
+/*
+ * mount options
+ */
+#define CEPH_OPT_FSID             (1<<0)
+#define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
+#define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
+#define CEPH_OPT_NOCRC            (1<<3) /* no data crc on writes */
+
+#define CEPH_OPT_DEFAULT   (0);
+
+#define ceph_set_opt(client, opt) \
+	(client)->options->flags |= CEPH_OPT_##opt;
+#define ceph_test_opt(client, opt) \
+	(!!((client)->options->flags & CEPH_OPT_##opt))
+
+struct ceph_options {
+	int flags;
+	struct ceph_fsid fsid;
+	struct ceph_entity_addr my_addr;
+	int mount_timeout;
+	int osd_idle_ttl;
+	int osd_timeout;
+	int osd_keepalive_timeout;
+
+	/*
+	 * any type that can't be simply compared or doesn't need
+	 * to be compared should go beyond this point,
+	 * ceph_compare_options() should be updated accordingly
+	 */
+
+	struct ceph_entity_addr *mon_addr; /* should be the first
+					      pointer type of args */
+	int num_mon;
+	char *name;
+	char *secret;
+};
+
+/*
+ * defaults
+ */
+#define CEPH_MOUNT_TIMEOUT_DEFAULT  60
+#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
+#define CEPH_OSD_KEEPALIVE_DEFAULT  5
+#define CEPH_OSD_IDLE_TTL_DEFAULT    60
+#define CEPH_MOUNT_RSIZE_DEFAULT    (512*1024) /* readahead */
+
+#define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
+#define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024)
+
+#define CEPH_AUTH_NAME_DEFAULT   "guest"
+
+/*
+ * Delay telling the MDS we no longer want caps, in case we reopen
+ * the file.  Delay a minimum amount of time, even if we send a cap
+ * message for some other reason.  Otherwise, take the opportunity to
+ * update the mds to avoid sending another message later.
+ */
+#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
+#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
+
+#define CEPH_CAP_RELEASE_SAFETY_DEFAULT        (CEPH_CAPS_PER_RELEASE * 4)
+
+/* mount state */
+enum {
+	CEPH_MOUNT_MOUNTING,
+	CEPH_MOUNT_MOUNTED,
+	CEPH_MOUNT_UNMOUNTING,
+	CEPH_MOUNT_UNMOUNTED,
+	CEPH_MOUNT_SHUTDOWN,
+};
+
+/*
+ * subtract jiffies
+ */
+static inline unsigned long time_sub(unsigned long a, unsigned long b)
+{
+	BUG_ON(time_after(b, a));
+	return (long)a - (long)b;
+}
+
+struct ceph_mds_client;
+
+/*
+ * per client state
+ *
+ * possibly shared by multiple mount points, if they are
+ * mounting the same ceph filesystem/cluster.
+ */
+struct ceph_client {
+	struct ceph_fsid fsid;
+	bool have_fsid;
+
+	void *private;
+
+	struct ceph_options *options;
+
+	struct mutex mount_mutex;      /* serialize mount attempts */
+	wait_queue_head_t auth_wq;
+	int auth_err;
+
+	int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
+
+	u32 supported_features;
+	u32 required_features;
+
+	struct ceph_messenger *msgr;   /* messenger instance */
+	struct ceph_mon_client monc;
+	struct ceph_osd_client osdc;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_dir;
+	struct dentry *debugfs_monmap;
+	struct dentry *debugfs_osdmap;
+#endif
+};
+
+
+
+/*
+ * snapshots
+ */
+
+/*
+ * A "snap context" is the set of existing snapshots when we
+ * write data.  It is used by the OSD to guide its COW behavior.
+ *
+ * The ceph_snap_context is refcounted, and attached to each dirty
+ * page, indicating which context the dirty data belonged when it was
+ * dirtied.
+ */
+struct ceph_snap_context {
+	atomic_t nref;
+	u64 seq;
+	int num_snaps;
+	u64 snaps[];
+};
+
+static inline struct ceph_snap_context *
+ceph_get_snap_context(struct ceph_snap_context *sc)
+{
+	/*
+	printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
+	       atomic_read(&sc->nref)+1);
+	*/
+	if (sc)
+		atomic_inc(&sc->nref);
+	return sc;
+}
+
+static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
+{
+	if (!sc)
+		return;
+	/*
+	printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
+	       atomic_read(&sc->nref)-1);
+	*/
+	if (atomic_dec_and_test(&sc->nref)) {
+		/*printk(" deleting snap_context %p\n", sc);*/
+		kfree(sc);
+	}
+}
+
+/*
+ * calculate the number of pages a given length and offset map onto,
+ * if we align the data.
+ */
+static inline int calc_pages_for(u64 off, u64 len)
+{
+	return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
+		(off >> PAGE_CACHE_SHIFT);
+}
+
+/* ceph_common.c */
+extern const char *ceph_msg_type_name(int type);
+extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
+extern struct kmem_cache *ceph_inode_cachep;
+extern struct kmem_cache *ceph_cap_cachep;
+extern struct kmem_cache *ceph_dentry_cachep;
+extern struct kmem_cache *ceph_file_cachep;
+
+extern int ceph_parse_options(struct ceph_options **popt, char *options,
+			      const char *dev_name, const char *dev_name_end,
+			      int (*parse_extra_token)(char *c, void *private),
+			      void *private);
+extern void ceph_destroy_options(struct ceph_options *opt);
+extern int ceph_compare_options(struct ceph_options *new_opt,
+				struct ceph_client *client);
+extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
+					      void *private);
+extern u64 ceph_client_id(struct ceph_client *client);
+extern void ceph_destroy_client(struct ceph_client *client);
+extern int __ceph_open_session(struct ceph_client *client,
+			       unsigned long started);
+extern int ceph_open_session(struct ceph_client *client);
+
+/* pagevec.c */
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
+
+extern struct page **ceph_get_direct_page_vector(const char __user *data,
+					    int num_pages,
+					    loff_t off, size_t len);
+extern void ceph_put_page_vector(struct page **pages, int num_pages);
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
+extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
+extern int ceph_copy_user_to_page_vector(struct page **pages,
+					 const char __user *data,
+					 loff_t off, size_t len);
+extern int ceph_copy_to_page_vector(struct page **pages,
+				    const char *data,
+				    loff_t off, size_t len);
+extern int ceph_copy_from_page_vector(struct page **pages,
+				    char *data,
+				    loff_t off, size_t len);
+extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
+				    loff_t off, size_t len);
+extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
+
+
+#endif /* _FS_CEPH_SUPER_H */
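
calc_pages_for() moves here unchanged; it computes how many pages an (offset, length) pair spans once the data is page-aligned, and a transfer smaller than a page can still straddle a boundary. A worked check with 4 KB pages:

	/*
	 * calc_pages_for(3584, 1024), PAGE_CACHE_SIZE == 4096:
	 *   ((3584 + 1024 + 4095) >> 12) - (3584 >> 12)
	 *   = (8703 >> 12) - 0
	 *   = 2 pages: 1 KB starting 3.5 KB into a page touches two pages
	 */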

+ 0 - 0
fs/ceph/mdsmap.h → include/linux/ceph/mdsmap.h


+ 10 - 2
fs/ceph/messenger.h → include/linux/ceph/messenger.h

@@ -65,6 +65,9 @@ struct ceph_messenger {
 	 */
 	u32 global_seq;
 	spinlock_t global_seq_lock;
+
+	u32 supported_features;
+	u32 required_features;
 };
 
 /*
@@ -82,6 +85,10 @@ struct ceph_msg {
 	struct ceph_pagelist *pagelist; /* instead of pages */
 	struct list_head list_head;
 	struct kref kref;
+	struct bio  *bio;		/* instead of pages/pagelist */
+	struct bio  *bio_iter;		/* bio iterator */
+	int bio_seg;			/* current bio segment */
+	struct ceph_pagelist *trail;	/* the trailing part of the data */
 	bool front_is_vmalloc;
 	bool more_to_follow;
 	bool needs_out_seq;
@@ -205,7 +212,7 @@ struct ceph_connection {
 };
 
 
-extern const char *pr_addr(const struct sockaddr_storage *ss);
+extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
 extern int ceph_parse_ips(const char *c, const char *end,
 			  struct ceph_entity_addr *addr,
 			  int max_count, int *count);
@@ -216,7 +223,8 @@ extern void ceph_msgr_exit(void);
 extern void ceph_msgr_flush(void);
 
 extern struct ceph_messenger *ceph_messenger_create(
-	struct ceph_entity_addr *myaddr);
+	struct ceph_entity_addr *myaddr,
+	u32 features, u32 required);
 extern void ceph_messenger_destroy(struct ceph_messenger *);
 
 extern void ceph_con_init(struct ceph_messenger *msgr,
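
ceph_messenger_create() now takes the feature bits its user supports and requires, rather than baking in the filesystem's compile-time constants. A hedged call-site sketch, using the defaults from libceph.h above and assuming a myaddr that may be NULL:

	struct ceph_messenger *msgr;

	msgr = ceph_messenger_create(myaddr,
				     CEPH_FEATURE_SUPPORTED_DEFAULT,
				     CEPH_FEATURE_REQUIRED_DEFAULT);
	if (IS_ERR(msgr))
		return PTR_ERR(msgr);	/* assumes ERR_PTR-style failure */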

+ 1 - 0
fs/ceph/mon_client.h → include/linux/ceph/mon_client.h

@@ -79,6 +79,7 @@ struct ceph_mon_client {
 	u64 last_tid;
 
 	/* mds/osd map */
+	int want_mdsmap;
 	int want_next_osdmap; /* 1 = want, 2 = want+asked */
 	u32 have_osdmap, have_mdsmap;
 

+ 0 - 0
fs/ceph/msgpool.h → include/linux/ceph/msgpool.h


+ 0 - 0
fs/ceph/msgr.h → include/linux/ceph/msgr.h


+ 67 - 0
fs/ceph/osd_client.h → include/linux/ceph/osd_client.h

@@ -15,6 +15,7 @@ struct ceph_snap_context;
 struct ceph_osd_request;
 struct ceph_osd_client;
 struct ceph_authorizer;
+struct ceph_pagelist;
 
 /*
  * completion callback for async writepages
@@ -68,6 +69,7 @@ struct ceph_osd_request {
 	struct list_head  r_unsafe_item;
 
 	struct inode *r_inode;         	      /* for use by callbacks */
+	void *r_priv;			      /* ditto */
 
 	char              r_oid[40];          /* object name */
 	int               r_oid_len;
@@ -80,6 +82,11 @@ struct ceph_osd_request {
 	struct page     **r_pages;            /* pages for data payload */
 	int               r_pages_from_pool;
 	int               r_own_pages;        /* if true, i own page list */
+#ifdef CONFIG_BLOCK
+	struct bio       *r_bio;	      /* instead of pages */
+#endif
+
+	struct ceph_pagelist *r_trail;	      /* trailing part of the data */
 };
 
 struct ceph_osd_client {
@@ -110,6 +117,42 @@ struct ceph_osd_client {
 	struct ceph_msgpool	msgpool_op_reply;
 };
 
+struct ceph_osd_req_op {
+	u16 op;           /* CEPH_OSD_OP_* */
+	u32 flags;        /* CEPH_OSD_FLAG_* */
+	union {
+		struct {
+			u64 offset, length;
+			u64 truncate_size;
+			u32 truncate_seq;
+		} extent;
+		struct {
+			const char *name;
+			u32 name_len;
+			const char  *val;
+			u32 value_len;
+			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
+			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
+		} xattr;
+		struct {
+			const char *class_name;
+			__u8 class_len;
+			const char *method_name;
+			__u8 method_len;
+			__u8 argc;
+			const char *indata;
+			u32 indata_len;
+		} cls;
+		struct {
+			u64 cookie, count;
+		} pgls;
+	        struct {
+		        u64 snapid;
+	        } snap;
+	};
+	u32 payload_len;
+};
+
 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
 			  struct ceph_client *client);
 extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
@@ -119,6 +162,30 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
 
+extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
+			struct ceph_file_layout *layout,
+			u64 snapid,
+			u64 off, u64 *plen, u64 *bno,
+			struct ceph_osd_request *req,
+			struct ceph_osd_req_op *op);
+
+extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
+					       int flags,
+					       struct ceph_snap_context *snapc,
+					       struct ceph_osd_req_op *ops,
+					       bool use_mempool,
+					       gfp_t gfp_flags,
+					       struct page **pages,
+					       struct bio *bio);
+
+extern void ceph_osdc_build_request(struct ceph_osd_request *req,
+				    u64 off, u64 *plen,
+				    struct ceph_osd_req_op *src_ops,
+				    struct ceph_snap_context *snapc,
+				    struct timespec *mtime,
+				    const char *oid,
+				    int oid_len);
+
 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
 				      struct ceph_file_layout *layout,
 				      struct ceph_vino vino,
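
struct ceph_osd_req_op describes one operation of a compound OSD request; the cls variant is what backs the new "osd call" support that rbd uses. A hedged sketch of filling a single CEPH_OSD_OP_CALL op; the class and method names are illustrative, and the zeroed trailing op marks the end of the array, as rbd's helpers do:

	struct ceph_osd_req_op ops[2];

	memset(ops, 0, sizeof(ops));
	ops[0].op = CEPH_OSD_OP_CALL;
	ops[0].cls.class_name = "rbd";		/* illustrative */
	ops[0].cls.class_len = 3;
	ops[0].cls.method_name = "snap_list";	/* illustrative */
	ops[0].cls.method_len = 9;
	ops[0].cls.indata = NULL;		/* no input payload */
	ops[0].cls.indata_len = 0;
	/* ops[1] stays zeroed: end of op array */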

+ 3 - 1
fs/ceph/osdmap.h → include/linux/ceph/osdmap.h

@@ -4,7 +4,7 @@
 #include <linux/rbtree.h>
 #include "types.h"
 #include "ceph_fs.h"
-#include "crush/crush.h"
+#include <linux/crush/crush.h>
 
 /*
  * The osd map describes the current membership of the osd cluster and
@@ -125,4 +125,6 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
 				struct ceph_pg pgid);
 
+extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+
 #endif
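
ceph_pg_poolid_by_name() resolves a pool name against the current osdmap, which is what lets rbd be pointed at a pool by name. A hedged sketch; the helper is hypothetical and the errno is assumed:

	/* returns the pool id, or a negative errno (e.g. -ENOENT)
	 * when no pool by that name exists in the map */
	static int example_lookup_pool(struct ceph_osd_client *osdc,
				       const char *name)
	{
		return ceph_pg_poolid_by_name(osdc->osdmap, name);
	}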

+ 22 - 1
fs/ceph/pagelist.h → include/linux/ceph/pagelist.h

@@ -8,6 +8,14 @@ struct ceph_pagelist {
 	void *mapped_tail;
 	size_t length;
 	size_t room;
+	struct list_head free_list;
+	size_t num_pages_free;
+};
+
+struct ceph_pagelist_cursor {
+	struct ceph_pagelist *pl;   /* pagelist, for error checking */
+	struct list_head *page_lru; /* page in list */
+	size_t room;		    /* room remaining to reset to */
 };
 
 static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
@@ -16,10 +24,23 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
 	pl->mapped_tail = NULL;
 	pl->length = 0;
 	pl->room = 0;
+	INIT_LIST_HEAD(&pl->free_list);
+	pl->num_pages_free = 0;
 }
+
 extern int ceph_pagelist_release(struct ceph_pagelist *pl);
 
-extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l);
+extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l);
+
+extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space);
+
+extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl);
+
+extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
+				     struct ceph_pagelist_cursor *c);
+
+extern int ceph_pagelist_truncate(struct ceph_pagelist *pl,
+				  struct ceph_pagelist_cursor *c);
 
 static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
 {
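
The cursor records the pagelist's tail page and remaining room so callers can append speculatively and rewind on failure, while reserve/free_reserve let code that must not allocate (for example, while holding lock_flocks()) pre-fault pages. A condensed sketch of the pattern encode_caps_cb() uses above; pl, data, and need are assumed to be in scope:

	struct ceph_pagelist_cursor trunc_point;
	int err;

	ceph_pagelist_set_cursor(pl, &trunc_point);	/* remember the tail */

	err = ceph_pagelist_reserve(pl, need);	/* may allocate pages now */
	if (!err)
		err = ceph_pagelist_append(pl, data, need); /* uses the reserve */
	if (err)
		ceph_pagelist_truncate(pl, &trunc_point); /* drop partial data */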

+ 0 - 0
fs/ceph/rados.h → include/linux/ceph/rados.h


+ 0 - 0
fs/ceph/types.h → include/linux/ceph/types.h


+ 0 - 0
fs/ceph/crush/crush.h → include/linux/crush/crush.h


+ 0 - 0
fs/ceph/crush/hash.h → include/linux/crush/hash.h


+ 0 - 0
fs/ceph/crush/mapper.h → include/linux/crush/mapper.h


+ 1 - 0
net/Kconfig

@@ -293,6 +293,7 @@ source "net/wimax/Kconfig"
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
 source "net/caif/Kconfig"
+source "net/ceph/Kconfig"
 
 
 endif   # if NET

+ 1 - 0
net/Makefile

@@ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
 endif
 obj-$(CONFIG_WIMAX)		+= wimax/
 obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
+obj-$(CONFIG_CEPH_LIB)		+= ceph/

+ 28 - 0
net/ceph/Kconfig

@@ -0,0 +1,28 @@
+config CEPH_LIB
+        tristate "Ceph core library (EXPERIMENTAL)"
+	depends on INET && EXPERIMENTAL
+	select LIBCRC32C
+	select CRYPTO_AES
+	select CRYPTO
+	default n
+	help
+	  Choose Y or M here to include cephlib, which provides the
+	  common functionality to both the Ceph filesystem and
+	  to the rados block device (rbd).
+
+	  More information at http://ceph.newdream.net/.
+
+	  If unsure, say N.
+
+config CEPH_LIB_PRETTYDEBUG
+	bool "Include file:line in ceph debug output"
+	depends on CEPH_LIB
+	default n
+	help
+	  If you say Y here, debug output will include a filename and
+	  line to aid debugging.  This increases kernel size and slows
+	  execution slightly when debug call sites are enabled (e.g.,
+	  via CONFIG_DYNAMIC_DEBUG).
+
+	  If unsure, say N.
+

+ 37 - 0
net/ceph/Makefile

@@ -0,0 +1,37 @@
+#
+# Makefile for CEPH filesystem.
+#
+
+ifneq ($(KERNELRELEASE),)
+
+obj-$(CONFIG_CEPH_LIB) += libceph.o
+
+libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
+	mon_client.o \
+	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+	debugfs.o \
+	auth.o auth_none.o \
+	crypto.o armor.o \
+	auth_x.o \
+	ceph_fs.o ceph_strings.o ceph_hash.o \
+	pagevec.o
+
+else
+#Otherwise we were called directly from the command
+# line; invoke the kernel build system.
+
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+
+default: all
+
+all:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
+
+modules_install:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
+
+clean:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
+
+endif

+ 0 - 0
fs/ceph/armor.c → net/ceph/armor.c


+ 5 - 5
fs/ceph/auth.c → net/ceph/auth.c

@@ -1,16 +1,16 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 
-#include "types.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/messenger.h>
 #include "auth_none.h"
 #include "auth_x.h"
-#include "decode.h"
-#include "super.h"
 
-#include "messenger.h"
 
 /*
  * get protocol handler

+ 4 - 3
fs/ceph/auth_none.c → net/ceph/auth_none.c

@@ -1,14 +1,15 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
 #include "auth_none.h"
-#include "auth.h"
-#include "decode.h"
 
 static void reset(struct ceph_auth_client *ac)
 {

+ 1 - 2
fs/ceph/auth_none.h → net/ceph/auth_none.h

@@ -2,8 +2,7 @@
 #define _FS_CEPH_AUTH_NONE_H
 
 #include <linux/slab.h>
-
-#include "auth.h"
+#include <linux/ceph/auth.h>
 
 /*
  * null security mode.

+ 5 - 4
fs/ceph/auth_x.c → net/ceph/auth_x.c

@@ -1,16 +1,17 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
+#include "crypto.h"
 #include "auth_x.h"
 #include "auth_x_protocol.h"
-#include "crypto.h"
-#include "auth.h"
-#include "decode.h"
 
 #define TEMP_TICKET_BUF_LEN	256
 

+ 2 - 1
fs/ceph/auth_x.h → net/ceph/auth_x.h

@@ -3,8 +3,9 @@
 
 #include <linux/rbtree.h>
 
+#include <linux/ceph/auth.h>
+
 #include "crypto.h"
-#include "auth.h"
 #include "auth_x_protocol.h"
 
 /*

+ 0 - 0
fs/ceph/auth_x_protocol.h → net/ceph/auth_x_protocol.h


+ 6 - 3
fs/ceph/buffer.c → net/ceph/buffer.c

@@ -1,10 +1,11 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/slab.h>
 
-#include "buffer.h"
-#include "decode.h"
+#include <linux/ceph/buffer.h>
+#include <linux/ceph/decode.h>
 
 struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
 {
@@ -32,6 +33,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
 	dout("buffer_new %p\n", b);
 	return b;
 }
+EXPORT_SYMBOL(ceph_buffer_new);
 
 void ceph_buffer_release(struct kref *kref)
 {
@@ -46,6 +48,7 @@ void ceph_buffer_release(struct kref *kref)
 	}
 	kfree(b);
 }
+EXPORT_SYMBOL(ceph_buffer_release);
 
 int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end)
 {

+ 529 - 0
net/ceph/ceph_common.c

@@ -0,0 +1,529 @@
+
+#include <linux/ceph/ceph_debug.h>
+#include <linux/backing-dev.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/inet.h>
+#include <linux/in6.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/string.h>
+
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/debugfs.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+
+
+
+/*
+ * find filename portion of a path (/foo/bar/baz -> baz)
+ */
+const char *ceph_file_part(const char *s, int len)
+{
+	const char *e = s + len;
+
+	while (e != s && *(e-1) != '/')
+		e--;
+	return e;
+}
+EXPORT_SYMBOL(ceph_file_part);
+
+const char *ceph_msg_type_name(int type)
+{
+	switch (type) {
+	case CEPH_MSG_SHUTDOWN: return "shutdown";
+	case CEPH_MSG_PING: return "ping";
+	case CEPH_MSG_AUTH: return "auth";
+	case CEPH_MSG_AUTH_REPLY: return "auth_reply";
+	case CEPH_MSG_MON_MAP: return "mon_map";
+	case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
+	case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
+	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
+	case CEPH_MSG_STATFS: return "statfs";
+	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+	case CEPH_MSG_MDS_MAP: return "mds_map";
+	case CEPH_MSG_CLIENT_SESSION: return "client_session";
+	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
+	case CEPH_MSG_CLIENT_REQUEST: return "client_request";
+	case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
+	case CEPH_MSG_CLIENT_REPLY: return "client_reply";
+	case CEPH_MSG_CLIENT_CAPS: return "client_caps";
+	case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+	case CEPH_MSG_CLIENT_SNAP: return "client_snap";
+	case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+	case CEPH_MSG_OSD_MAP: return "osd_map";
+	case CEPH_MSG_OSD_OP: return "osd_op";
+	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
+	default: return "unknown";
+	}
+}
+EXPORT_SYMBOL(ceph_msg_type_name);
+
+/*
+ * Initially learn our fsid, or verify an fsid matches.
+ */
+int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
+{
+	if (client->have_fsid) {
+		if (ceph_fsid_compare(&client->fsid, fsid)) {
+			pr_err("bad fsid, had %pU got %pU\n",
+			       &client->fsid, fsid);
+			return -1;
+		}
+	} else {
+		pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
+		memcpy(&client->fsid, fsid, sizeof(*fsid));
+		ceph_debugfs_client_init(client);
+		client->have_fsid = true;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ceph_check_fsid);
+
+static int strcmp_null(const char *s1, const char *s2)
+{
+	if (!s1 && !s2)
+		return 0;
+	if (s1 && !s2)
+		return -1;
+	if (!s1 && s2)
+		return 1;
+	return strcmp(s1, s2);
+}
+
+int ceph_compare_options(struct ceph_options *new_opt,
+			 struct ceph_client *client)
+{
+	struct ceph_options *opt1 = new_opt;
+	struct ceph_options *opt2 = client->options;
+	int ofs = offsetof(struct ceph_options, mon_addr);
+	int i;
+	int ret;
+
+	ret = memcmp(opt1, opt2, ofs);
+	if (ret)
+		return ret;
+
+	ret = strcmp_null(opt1->name, opt2->name);
+	if (ret)
+		return ret;
+
+	ret = strcmp_null(opt1->secret, opt2->secret);
+	if (ret)
+		return ret;
+
+	/* any matching mon ip implies a match */
+	for (i = 0; i < opt1->num_mon; i++) {
+		if (ceph_monmap_contains(client->monc.monmap,
+				 &opt1->mon_addr[i]))
+			return 0;
+	}
+	return -1;
+}
+EXPORT_SYMBOL(ceph_compare_options);
+
+
+static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+{
+	int i = 0;
+	char tmp[3];
+	int err = -EINVAL;
+	int d;
+
+	dout("parse_fsid '%s'\n", str);
+	tmp[2] = 0;
+	while (*str && i < 16) {
+		if (ispunct(*str)) {
+			str++;
+			continue;
+		}
+		if (!isxdigit(str[0]) || !isxdigit(str[1]))
+			break;
+		tmp[0] = str[0];
+		tmp[1] = str[1];
+		if (sscanf(tmp, "%x", &d) < 1)
+			break;
+		fsid->fsid[i] = d & 0xff;
+		i++;
+		str += 2;
+	}
+
+	if (i == 16)
+		err = 0;
+	dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
+	return err;
+}
+
+/*
+ * ceph options
+ */
+enum {
+	Opt_osdtimeout,
+	Opt_osdkeepalivetimeout,
+	Opt_mount_timeout,
+	Opt_osd_idle_ttl,
+	Opt_last_int,
+	/* int args above */
+	Opt_fsid,
+	Opt_name,
+	Opt_secret,
+	Opt_ip,
+	Opt_last_string,
+	/* string args above */
+	Opt_noshare,
+	Opt_nocrc,
+};
+
+static match_table_t opt_tokens = {
+	{Opt_osdtimeout, "osdtimeout=%d"},
+	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
+	{Opt_mount_timeout, "mount_timeout=%d"},
+	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+	/* int args above */
+	{Opt_fsid, "fsid=%s"},
+	{Opt_name, "name=%s"},
+	{Opt_secret, "secret=%s"},
+	{Opt_ip, "ip=%s"},
+	/* string args above */
+	{Opt_noshare, "noshare"},
+	{Opt_nocrc, "nocrc"},
+	{-1, NULL}
+};
+
+void ceph_destroy_options(struct ceph_options *opt)
+{
+	dout("destroy_options %p\n", opt);
+	kfree(opt->name);
+	kfree(opt->secret);
+	kfree(opt);
+}
+EXPORT_SYMBOL(ceph_destroy_options);
+
+int ceph_parse_options(struct ceph_options **popt, char *options,
+		       const char *dev_name, const char *dev_name_end,
+		       int (*parse_extra_token)(char *c, void *private),
+		       void *private)
+{
+	struct ceph_options *opt;
+	const char *c;
+	int err = -ENOMEM;
+	substring_t argstr[MAX_OPT_ARGS];
+
+	opt = kzalloc(sizeof(*opt), GFP_KERNEL);
+	if (!opt)
+		return err;
+	opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
+				GFP_KERNEL);
+	if (!opt->mon_addr)
+		goto out;
+
+	dout("parse_options %p options '%s' dev_name '%s'\n", opt, options,
+	     dev_name);
+
+	/* start with defaults */
+	opt->flags = CEPH_OPT_DEFAULT;
+	opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
+	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
+	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
+	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
+
+	/* get mon ip(s) */
+	/* ip1[:port1][,ip2[:port2]...] */
+	err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr,
+			     CEPH_MAX_MON, &opt->num_mon);
+	if (err < 0)
+		goto out;
+
+	/* parse mount options */
+	while ((c = strsep(&options, ",")) != NULL) {
+		int token, intval, ret;
+		if (!*c)
+			continue;
+		err = -EINVAL;
+		token = match_token((char *)c, opt_tokens, argstr);
+		if (token < 0 && parse_extra_token) {
+			/* extra? */
+			err = parse_extra_token((char *)c, private);
+			if (err < 0) {
+				pr_err("bad option at '%s'\n", c);
+				goto out;
+			}
+			continue;
+		}
+		if (token < Opt_last_int) {
+			ret = match_int(&argstr[0], &intval);
+			if (ret < 0) {
+				pr_err("bad mount option arg (not int) "
+				       "at '%s'\n", c);
+				continue;
+			}
+			dout("got int token %d val %d\n", token, intval);
+		} else if (token > Opt_last_int && token < Opt_last_string) {
+			dout("got string token %d val %s\n", token,
+			     argstr[0].from);
+		} else {
+			dout("got token %d\n", token);
+		}
+		switch (token) {
+		case Opt_ip:
+			err = ceph_parse_ips(argstr[0].from,
+					     argstr[0].to,
+					     &opt->my_addr,
+					     1, NULL);
+			if (err < 0)
+				goto out;
+			opt->flags |= CEPH_OPT_MYIP;
+			break;
+
+		case Opt_fsid:
+			err = parse_fsid(argstr[0].from, &opt->fsid);
+			if (err == 0)
+				opt->flags |= CEPH_OPT_FSID;
+			break;
+		case Opt_name:
+			opt->name = kstrndup(argstr[0].from,
+					      argstr[0].to-argstr[0].from,
+					      GFP_KERNEL);
+			break;
+		case Opt_secret:
+			opt->secret = kstrndup(argstr[0].from,
+						argstr[0].to-argstr[0].from,
+						GFP_KERNEL);
+			break;
+
+			/* misc */
+		case Opt_osdtimeout:
+			opt->osd_timeout = intval;
+			break;
+		case Opt_osdkeepalivetimeout:
+			opt->osd_keepalive_timeout = intval;
+			break;
+		case Opt_osd_idle_ttl:
+			opt->osd_idle_ttl = intval;
+			break;
+		case Opt_mount_timeout:
+			opt->mount_timeout = intval;
+			break;
+
+		case Opt_noshare:
+			opt->flags |= CEPH_OPT_NOSHARE;
+			break;
+
+		case Opt_nocrc:
+			opt->flags |= CEPH_OPT_NOCRC;
+			break;
+
+		default:
+			BUG_ON(token);
+		}
+	}
+
+	/* success */
+	*popt = opt;
+	return 0;
+
+out:
+	ceph_destroy_options(opt);
+	return err;
+}
+EXPORT_SYMBOL(ceph_parse_options);
+
+u64 ceph_client_id(struct ceph_client *client)
+{
+	return client->monc.auth->global_id;
+}
+EXPORT_SYMBOL(ceph_client_id);
+
+/*
+ * create a fresh client instance
+ */
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
+{
+	struct ceph_client *client;
+	int err = -ENOMEM;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (client == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	client->private = private;
+	client->options = opt;
+
+	mutex_init(&client->mount_mutex);
+	init_waitqueue_head(&client->auth_wq);
+	client->auth_err = 0;
+
+	client->extra_mon_dispatch = NULL;
+	client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT;
+	client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT;
+
+	client->msgr = NULL;
+
+	/* subsystems */
+	err = ceph_monc_init(&client->monc, client);
+	if (err < 0)
+		goto fail;
+	err = ceph_osdc_init(&client->osdc, client);
+	if (err < 0)
+		goto fail_monc;
+
+	return client;
+
+fail_monc:
+	ceph_monc_stop(&client->monc);
+fail:
+	kfree(client);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ceph_create_client);
+
+void ceph_destroy_client(struct ceph_client *client)
+{
+	dout("destroy_client %p\n", client);
+
+	/* unmount */
+	ceph_osdc_stop(&client->osdc);
+
+	/*
+	 * make sure mds and osd connections close out before destroying
+	 * the auth module, which is needed to free those connections'
+	 * ceph_authorizers.
+	 */
+	ceph_msgr_flush();
+
+	ceph_monc_stop(&client->monc);
+
+	ceph_debugfs_client_cleanup(client);
+
+	if (client->msgr)
+		ceph_messenger_destroy(client->msgr);
+
+	ceph_destroy_options(client->options);
+
+	kfree(client);
+	dout("destroy_client %p done\n", client);
+}
+EXPORT_SYMBOL(ceph_destroy_client);
+
+/*
+ * true if we have both the mon and osd maps, and have thus joined the cluster
+ */
+static int have_mon_and_osd_map(struct ceph_client *client)
+{
+	return client->monc.monmap && client->monc.monmap->epoch &&
+	       client->osdc.osdmap && client->osdc.osdmap->epoch;
+}
+
+/*
+ * mount: join the ceph cluster, and open root directory.
+ */
+int __ceph_open_session(struct ceph_client *client, unsigned long started)
+{
+	struct ceph_entity_addr *myaddr = NULL;
+	int err;
+	unsigned long timeout = client->options->mount_timeout * HZ;
+
+	/* initialize the messenger */
+	if (client->msgr == NULL) {
+		if (ceph_test_opt(client, MYIP))
+			myaddr = &client->options->my_addr;
+		client->msgr = ceph_messenger_create(myaddr,
+					client->supported_features,
+					client->required_features);
+		if (IS_ERR(client->msgr)) {
+			client->msgr = NULL;
+			return PTR_ERR(client->msgr);
+		}
+		client->msgr->nocrc = ceph_test_opt(client, NOCRC);
+	}
+
+	/* open session, and wait for mon and osd maps */
+	err = ceph_monc_open_session(&client->monc);
+	if (err < 0)
+		return err;
+
+	while (!have_mon_and_osd_map(client)) {
+		err = -EIO;
+		if (timeout && time_after_eq(jiffies, started + timeout))
+			return err;
+
+		/* wait */
+		dout("mount waiting for mon_map\n");
+		err = wait_event_interruptible_timeout(client->auth_wq,
+			have_mon_and_osd_map(client) || (client->auth_err < 0),
+			timeout);
+		if (err == -EINTR || err == -ERESTARTSYS)
+			return err;
+		if (client->auth_err < 0)
+			return client->auth_err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(__ceph_open_session);
+
+
+int ceph_open_session(struct ceph_client *client)
+{
+	int ret;
+	unsigned long started = jiffies;  /* note the start time */
+
+	dout("open_session start\n");
+	mutex_lock(&client->mount_mutex);
+
+	ret = __ceph_open_session(client, started);
+
+	mutex_unlock(&client->mount_mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_open_session);
+
+
+static int __init init_ceph_lib(void)
+{
+	int ret = 0;
+
+	ret = ceph_debugfs_init();
+	if (ret < 0)
+		goto out;
+
+	ret = ceph_msgr_init();
+	if (ret < 0)
+		goto out_debugfs;
+
+	pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
+		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
+		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
+		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
+
+	return 0;
+
+out_debugfs:
+	ceph_debugfs_cleanup();
+out:
+	return ret;
+}
+
+static void __exit exit_ceph_lib(void)
+{
+	dout("exit_ceph_lib\n");
+	ceph_msgr_exit();
+	ceph_debugfs_cleanup();
+}
+
+module_init(init_ceph_lib);
+module_exit(exit_ceph_lib);
+
+MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
+MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
+MODULE_DESCRIPTION("Ceph filesystem for Linux");
+MODULE_LICENSE("GPL");
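
Taken together, the new ceph_common.c defines the client lifecycle that rbd and fs/ceph now share. A minimal sketch of a consumer, assuming only the functions defined above; the option string and monitor address are illustrative, and the error handling follows the ownership rules visible in the code (ceph_create_client() takes over the options on success, and ceph_destroy_client() frees them):

    #include <linux/ceph/libceph.h>

    static int demo_client(void)
    {
            char opts[] = "osdtimeout=60,nocrc";    /* illustrative */
            char mons[] = "1.2.3.4:6789";           /* illustrative */
            struct ceph_options *opt;
            struct ceph_client *client;
            int err;

            err = ceph_parse_options(&opt, opts, mons, mons + strlen(mons),
                                     NULL, NULL);   /* no extra tokens */
            if (err < 0)
                    return err;

            client = ceph_create_client(opt, NULL);
            if (IS_ERR(client)) {
                    ceph_destroy_options(opt);      /* still ours on failure */
                    return PTR_ERR(client);
            }

            err = ceph_open_session(client);        /* join the cluster */
            if (err < 0) {
                    ceph_destroy_client(client);
                    return err;
            }
            /* ... issue mon/osd requests ... */
            ceph_destroy_client(client);            /* also frees opt */
            return 0;
    }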

+ 4 - 1
fs/ceph/ceph_fs.c → net/ceph/ceph_fs.c

@@ -1,7 +1,8 @@
 /*
  * Some non-inline ceph helpers
  */
-#include "types.h"
+#include <linux/module.h>
+#include <linux/ceph/types.h>
 
 /*
  * return true if @layout appears to be valid
@@ -52,6 +53,7 @@ int ceph_flags_to_mode(int flags)
 
 	return mode;
 }
+EXPORT_SYMBOL(ceph_flags_to_mode);
 
 int ceph_caps_for_mode(int mode)
 {
@@ -70,3 +72,4 @@ int ceph_caps_for_mode(int mode)
 
 	return caps;
 }
+EXPORT_SYMBOL(ceph_caps_for_mode);
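
These two helpers are exported so that rbd and fs/ceph can share the open-flags translation. The round trip, for illustration (constant names per <linux/ceph/ceph_fs.h>):

    #include <linux/kernel.h>
    #include <linux/fcntl.h>
    #include <linux/ceph/ceph_fs.h>

    static void mode_caps_demo(void)
    {
            int mode = ceph_flags_to_mode(O_RDWR);  /* file mode for O_RDWR */
            int caps = ceph_caps_for_mode(mode);    /* caps wanted for that mode */

            pr_info("mode %d wants caps %d\n", mode, caps);
    }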

+ 1 - 1
fs/ceph/ceph_hash.c → net/ceph/ceph_hash.c

@@ -1,5 +1,5 @@
 
-#include "types.h"
+#include <linux/ceph/types.h>
 
 /*
 * Robert Jenkins' hash function.

+ 84 - 0
net/ceph/ceph_strings.c

@@ -0,0 +1,84 @@
+/*
+ * Ceph string constants
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+const char *ceph_entity_type_name(int type)
+{
+	switch (type) {
+	case CEPH_ENTITY_TYPE_MDS: return "mds";
+	case CEPH_ENTITY_TYPE_OSD: return "osd";
+	case CEPH_ENTITY_TYPE_MON: return "mon";
+	case CEPH_ENTITY_TYPE_CLIENT: return "client";
+	case CEPH_ENTITY_TYPE_AUTH: return "auth";
+	default: return "unknown";
+	}
+}
+
+const char *ceph_osd_op_name(int op)
+{
+	switch (op) {
+	case CEPH_OSD_OP_READ: return "read";
+	case CEPH_OSD_OP_STAT: return "stat";
+
+	case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
+
+	case CEPH_OSD_OP_WRITE: return "write";
+	case CEPH_OSD_OP_DELETE: return "delete";
+	case CEPH_OSD_OP_TRUNCATE: return "truncate";
+	case CEPH_OSD_OP_ZERO: return "zero";
+	case CEPH_OSD_OP_WRITEFULL: return "writefull";
+	case CEPH_OSD_OP_ROLLBACK: return "rollback";
+
+	case CEPH_OSD_OP_APPEND: return "append";
+	case CEPH_OSD_OP_STARTSYNC: return "startsync";
+	case CEPH_OSD_OP_SETTRUNC: return "settrunc";
+	case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
+
+	case CEPH_OSD_OP_TMAPUP: return "tmapup";
+	case CEPH_OSD_OP_TMAPGET: return "tmapget";
+	case CEPH_OSD_OP_TMAPPUT: return "tmapput";
+
+	case CEPH_OSD_OP_GETXATTR: return "getxattr";
+	case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
+	case CEPH_OSD_OP_SETXATTR: return "setxattr";
+	case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
+	case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
+	case CEPH_OSD_OP_RMXATTR: return "rmxattr";
+	case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
+
+	case CEPH_OSD_OP_PULL: return "pull";
+	case CEPH_OSD_OP_PUSH: return "push";
+	case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
+	case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
+	case CEPH_OSD_OP_SCRUB: return "scrub";
+
+	case CEPH_OSD_OP_WRLOCK: return "wrlock";
+	case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
+	case CEPH_OSD_OP_RDLOCK: return "rdlock";
+	case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
+	case CEPH_OSD_OP_UPLOCK: return "uplock";
+	case CEPH_OSD_OP_DNLOCK: return "dnlock";
+
+	case CEPH_OSD_OP_CALL: return "call";
+
+	case CEPH_OSD_OP_PGLS: return "pgls";
+	}
+	return "???";
+}
+
+
+const char *ceph_pool_op_name(int op)
+{
+	switch (op) {
+	case POOL_OP_CREATE: return "create";
+	case POOL_OP_DELETE: return "delete";
+	case POOL_OP_AUID_CHANGE: return "auid change";
+	case POOL_OP_CREATE_SNAP: return "create snap";
+	case POOL_OP_DELETE_SNAP: return "delete snap";
+	case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
+	case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
+	}
+	return "???";
+}
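
These tables exist purely for logging; for instance, the debugfs osdc_show() later in this series prints ceph_osd_op_name() for each op in a pending request. A trivial illustration:

    /* turn a decoded opcode into the name shown in debugfs */
    static void log_osd_op(int op)
    {
            pr_debug("osd op %d (%s)\n", op, ceph_osd_op_name(op));
    }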

+ 1 - 1
fs/ceph/crush/crush.c → net/ceph/crush/crush.c

@@ -8,7 +8,7 @@
 # define BUG_ON(x) assert(!(x))
 #endif
 
-#include "crush.h"
+#include <linux/crush/crush.h>
 
 const char *crush_bucket_alg_name(int alg)
 {

+ 1 - 1
fs/ceph/crush/hash.c → net/ceph/crush/hash.c

@@ -1,6 +1,6 @@
 
 #include <linux/types.h>
-#include "hash.h"
+#include <linux/crush/hash.h>
 
 /*
  * Robert Jenkins' function for mixing 32-bit values

+ 2 - 2
fs/ceph/crush/mapper.c → net/ceph/crush/mapper.c

@@ -18,8 +18,8 @@
 # define kfree(x) free(x)
 #endif
 
-#include "crush.h"
-#include "hash.h"
+#include <linux/crush/crush.h>
+#include <linux/crush/hash.h>
 
 /*
  * Implement the core CRUSH mapping algorithm.

+ 2 - 2
fs/ceph/crypto.c → net/ceph/crypto.c

@@ -1,13 +1,13 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <crypto/hash.h>
 
+#include <linux/ceph/decode.h>
 #include "crypto.h"
-#include "decode.h"
 
 int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
 {

+ 2 - 2
fs/ceph/crypto.h → net/ceph/crypto.h

@@ -1,8 +1,8 @@
 #ifndef _FS_CEPH_CRYPTO_H
 #define _FS_CEPH_CRYPTO_H
 
-#include "types.h"
-#include "buffer.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
 
 /*
  * cryptographic secret

+ 267 - 0
net/ceph/debugfs.c

@@ -0,0 +1,267 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * Implement /sys/kernel/debug/ceph fun
+ *
+ * /sys/kernel/debug/ceph/client*  - an instance of the ceph client
+ *      .../osdmap      - current osdmap
+ *      .../monmap      - current monmap
+ *      .../osdc        - active osd requests
+ *      .../monc        - mon client state
+ *      .../dentry_lru  - dump contents of dentry lru
+ *      .../caps        - expose cap (reservation) stats
+ *      .../bdi         - symlink to ../../bdi/something
+ */
+
+static struct dentry *ceph_debugfs_dir;
+
+static int monmap_show(struct seq_file *s, void *p)
+{
+	int i;
+	struct ceph_client *client = s->private;
+
+	if (client->monc.monmap == NULL)
+		return 0;
+
+	seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
+	for (i = 0; i < client->monc.monmap->num_mon; i++) {
+		struct ceph_entity_inst *inst =
+			&client->monc.monmap->mon_inst[i];
+
+		seq_printf(s, "\t%s%lld\t%s\n",
+			   ENTITY_NAME(inst->name),
+			   ceph_pr_addr(&inst->addr.in_addr));
+	}
+	return 0;
+}
+
+static int osdmap_show(struct seq_file *s, void *p)
+{
+	int i;
+	struct ceph_client *client = s->private;
+	struct rb_node *n;
+
+	if (client->osdc.osdmap == NULL)
+		return 0;
+	seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
+	seq_printf(s, "flags%s%s\n",
+		   (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
+		   " NEARFULL" : "",
+		   (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
+		   " FULL" : "");
+	for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
+		struct ceph_pg_pool_info *pool =
+			rb_entry(n, struct ceph_pg_pool_info, node);
+		seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
+			   pool->id, pool->v.pg_num, pool->pg_num_mask,
+			   pool->v.lpg_num, pool->lpg_num_mask);
+	}
+	for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
+		struct ceph_entity_addr *addr =
+			&client->osdc.osdmap->osd_addr[i];
+		int state = client->osdc.osdmap->osd_state[i];
+		char sb[64];
+
+		seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
+			   i, ceph_pr_addr(&addr->in_addr),
+			   ((client->osdc.osdmap->osd_weight[i]*100) >> 16),
+			   ceph_osdmap_state_str(sb, sizeof(sb), state));
+	}
+	return 0;
+}
+
+static int monc_show(struct seq_file *s, void *p)
+{
+	struct ceph_client *client = s->private;
+	struct ceph_mon_generic_request *req;
+	struct ceph_mon_client *monc = &client->monc;
+	struct rb_node *rp;
+
+	mutex_lock(&monc->mutex);
+
+	if (monc->have_mdsmap)
+		seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
+	if (monc->have_osdmap)
+		seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
+	if (monc->want_next_osdmap)
+		seq_printf(s, "want next osdmap\n");
+
+	for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
+		__u16 op;
+		req = rb_entry(rp, struct ceph_mon_generic_request, node);
+		op = le16_to_cpu(req->request->hdr.type);
+		if (op == CEPH_MSG_STATFS)
+			seq_printf(s, "%lld statfs\n", req->tid);
+		else
+			seq_printf(s, "%lld unknown\n", req->tid);
+	}
+
+	mutex_unlock(&monc->mutex);
+	return 0;
+}
+
+static int osdc_show(struct seq_file *s, void *pp)
+{
+	struct ceph_client *client = s->private;
+	struct ceph_osd_client *osdc = &client->osdc;
+	struct rb_node *p;
+
+	mutex_lock(&osdc->request_mutex);
+	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+		struct ceph_osd_request *req;
+		struct ceph_osd_request_head *head;
+		struct ceph_osd_op *op;
+		int num_ops;
+		int opcode, olen;
+		int i;
+
+		req = rb_entry(p, struct ceph_osd_request, r_node);
+
+		seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
+			   req->r_osd ? req->r_osd->o_osd : -1,
+			   le32_to_cpu(req->r_pgid.pool),
+			   le16_to_cpu(req->r_pgid.ps));
+
+		head = req->r_request->front.iov_base;
+		op = (void *)(head + 1);
+
+		num_ops = le16_to_cpu(head->num_ops);
+		olen = le32_to_cpu(head->object_len);
+		seq_printf(s, "%.*s", olen,
+			   (const char *)(head->ops + num_ops));
+
+		if (req->r_reassert_version.epoch)
+			seq_printf(s, "\t%u'%llu",
+			   (unsigned)le32_to_cpu(req->r_reassert_version.epoch),
+			   le64_to_cpu(req->r_reassert_version.version));
+		else
+			seq_printf(s, "\t");
+
+		for (i = 0; i < num_ops; i++) {
+			opcode = le16_to_cpu(op->op);
+			seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
+			op++;
+		}
+
+		seq_printf(s, "\n");
+	}
+	mutex_unlock(&osdc->request_mutex);
+	return 0;
+}
+
+CEPH_DEFINE_SHOW_FUNC(monmap_show)
+CEPH_DEFINE_SHOW_FUNC(osdmap_show)
+CEPH_DEFINE_SHOW_FUNC(monc_show)
+CEPH_DEFINE_SHOW_FUNC(osdc_show)
+
+int ceph_debugfs_init(void)
+{
+	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
+	if (!ceph_debugfs_dir)
+		return -ENOMEM;
+	return 0;
+}
+
+void ceph_debugfs_cleanup(void)
+{
+	debugfs_remove(ceph_debugfs_dir);
+}
+
+int ceph_debugfs_client_init(struct ceph_client *client)
+{
+	int ret = -ENOMEM;
+	char name[80];
+
+	snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
+		 client->monc.auth->global_id);
+
+	client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
+	if (!client->debugfs_dir)
+		goto out;
+
+	client->monc.debugfs_file = debugfs_create_file("monc",
+						      0600,
+						      client->debugfs_dir,
+						      client,
+						      &monc_show_fops);
+	if (!client->monc.debugfs_file)
+		goto out;
+
+	client->osdc.debugfs_file = debugfs_create_file("osdc",
+						      0600,
+						      client->debugfs_dir,
+						      client,
+						      &osdc_show_fops);
+	if (!client->osdc.debugfs_file)
+		goto out;
+
+	client->debugfs_monmap = debugfs_create_file("monmap",
+					0600,
+					client->debugfs_dir,
+					client,
+					&monmap_show_fops);
+	if (!client->debugfs_monmap)
+		goto out;
+
+	client->debugfs_osdmap = debugfs_create_file("osdmap",
+					0600,
+					client->debugfs_dir,
+					client,
+					&osdmap_show_fops);
+	if (!client->debugfs_osdmap)
+		goto out;
+
+	return 0;
+
+out:
+	ceph_debugfs_client_cleanup(client);
+	return ret;
+}
+
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+	debugfs_remove(client->debugfs_osdmap);
+	debugfs_remove(client->debugfs_monmap);
+	debugfs_remove(client->osdc.debugfs_file);
+	debugfs_remove(client->monc.debugfs_file);
+	debugfs_remove(client->debugfs_dir);
+}
+
+#else  /* CONFIG_DEBUG_FS */
+
+int ceph_debugfs_init(void)
+{
+	return 0;
+}
+
+void ceph_debugfs_cleanup(void)
+{
+}
+
+int ceph_debugfs_client_init(struct ceph_client *client)
+{
+	return 0;
+}
+
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+}
+
+#endif  /* CONFIG_DEBUG_FS */
+
+EXPORT_SYMBOL(ceph_debugfs_init);
+EXPORT_SYMBOL(ceph_debugfs_cleanup);
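
CEPH_DEFINE_SHOW_FUNC() itself is not shown in this hunk; judging by how the generated *_fops are wired to debugfs_create_file() above, it presumably expands to the standard single_open() seq_file boilerplate, roughly:

    /* assumed expansion for CEPH_DEFINE_SHOW_FUNC(monmap_show) */
    static int monmap_show_open(struct inode *inode, struct file *file)
    {
            return single_open(file, monmap_show, inode->i_private);
    }

    static const struct file_operations monmap_show_fops = {
            .open    = monmap_show_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

The show functions then recover the ceph_client from s->private, which matches the client pointer passed to debugfs_create_file() as data.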

+ 236 - 60
fs/ceph/messenger.c → net/ceph/messenger.c

@@ -1,4 +1,4 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/crc32c.h>
 #include <linux/ctype.h>
@@ -9,12 +9,14 @@
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
 #include <net/tcp.h>
 
-#include "super.h"
-#include "messenger.h"
-#include "decode.h"
-#include "pagelist.h"
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/pagelist.h>
 
 /*
  * Ceph uses the messenger to exchange ceph_msg messages with other
@@ -48,7 +50,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
 static DEFINE_SPINLOCK(addr_str_lock);
 static int last_addr_str;
 
-const char *pr_addr(const struct sockaddr_storage *ss)
+const char *ceph_pr_addr(const struct sockaddr_storage *ss)
 {
 	int i;
 	char *s;
@@ -79,6 +81,7 @@ const char *pr_addr(const struct sockaddr_storage *ss)
 
 	return s;
 }
+EXPORT_SYMBOL(ceph_pr_addr);
 
 static void encode_my_addr(struct ceph_messenger *msgr)
 {
@@ -91,7 +94,7 @@ static void encode_my_addr(struct ceph_messenger *msgr)
  */
 struct workqueue_struct *ceph_msgr_wq;
 
-int __init ceph_msgr_init(void)
+int ceph_msgr_init(void)
 {
 	ceph_msgr_wq = create_workqueue("ceph-msgr");
 	if (IS_ERR(ceph_msgr_wq)) {
@@ -102,16 +105,19 @@ int __init ceph_msgr_init(void)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(ceph_msgr_init);
 
 void ceph_msgr_exit(void)
 {
 	destroy_workqueue(ceph_msgr_wq);
 }
+EXPORT_SYMBOL(ceph_msgr_exit);
 
 void ceph_msgr_flush(void)
 {
 	flush_workqueue(ceph_msgr_wq);
 }
+EXPORT_SYMBOL(ceph_msgr_flush);
 
 
 /*
@@ -221,19 +227,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 
 	set_sock_callbacks(sock, con);
 
-	dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
+	dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
 
 	ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
 				 O_NONBLOCK);
 	if (ret == -EINPROGRESS) {
 		dout("connect %s EINPROGRESS sk_state = %u\n",
-		     pr_addr(&con->peer_addr.in_addr),
+		     ceph_pr_addr(&con->peer_addr.in_addr),
 		     sock->sk->sk_state);
 		ret = 0;
 	}
 	if (ret < 0) {
 		pr_err("connect %s error %d\n",
-		       pr_addr(&con->peer_addr.in_addr), ret);
+		       ceph_pr_addr(&con->peer_addr.in_addr), ret);
 		sock_release(sock);
 		con->sock = NULL;
 		con->error_msg = "connect error";
@@ -334,7 +340,8 @@ static void reset_connection(struct ceph_connection *con)
  */
 void ceph_con_close(struct ceph_connection *con)
 {
-	dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr));
+	dout("con_close %p peer %s\n", con,
+	     ceph_pr_addr(&con->peer_addr.in_addr));
 	set_bit(CLOSED, &con->state);  /* in case there's queued work */
 	clear_bit(STANDBY, &con->state);  /* avoid connect_seq bump */
 	clear_bit(LOSSYTX, &con->state);  /* so we retry next connect */
@@ -347,19 +354,21 @@ void ceph_con_close(struct ceph_connection *con)
 	mutex_unlock(&con->mutex);
 	queue_con(con);
 }
+EXPORT_SYMBOL(ceph_con_close);
 
 /*
  * Reopen a closed connection, with a new peer address.
  */
 void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
 {
-	dout("con_open %p %s\n", con, pr_addr(&addr->in_addr));
+	dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
 	set_bit(OPENING, &con->state);
 	clear_bit(CLOSED, &con->state);
 	memcpy(&con->peer_addr, addr, sizeof(*addr));
 	con->delay = 0;      /* reset backoff memory */
 	queue_con(con);
 }
+EXPORT_SYMBOL(ceph_con_open);
 
 /*
  * return true if this connection ever successfully opened
@@ -406,6 +415,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
 	INIT_LIST_HEAD(&con->out_sent);
 	INIT_DELAYED_WORK(&con->work, con_work);
 }
+EXPORT_SYMBOL(ceph_con_init);
 
 
 /*
@@ -529,8 +539,11 @@ static void prepare_write_message(struct ceph_connection *con)
 	if (le32_to_cpu(m->hdr.data_len) > 0) {
 		/* initialize page iterator */
 		con->out_msg_pos.page = 0;
-		con->out_msg_pos.page_pos =
-			le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
+		if (m->pages)
+			con->out_msg_pos.page_pos =
+				le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK;
+		else
+			con->out_msg_pos.page_pos = 0;
 		con->out_msg_pos.data_pos = 0;
 		con->out_msg_pos.did_page_crc = 0;
 		con->out_more = 1;  /* data + footer will follow */
@@ -647,7 +660,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
 	dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
 	     con->connect_seq, global_seq, proto);
 
-	con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED);
+	con->out_connect.features = cpu_to_le64(msgr->supported_features);
 	con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
 	con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
 	con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -712,6 +725,31 @@ out:
 	return ret;  /* done! */
 }
 
+#ifdef CONFIG_BLOCK
+static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+{
+	if (!bio) {
+		*iter = NULL;
+		*seg = 0;
+		return;
+	}
+	*iter = bio;
+	*seg = bio->bi_idx;
+}
+
+static void iter_bio_next(struct bio **bio_iter, int *seg)
+{
+	if (*bio_iter == NULL)
+		return;
+
+	BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+
+	(*seg)++;
+	if (*seg == (*bio_iter)->bi_vcnt)
+		init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
+}
+#endif
+
 /*
  * Write as much message data payload as we can.  If we finish, queue
  * up the footer.
@@ -726,21 +764,46 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 	size_t len;
 	int crc = con->msgr->nocrc;
 	int ret;
+	int total_max_write;
+	int in_trail = 0;
+	size_t trail_len = (msg->trail ? msg->trail->length : 0);
 
 	dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
 	     con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
 	     con->out_msg_pos.page_pos);
 
-	while (con->out_msg_pos.page < con->out_msg->nr_pages) {
+#ifdef CONFIG_BLOCK
+	if (msg->bio && !msg->bio_iter)
+		init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
+#endif
+
+	while (data_len > con->out_msg_pos.data_pos) {
 		struct page *page = NULL;
 		void *kaddr = NULL;
+		int max_write = PAGE_SIZE;
+		int page_shift = 0;
+
+		total_max_write = data_len - trail_len -
+			con->out_msg_pos.data_pos;
 
 		/*
 		 * if we are calculating the data crc (the default), we need
 		 * to map the page.  if our pages[] has been revoked, use the
 		 * zero page.
 		 */
-		if (msg->pages) {
+
+		/* have we reached the trail part of the data? */
+		if (con->out_msg_pos.data_pos >= data_len - trail_len) {
+			in_trail = 1;
+
+			total_max_write = data_len - con->out_msg_pos.data_pos;
+
+			page = list_first_entry(&msg->trail->head,
+						struct page, lru);
+			if (crc)
+				kaddr = kmap(page);
+			max_write = PAGE_SIZE;
+		} else if (msg->pages) {
 			page = msg->pages[con->out_msg_pos.page];
 			if (crc)
 				kaddr = kmap(page);
@@ -749,13 +812,25 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 						struct page, lru);
 			if (crc)
 				kaddr = kmap(page);
+#ifdef CONFIG_BLOCK
+		} else if (msg->bio) {
+			struct bio_vec *bv;
+
+			bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg);
+			page = bv->bv_page;
+			page_shift = bv->bv_offset;
+			if (crc)
+				kaddr = kmap(page) + page_shift;
+			max_write = bv->bv_len;
+#endif
 		} else {
 			page = con->msgr->zero_page;
 			if (crc)
 				kaddr = page_address(con->msgr->zero_page);
 		}
-		len = min((int)(PAGE_SIZE - con->out_msg_pos.page_pos),
-			  (int)(data_len - con->out_msg_pos.data_pos));
+		len = min_t(int, max_write - con->out_msg_pos.page_pos,
+			    total_max_write);
+
 		if (crc && !con->out_msg_pos.did_page_crc) {
 			void *base = kaddr + con->out_msg_pos.page_pos;
 			u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
@@ -765,13 +840,14 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 				cpu_to_le32(crc32c(tmpcrc, base, len));
 			con->out_msg_pos.did_page_crc = 1;
 		}
-
 		ret = kernel_sendpage(con->sock, page,
-				      con->out_msg_pos.page_pos, len,
+				      con->out_msg_pos.page_pos + page_shift,
+				      len,
 				      MSG_DONTWAIT | MSG_NOSIGNAL |
 				      MSG_MORE);
 
-		if (crc && (msg->pages || msg->pagelist))
+		if (crc &&
+		    (msg->pages || msg->pagelist || msg->bio || in_trail))
 			kunmap(page);
 
 		if (ret <= 0)
@@ -783,9 +859,16 @@ static int write_partial_msg_pages(struct ceph_connection *con)
 			con->out_msg_pos.page_pos = 0;
 			con->out_msg_pos.page++;
 			con->out_msg_pos.did_page_crc = 0;
-			if (msg->pagelist)
+			if (in_trail)
+				list_move_tail(&page->lru,
+					       &msg->trail->head);
+			else if (msg->pagelist)
 				list_move_tail(&page->lru,
 					       &msg->pagelist->head);
+#ifdef CONFIG_BLOCK
+			else if (msg->bio)
+				iter_bio_next(&msg->bio_iter, &msg->bio_seg);
+#endif
 		}
 	}
 
@@ -938,7 +1021,7 @@ static int verify_hello(struct ceph_connection *con)
 {
 	if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
 		pr_err("connect to %s got bad banner\n",
-		       pr_addr(&con->peer_addr.in_addr));
+		       ceph_pr_addr(&con->peer_addr.in_addr));
 		con->error_msg = "protocol error, bad banner";
 		return -1;
 	}
@@ -1041,7 +1124,7 @@ int ceph_parse_ips(const char *c, const char *end,
 
 		addr_set_port(ss, port);
 
-		dout("parse_ips got %s\n", pr_addr(ss));
+		dout("parse_ips got %s\n", ceph_pr_addr(ss));
 
 		if (p == end)
 			break;
@@ -1061,6 +1144,7 @@ bad:
 	pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
 	return -EINVAL;
 }
+EXPORT_SYMBOL(ceph_parse_ips);
 
 static int process_banner(struct ceph_connection *con)
 {
@@ -1082,9 +1166,9 @@ static int process_banner(struct ceph_connection *con)
 	    !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
 	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
 		pr_warning("wrong peer, want %s/%d, got %s/%d\n",
-			   pr_addr(&con->peer_addr.in_addr),
+			   ceph_pr_addr(&con->peer_addr.in_addr),
 			   (int)le32_to_cpu(con->peer_addr.nonce),
-			   pr_addr(&con->actual_peer_addr.in_addr),
+			   ceph_pr_addr(&con->actual_peer_addr.in_addr),
 			   (int)le32_to_cpu(con->actual_peer_addr.nonce));
 		con->error_msg = "wrong peer at address";
 		return -1;
@@ -1102,7 +1186,7 @@ static int process_banner(struct ceph_connection *con)
 		addr_set_port(&con->msgr->inst.addr.in_addr, port);
 		encode_my_addr(con->msgr);
 		dout("process_banner learned my addr is %s\n",
-		     pr_addr(&con->msgr->inst.addr.in_addr));
+		     ceph_pr_addr(&con->msgr->inst.addr.in_addr));
 	}
 
 	set_bit(NEGOTIATING, &con->state);
@@ -1123,8 +1207,8 @@ static void fail_protocol(struct ceph_connection *con)
 
 static int process_connect(struct ceph_connection *con)
 {
-	u64 sup_feat = CEPH_FEATURE_SUPPORTED;
-	u64 req_feat = CEPH_FEATURE_REQUIRED;
+	u64 sup_feat = con->msgr->supported_features;
+	u64 req_feat = con->msgr->required_features;
 	u64 server_feat = le64_to_cpu(con->in_reply.features);
 
 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
@@ -1134,7 +1218,7 @@ static int process_connect(struct ceph_connection *con)
 		pr_err("%s%lld %s feature set mismatch,"
 		       " my %llx < server's %llx, missing %llx\n",
 		       ENTITY_NAME(con->peer_name),
-		       pr_addr(&con->peer_addr.in_addr),
+		       ceph_pr_addr(&con->peer_addr.in_addr),
 		       sup_feat, server_feat, server_feat & ~sup_feat);
 		con->error_msg = "missing required protocol features";
 		fail_protocol(con);
@@ -1144,7 +1228,7 @@ static int process_connect(struct ceph_connection *con)
 		pr_err("%s%lld %s protocol version mismatch,"
 		       " my %d != server's %d\n",
 		       ENTITY_NAME(con->peer_name),
-		       pr_addr(&con->peer_addr.in_addr),
+		       ceph_pr_addr(&con->peer_addr.in_addr),
 		       le32_to_cpu(con->out_connect.protocol_version),
 		       le32_to_cpu(con->in_reply.protocol_version));
 		con->error_msg = "protocol version mismatch";
@@ -1178,7 +1262,7 @@ static int process_connect(struct ceph_connection *con)
 		     le32_to_cpu(con->in_connect.connect_seq));
 		pr_err("%s%lld %s connection reset\n",
 		       ENTITY_NAME(con->peer_name),
-		       pr_addr(&con->peer_addr.in_addr));
+		       ceph_pr_addr(&con->peer_addr.in_addr));
 		reset_connection(con);
 		prepare_write_connect(con->msgr, con, 0);
 		prepare_read_connect(con);
@@ -1223,7 +1307,7 @@ static int process_connect(struct ceph_connection *con)
 			pr_err("%s%lld %s protocol feature mismatch,"
 			       " my required %llx > server's %llx, need %llx\n",
 			       ENTITY_NAME(con->peer_name),
-			       pr_addr(&con->peer_addr.in_addr),
+			       ceph_pr_addr(&con->peer_addr.in_addr),
 			       req_feat, server_feat, req_feat & ~server_feat);
 			con->error_msg = "missing required protocol features";
 			fail_protocol(con);
@@ -1305,8 +1389,7 @@ static int read_partial_message_section(struct ceph_connection *con,
 					struct kvec *section,
 					unsigned int sec_len, u32 *crc)
 {
-	int left;
-	int ret;
+	int ret, left;
 
 	BUG_ON(!section);
 
@@ -1329,13 +1412,83 @@ static int read_partial_message_section(struct ceph_connection *con,
 static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
 				struct ceph_msg_header *hdr,
 				int *skip);
+
+
+static int read_partial_message_pages(struct ceph_connection *con,
+				      struct page **pages,
+				      unsigned data_len, int datacrc)
+{
+	void *p;
+	int ret;
+	int left;
+
+	left = min((int)(data_len - con->in_msg_pos.data_pos),
+		   (int)(PAGE_SIZE - con->in_msg_pos.page_pos));
+	/* (page) data */
+	BUG_ON(pages == NULL);
+	p = kmap(pages[con->in_msg_pos.page]);
+	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
+			       left);
+	if (ret > 0 && datacrc)
+		con->in_data_crc =
+			crc32c(con->in_data_crc,
+				  p + con->in_msg_pos.page_pos, ret);
+	kunmap(pages[con->in_msg_pos.page]);
+	if (ret <= 0)
+		return ret;
+	con->in_msg_pos.data_pos += ret;
+	con->in_msg_pos.page_pos += ret;
+	if (con->in_msg_pos.page_pos == PAGE_SIZE) {
+		con->in_msg_pos.page_pos = 0;
+		con->in_msg_pos.page++;
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_BLOCK
+static int read_partial_message_bio(struct ceph_connection *con,
+				    struct bio **bio_iter, int *bio_seg,
+				    unsigned data_len, int datacrc)
+{
+	struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
+	void *p;
+	int ret, left;
+
+	if (IS_ERR(bv))
+		return PTR_ERR(bv);
+
+	left = min((int)(data_len - con->in_msg_pos.data_pos),
+		   (int)(bv->bv_len - con->in_msg_pos.page_pos));
+
+	p = kmap(bv->bv_page) + bv->bv_offset;
+
+	ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
+			       left);
+	if (ret > 0 && datacrc)
+		con->in_data_crc =
+			crc32c(con->in_data_crc,
+				  p + con->in_msg_pos.page_pos, ret);
+	kunmap(bv->bv_page);
+	if (ret <= 0)
+		return ret;
+	con->in_msg_pos.data_pos += ret;
+	con->in_msg_pos.page_pos += ret;
+	if (con->in_msg_pos.page_pos == bv->bv_len) {
+		con->in_msg_pos.page_pos = 0;
+		iter_bio_next(bio_iter, bio_seg);
+	}
+
+	return ret;
+}
+#endif
+
 /*
  * read (part of) a message.
  */
 static int read_partial_message(struct ceph_connection *con)
 {
 	struct ceph_msg *m = con->in_msg;
-	void *p;
 	int ret;
 	int to, left;
 	unsigned front_len, middle_len, data_len, data_off;
@@ -1381,7 +1534,7 @@ static int read_partial_message(struct ceph_connection *con)
 	if ((s64)seq - (s64)con->in_seq < 1) {
 		pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
 			ENTITY_NAME(con->peer_name),
-			pr_addr(&con->peer_addr.in_addr),
+			ceph_pr_addr(&con->peer_addr.in_addr),
 			seq, con->in_seq + 1);
 		con->in_base_pos = -front_len - middle_len - data_len -
 			sizeof(m->footer);
@@ -1422,7 +1575,10 @@ static int read_partial_message(struct ceph_connection *con)
 			m->middle->vec.iov_len = 0;
 
 		con->in_msg_pos.page = 0;
-		con->in_msg_pos.page_pos = data_off & ~PAGE_MASK;
+		if (m->pages)
+			con->in_msg_pos.page_pos = data_off & ~PAGE_MASK;
+		else
+			con->in_msg_pos.page_pos = 0;
 		con->in_msg_pos.data_pos = 0;
 	}
 
@@ -1440,27 +1596,29 @@ static int read_partial_message(struct ceph_connection *con)
 		if (ret <= 0)
 			return ret;
 	}
+#ifdef CONFIG_BLOCK
+	if (m->bio && !m->bio_iter)
+		init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
+#endif
 
 	/* (page) data */
 	while (con->in_msg_pos.data_pos < data_len) {
-		left = min((int)(data_len - con->in_msg_pos.data_pos),
-			   (int)(PAGE_SIZE - con->in_msg_pos.page_pos));
-		BUG_ON(m->pages == NULL);
-		p = kmap(m->pages[con->in_msg_pos.page]);
-		ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
-				       left);
-		if (ret > 0 && datacrc)
-			con->in_data_crc =
-				crc32c(con->in_data_crc,
-					  p + con->in_msg_pos.page_pos, ret);
-		kunmap(m->pages[con->in_msg_pos.page]);
-		if (ret <= 0)
-			return ret;
-		con->in_msg_pos.data_pos += ret;
-		con->in_msg_pos.page_pos += ret;
-		if (con->in_msg_pos.page_pos == PAGE_SIZE) {
-			con->in_msg_pos.page_pos = 0;
-			con->in_msg_pos.page++;
+		if (m->pages) {
+			ret = read_partial_message_pages(con, m->pages,
+						 data_len, datacrc);
+			if (ret <= 0)
+				return ret;
+#ifdef CONFIG_BLOCK
+		} else if (m->bio) {
+
+			ret = read_partial_message_bio(con,
+						 &m->bio_iter, &m->bio_seg,
+						 data_len, datacrc);
+			if (ret <= 0)
+				return ret;
+#endif
+		} else {
+			BUG_ON(1);
 		}
 	}
 
@@ -1874,9 +2032,9 @@ out:
 static void ceph_fault(struct ceph_connection *con)
 {
 	pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
-	       pr_addr(&con->peer_addr.in_addr), con->error_msg);
+	       ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
 	dout("fault %p state %lu to peer %s\n",
-	     con, con->state, pr_addr(&con->peer_addr.in_addr));
+	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
 
 	if (test_bit(LOSSYTX, &con->state)) {
 		dout("fault on LOSSYTX channel\n");
@@ -1936,7 +2094,9 @@ out:
 /*
  * create a new messenger instance
  */
-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
+struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
+					     u32 supported_features,
+					     u32 required_features)
 {
 	struct ceph_messenger *msgr;
 
@@ -1944,6 +2104,9 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
 	if (msgr == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	msgr->supported_features = supported_features;
+	msgr->required_features = required_features;
+
 	spin_lock_init(&msgr->global_seq_lock);
 
 	/* the zero page is needed if a request is "canceled" while the message
@@ -1966,6 +2129,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr)
 	dout("messenger_create %p\n", msgr);
 	return msgr;
 }
+EXPORT_SYMBOL(ceph_messenger_create);
 
 void ceph_messenger_destroy(struct ceph_messenger *msgr)
 {
@@ -1975,6 +2139,7 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
 	kfree(msgr);
 	dout("destroyed messenger %p\n", msgr);
 }
+EXPORT_SYMBOL(ceph_messenger_destroy);
 
 /*
  * Queue up an outgoing message on the given connection.
@@ -2011,6 +2176,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
 	if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
 		queue_con(con);
 }
+EXPORT_SYMBOL(ceph_con_send);
 
 /*
  * Revoke a message that was previously queued for send
@@ -2076,6 +2242,7 @@ void ceph_con_keepalive(struct ceph_connection *con)
 	    test_and_set_bit(WRITE_PENDING, &con->state) == 0)
 		queue_con(con);
 }
+EXPORT_SYMBOL(ceph_con_keepalive);
 
 
 /*
@@ -2136,6 +2303,10 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 	m->nr_pages = 0;
 	m->pages = NULL;
 	m->pagelist = NULL;
+	m->bio = NULL;
+	m->bio_iter = NULL;
+	m->bio_seg = 0;
+	m->trail = NULL;
 
 	dout("ceph_msg_new %p front %d\n", m, front_len);
 	return m;
@@ -2146,6 +2317,7 @@ out:
 	pr_err("msg_new can't create type %d front %d\n", type, front_len);
 	return NULL;
 }
+EXPORT_SYMBOL(ceph_msg_new);
 
 /*
  * Allocate "middle" portion of a message, if it is needed and wasn't
@@ -2250,11 +2422,14 @@ void ceph_msg_last_put(struct kref *kref)
 		m->pagelist = NULL;
 	}
 
+	m->trail = NULL;
+
 	if (m->pool)
 		ceph_msgpool_put(m->pool, m);
 	else
 		ceph_msg_kfree(m);
 }
+EXPORT_SYMBOL(ceph_msg_last_put);
 
 void ceph_msg_dump(struct ceph_msg *msg)
 {
@@ -2275,3 +2450,4 @@ void ceph_msg_dump(struct ceph_msg *msg)
 		       DUMP_PREFIX_OFFSET, 16, 1,
 		       &msg->footer, sizeof(msg->footer), true);
 }
+EXPORT_SYMBOL(ceph_msg_dump);
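
The messenger now carries its feature masks per instance instead of baking CEPH_FEATURE_SUPPORTED into process_connect(), so rbd and fs/ceph can advertise different feature sets. A sketch of the new three-argument create call, mirroring __ceph_open_session() in ceph_common.c above:

    static struct ceph_messenger *demo_messenger(void)
    {
            struct ceph_messenger *msgr;

            msgr = ceph_messenger_create(NULL,  /* bind to any local addr */
                                         CEPH_FEATURE_SUPPORTED_DEFAULT,
                                         CEPH_FEATURE_REQUIRED_DEFAULT);
            if (IS_ERR(msgr))
                    return msgr;
            msgr->nocrc = 1;    /* analogous to the nocrc option */
            return msgr;
    }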

+ 41 - 32
fs/ceph/mon_client.c → net/ceph/mon_client.c

@@ -1,14 +1,16 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/sched.h>
 
-#include "mon_client.h"
-#include "super.h"
-#include "auth.h"
-#include "decode.h"
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/decode.h>
+
+#include <linux/ceph/auth.h>
 
 /*
  * Interact with Ceph monitor cluster.  Handle requests for new map
@@ -74,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
 	     m->num_mon);
 	for (i = 0; i < m->num_mon; i++)
 		dout("monmap_decode  mon%d is %s\n", i,
-		     pr_addr(&m->mon_inst[i].addr.in_addr));
+		     ceph_pr_addr(&m->mon_inst[i].addr.in_addr));
 	return m;
 
 bad:
@@ -191,30 +193,33 @@ static void __send_subscribe(struct ceph_mon_client *monc)
 		struct ceph_msg *msg = monc->m_subscribe;
 		struct ceph_mon_subscribe_item *i;
 		void *p, *end;
+		int num;
 
 		p = msg->front.iov_base;
 		end = p + msg->front_max;
 
-		dout("__send_subscribe to 'mdsmap' %u+\n",
-		     (unsigned)monc->have_mdsmap);
+		num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
+		ceph_encode_32(&p, num);
+
 		if (monc->want_next_osdmap) {
 			dout("__send_subscribe to 'osdmap' %u\n",
 			     (unsigned)monc->have_osdmap);
-			ceph_encode_32(&p, 3);
 			ceph_encode_string(&p, end, "osdmap", 6);
 			i = p;
 			i->have = cpu_to_le64(monc->have_osdmap);
 			i->onetime = 1;
 			p += sizeof(*i);
 			monc->want_next_osdmap = 2;  /* requested */
-		} else {
-			ceph_encode_32(&p, 2);
 		}
-		ceph_encode_string(&p, end, "mdsmap", 6);
-		i = p;
-		i->have = cpu_to_le64(monc->have_mdsmap);
-		i->onetime = 0;
-		p += sizeof(*i);
+		if (monc->want_mdsmap) {
+			dout("__send_subscribe to 'mdsmap' %u+\n",
+			     (unsigned)monc->have_mdsmap);
+			ceph_encode_string(&p, end, "mdsmap", 6);
+			i = p;
+			i->have = cpu_to_le64(monc->have_mdsmap);
+			i->onetime = 0;
+			p += sizeof(*i);
+		}
 		ceph_encode_string(&p, end, "monmap", 6);
 		i = p;
 		i->have = 0;
@@ -243,7 +248,8 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
 	mutex_lock(&monc->mutex);
 	if (monc->hunting) {
 		pr_info("mon%d %s session established\n",
-			monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr));
+			monc->cur_mon,
+			ceph_pr_addr(&monc->con->peer_addr.in_addr));
 		monc->hunting = false;
 	}
 	dout("handle_subscribe_ack after %d seconds\n", seconds);
@@ -266,6 +272,7 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
 	mutex_unlock(&monc->mutex);
 	return 0;
 }
+EXPORT_SYMBOL(ceph_monc_got_mdsmap);
 
 int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
 {
@@ -310,6 +317,7 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
 	mutex_unlock(&monc->mutex);
 	return 0;
 }
+EXPORT_SYMBOL(ceph_monc_open_session);
 
 /*
 * The monitor responds with a mount ack to indicate mount success.  The
@@ -540,6 +548,7 @@ out:
 	kref_put(&req->kref, release_generic_request);
 	return err;
 }
+EXPORT_SYMBOL(ceph_monc_do_statfs);
 
 /*
  * pool ops
@@ -651,6 +660,7 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc,
 				   pool, 0, (char *)snapid, sizeof(*snapid));
 
 }
+EXPORT_SYMBOL(ceph_monc_create_snapid);
 
 int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
 			    u32 pool, u64 snapid)
@@ -708,9 +718,9 @@ static void delayed_work(struct work_struct *work)
  */
 static int build_initial_monmap(struct ceph_mon_client *monc)
 {
-	struct ceph_mount_args *args = monc->client->mount_args;
-	struct ceph_entity_addr *mon_addr = args->mon_addr;
-	int num_mon = args->num_mon;
+	struct ceph_options *opt = monc->client->options;
+	struct ceph_entity_addr *mon_addr = opt->mon_addr;
+	int num_mon = opt->num_mon;
 	int i;
 
 	/* build initial monmap */
@@ -728,11 +738,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
 	}
 	monc->monmap->num_mon = num_mon;
 	monc->have_fsid = false;
-
-	/* release addr memory */
-	kfree(args->mon_addr);
-	args->mon_addr = NULL;
-	args->num_mon = 0;
 	return 0;
 }
 
@@ -753,8 +758,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
 	monc->con = NULL;
 
 	/* authentication */
-	monc->auth = ceph_auth_init(cl->mount_args->name,
-				    cl->mount_args->secret);
+	monc->auth = ceph_auth_init(cl->options->name,
+				    cl->options->secret);
 	if (IS_ERR(monc->auth))
 		return PTR_ERR(monc->auth);
 	monc->auth->want_keys =
@@ -808,6 +813,7 @@ out_monmap:
 out:
 	return err;
 }
+EXPORT_SYMBOL(ceph_monc_init);
 
 void ceph_monc_stop(struct ceph_mon_client *monc)
 {
@@ -832,6 +838,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
 
 	kfree(monc->monmap);
 }
+EXPORT_SYMBOL(ceph_monc_stop);
 
 static void handle_auth_reply(struct ceph_mon_client *monc,
 			      struct ceph_msg *msg)
@@ -889,6 +896,7 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc)
 	mutex_unlock(&monc->mutex);
 	return ret;
 }
+EXPORT_SYMBOL(ceph_monc_validate_auth);
 
 /*
  * handle incoming message
@@ -922,15 +930,16 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 		ceph_monc_handle_map(monc, msg);
 		break;
 
-	case CEPH_MSG_MDS_MAP:
-		ceph_mdsc_handle_map(&monc->client->mdsc, msg);
-		break;
-
 	case CEPH_MSG_OSD_MAP:
 		ceph_osdc_handle_map(&monc->client->osdc, msg);
 		break;
 
 	default:
+		/* can the chained handler handle it? */
+		if (monc->client->extra_mon_dispatch &&
+		    monc->client->extra_mon_dispatch(monc->client, msg) == 0)
+			break;
+			
 		pr_err("received unknown message type %d %s\n", type,
 		       ceph_msg_type_name(type));
 	}
@@ -994,7 +1003,7 @@ static void mon_fault(struct ceph_connection *con)
 	if (monc->con && !monc->hunting)
 		pr_info("mon%d %s session lost, "
 			"hunting for new mon\n", monc->cur_mon,
-			pr_addr(&monc->con->peer_addr.in_addr));
+			ceph_pr_addr(&monc->con->peer_addr.in_addr));
 
 	__close_session(monc);
 	if (!monc->hunting) {
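
With MDS map handling removed from dispatch(), mon_client is now filesystem-agnostic: any message it does not recognize is offered to client->extra_mon_dispatch before being logged as unknown. A sketch of the consumer-side hook this implies (the handler and helper names here are hypothetical; the convention that 0 means "consumed" is taken from the default case above):

    /* hypothetical consumer hook, installed at mount time via
     *      client->extra_mon_dispatch = demo_extra_mon_dispatch;  */
    static int demo_extra_mon_dispatch(struct ceph_client *client,
                                       struct ceph_msg *msg)
    {
            int type = le16_to_cpu(msg->hdr.type);

            if (type == CEPH_MSG_MDS_MAP) {
                    /* hand off to the fs-private mds client */
                    handle_mds_map(client->private, msg);   /* hypothetical */
                    return 0;                               /* consumed */
            }
            return -1;      /* not ours; mon_client prints "unknown" */
    }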

+ 2 - 2
fs/ceph/msgpool.c → net/ceph/msgpool.c

@@ -1,11 +1,11 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 
-#include "msgpool.h"
+#include <linux/ceph/msgpool.h>
 
 static void *alloc_fn(gfp_t gfp_mask, void *arg)
 {
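
The osd_client diff that follows appends variable-length payloads (xattr names and values, class/method data) to a request through a ceph_pagelist "trail". For orientation, a minimal sketch of the three pagelist calls it leans on, used the same way osd_req_encode_op() does below (signatures assumed per <linux/ceph/pagelist.h> as extended earlier in this series):

    #include <linux/slab.h>
    #include <linux/ceph/pagelist.h>

    /* sketch: build a trail the way CEPH_OSD_OP_CALL encoding does */
    static struct ceph_pagelist *demo_trail(const char *cls, const char *method)
    {
            struct ceph_pagelist *trail;

            trail = kmalloc(sizeof(*trail), GFP_NOFS);
            if (!trail)
                    return NULL;
            ceph_pagelist_init(trail);
            ceph_pagelist_append(trail, (void *)cls, strlen(cls));
            ceph_pagelist_append(trail, (void *)method, strlen(method));
            return trail;   /* released via ceph_pagelist_release() + kfree() */
    }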

+ 317 - 83
fs/ceph/osd_client.c → net/ceph/osd_client.c

@@ -1,17 +1,22 @@
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/err.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#ifdef CONFIG_BLOCK
+#include <linux/bio.h>
+#endif
 
-#include "super.h"
-#include "osd_client.h"
-#include "messenger.h"
-#include "decode.h"
-#include "auth.h"
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/pagelist.h>
 
 #define OSD_OP_FRONT_LEN	4096
 #define OSD_OPREPLY_FRONT_LEN	512
@@ -22,6 +27,59 @@ static int __kick_requests(struct ceph_osd_client *osdc,
 
 static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd);
 
+static int op_needs_trail(int op)
+{
+	switch (op) {
+	case CEPH_OSD_OP_GETXATTR:
+	case CEPH_OSD_OP_SETXATTR:
+	case CEPH_OSD_OP_CMPXATTR:
+	case CEPH_OSD_OP_CALL:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int op_has_extent(int op)
+{
+	return (op == CEPH_OSD_OP_READ ||
+		op == CEPH_OSD_OP_WRITE);
+}
+
+void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
+			struct ceph_file_layout *layout,
+			u64 snapid,
+			u64 off, u64 *plen, u64 *bno,
+			struct ceph_osd_request *req,
+			struct ceph_osd_req_op *op)
+{
+	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
+	u64 orig_len = *plen;
+	u64 objoff, objlen;    /* extent in object */
+
+	reqhead->snapid = cpu_to_le64(snapid);
+
+	/* object extent? */
+	ceph_calc_file_object_mapping(layout, off, plen, bno,
+				      &objoff, &objlen);
+	if (*plen < orig_len)
+		dout(" skipping last %llu, final file extent %llu~%llu\n",
+		     orig_len - *plen, off, *plen);
+
+	if (op_has_extent(op->op)) {
+		op->extent.offset = objoff;
+		op->extent.length = objlen;
+	}
+	req->r_num_pages = calc_pages_for(off, *plen);
+	if (op->op == CEPH_OSD_OP_WRITE)
+		op->payload_len = *plen;
+
+	dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
+	     *bno, objoff, objlen, req->r_num_pages);
+
+}
+EXPORT_SYMBOL(ceph_calc_raw_layout);
+
 /*
  * Implement client access to distributed object storage cluster.
  *
@@ -48,34 +106,19 @@ static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd);
  * fill osd op in request message.
  */
 static void calc_layout(struct ceph_osd_client *osdc,
-			struct ceph_vino vino, struct ceph_file_layout *layout,
+			struct ceph_vino vino,
+			struct ceph_file_layout *layout,
 			u64 off, u64 *plen,
-			struct ceph_osd_request *req)
+			struct ceph_osd_request *req,
+			struct ceph_osd_req_op *op)
 {
-	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
-	struct ceph_osd_op *op = (void *)(reqhead + 1);
-	u64 orig_len = *plen;
-	u64 objoff, objlen;    /* extent in object */
 	u64 bno;
 
-	reqhead->snapid = cpu_to_le64(vino.snap);
-
-	/* object extent? */
-	ceph_calc_file_object_mapping(layout, off, plen, &bno,
-				      &objoff, &objlen);
-	if (*plen < orig_len)
-		dout(" skipping last %llu, final file extent %llu~%llu\n",
-		     orig_len - *plen, off, *plen);
+	ceph_calc_raw_layout(osdc, layout, vino.snap, off,
+			     plen, &bno, req, op);
 
 	sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno);
 	req->r_oid_len = strlen(req->r_oid);
-
-	op->extent.offset = cpu_to_le64(objoff);
-	op->extent.length = cpu_to_le64(objlen);
-	req->r_num_pages = calc_pages_for(off, *plen);
-
-	dout("calc_layout %s (%d) %llu~%llu (%d pages)\n",
-	     req->r_oid, req->r_oid_len, objoff, objlen, req->r_num_pages);
 }
 
 /*
@@ -101,56 +144,66 @@ void ceph_osdc_release_request(struct kref *kref)
 	if (req->r_own_pages)
 		ceph_release_page_vector(req->r_pages,
 					 req->r_num_pages);
+#ifdef CONFIG_BLOCK
+	if (req->r_bio)
+		bio_put(req->r_bio);
+#endif
 	ceph_put_snap_context(req->r_snapc);
+	if (req->r_trail) {
+		ceph_pagelist_release(req->r_trail);
+		kfree(req->r_trail);
+	}
 	if (req->r_mempool)
 		mempool_free(req, req->r_osdc->req_mempool);
 	else
 		kfree(req);
 }
+EXPORT_SYMBOL(ceph_osdc_release_request);
 
-/*
- * build new request AND message, calculate layout, and adjust file
- * extent as needed.
- *
- * if the file was recently truncated, we include information about its
- * old and new size so that the object can be updated appropriately.  (we
- * avoid synchronously deleting truncated objects because it's slow.)
- *
- * if @do_sync, include a 'startsync' command so that the osd will flush
- * data quickly.
- */
-struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
-					       struct ceph_file_layout *layout,
-					       struct ceph_vino vino,
-					       u64 off, u64 *plen,
-					       int opcode, int flags,
+static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail)
+{
+	int i = 0;
+
+	if (needs_trail)
+		*needs_trail = 0;
+	while (ops[i].op) {
+		if (needs_trail && op_needs_trail(ops[i].op))
+			*needs_trail = 1;
+		i++;
+	}
+
+	return i;
+}
+
+struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
+					       int flags,
 					       struct ceph_snap_context *snapc,
-					       int do_sync,
-					       u32 truncate_seq,
-					       u64 truncate_size,
-					       struct timespec *mtime,
-					       bool use_mempool, int num_reply)
+					       struct ceph_osd_req_op *ops,
+					       bool use_mempool,
+					       gfp_t gfp_flags,
+					       struct page **pages,
+					       struct bio *bio)
 {
 	struct ceph_osd_request *req;
 	struct ceph_msg *msg;
-	struct ceph_osd_request_head *head;
-	struct ceph_osd_op *op;
-	void *p;
-	int num_op = 1 + do_sync;
-	size_t msg_size = sizeof(*head) + num_op*sizeof(*op);
-	int i;
+	int needs_trail;
+	int num_op = get_num_ops(ops, &needs_trail);
+	size_t msg_size = sizeof(struct ceph_osd_request_head);
+
+	msg_size += num_op*sizeof(struct ceph_osd_op);
 
 	if (use_mempool) {
-		req = mempool_alloc(osdc->req_mempool, GFP_NOFS);
+		req = mempool_alloc(osdc->req_mempool, gfp_flags);
 		memset(req, 0, sizeof(*req));
 	} else {
-		req = kzalloc(sizeof(*req), GFP_NOFS);
+		req = kzalloc(sizeof(*req), gfp_flags);
 	}
 	if (req == NULL)
 		return NULL;
 
 	req->r_osdc = osdc;
 	req->r_mempool = use_mempool;
+
 	kref_init(&req->r_kref);
 	init_completion(&req->r_completion);
 	init_completion(&req->r_safe_completion);
@@ -164,13 +217,22 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
 	else
 		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
-				   OSD_OPREPLY_FRONT_LEN, GFP_NOFS);
+				   OSD_OPREPLY_FRONT_LEN, gfp_flags);
 	if (!msg) {
 		ceph_osdc_put_request(req);
 		return NULL;
 	}
 	req->r_reply = msg;
 
+	/* allocate space for the trailing data */
+	if (needs_trail) {
+		req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags);
+		if (!req->r_trail) {
+			ceph_osdc_put_request(req);
+			return NULL;
+		}
+		ceph_pagelist_init(req->r_trail);
+	}
 	/* create request message; allow space for oid */
 	msg_size += 40;
 	if (snapc)
@@ -178,18 +240,115 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	if (use_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
 	else
-		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS);
+		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags);
 	if (!msg) {
 		ceph_osdc_put_request(req);
 		return NULL;
 	}
+
 	msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
 	memset(msg->front.iov_base, 0, msg->front.iov_len);
+
+	req->r_request = msg;
+	req->r_pages = pages;
+#ifdef CONFIG_BLOCK
+	if (bio) {
+		req->r_bio = bio;
+		bio_get(req->r_bio);
+	}
+#endif
+
+	return req;
+}
+EXPORT_SYMBOL(ceph_osdc_alloc_request);
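
For illustration, a minimal sketch of a caller of the new allocation entry
point. The helper name and the choice of op are hypothetical; the
ceph_osdc_alloc_request() signature matches the hunk above, and the ops
array is terminated by a zero opcode, which is the sentinel get_num_ops()
walks.

	#include <linux/ceph/osd_client.h>

	/* hypothetical caller: allocate a request carrying one STARTSYNC op */
	static struct ceph_osd_request *demo_alloc(struct ceph_osd_client *osdc,
						   struct ceph_snap_context *snapc)
	{
		struct ceph_osd_req_op ops[2];

		ops[0].op = CEPH_OSD_OP_STARTSYNC;
		ops[0].payload_len = 0;
		ops[1].op = 0;		/* sentinel: get_num_ops() stops here */

		return ceph_osdc_alloc_request(osdc, CEPH_OSD_FLAG_WRITE, snapc,
					       ops, false, GFP_NOFS, NULL, NULL);
	}

Since STARTSYNC carries no variable-length payload, op_needs_trail() should
leave req->r_trail unset for a request like this one.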
+
+static void osd_req_encode_op(struct ceph_osd_request *req,
+			      struct ceph_osd_op *dst,
+			      struct ceph_osd_req_op *src)
+{
+	dst->op = cpu_to_le16(src->op);
+
+	switch (dst->op) {
+	case CEPH_OSD_OP_READ:
+	case CEPH_OSD_OP_WRITE:
+		dst->extent.offset =
+			cpu_to_le64(src->extent.offset);
+		dst->extent.length =
+			cpu_to_le64(src->extent.length);
+		dst->extent.truncate_size =
+			cpu_to_le64(src->extent.truncate_size);
+		dst->extent.truncate_seq =
+			cpu_to_le32(src->extent.truncate_seq);
+		break;
+
+	case CEPH_OSD_OP_GETXATTR:
+	case CEPH_OSD_OP_SETXATTR:
+	case CEPH_OSD_OP_CMPXATTR:
+		BUG_ON(!req->r_trail);
+
+		dst->xattr.name_len = cpu_to_le32(src->xattr.name_len);
+		dst->xattr.value_len = cpu_to_le32(src->xattr.value_len);
+		dst->xattr.cmp_op = src->xattr.cmp_op;
+		dst->xattr.cmp_mode = src->xattr.cmp_mode;
+		ceph_pagelist_append(req->r_trail, src->xattr.name,
+				     src->xattr.name_len);
+		ceph_pagelist_append(req->r_trail, src->xattr.val,
+				     src->xattr.value_len);
+		break;
+	case CEPH_OSD_OP_CALL:
+		BUG_ON(!req->r_trail);
+
+		dst->cls.class_len = src->cls.class_len;
+		dst->cls.method_len = src->cls.method_len;
+		dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
+
+		ceph_pagelist_append(req->r_trail, src->cls.class_name,
+				     src->cls.class_len);
+		ceph_pagelist_append(req->r_trail, src->cls.method_name,
+				     src->cls.method_len);
+		ceph_pagelist_append(req->r_trail, src->cls.indata,
+				     src->cls.indata_len);
+		break;
+	case CEPH_OSD_OP_ROLLBACK:
+		dst->snap.snapid = cpu_to_le64(src->snap.snapid);
+		break;
+	case CEPH_OSD_OP_STARTSYNC:
+		break;
+	default:
+		pr_err("unrecognized osd opcode %d\n", dst->op);
+		WARN_ON(1);
+		break;
+	}
+	dst->payload_len = cpu_to_le32(src->payload_len);
+}
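
The xattr and class-call cases above stage their variable-length buffers in
the request's trailing pagelist rather than in the fixed-size front. Below
is a hedged sketch of how a caller might fill in a CEPH_OSD_OP_CALL op
before encoding; the field names come from the hunk above, while the helper
and the payload_len bookkeeping are assumptions:

	/* hypothetical setup for a class-method call; the three buffers are
	 * appended to req->r_trail when osd_req_encode_op() runs, so this op
	 * must have been visible to ceph_osdc_alloc_request() for the trail
	 * pagelist to exist */
	static void demo_fill_call(struct ceph_osd_req_op *op,
				   const char *cls, const char *method,
				   const char *indata, size_t indata_len)
	{
		op->op = CEPH_OSD_OP_CALL;
		op->cls.class_name = cls;
		op->cls.class_len = strlen(cls);
		op->cls.method_name = method;
		op->cls.method_len = strlen(method);
		op->cls.indata = indata;
		op->cls.indata_len = indata_len;
		op->payload_len = op->cls.class_len + op->cls.method_len +
			indata_len;
	}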
+
+/*
+ * build new request AND message: encode the ops and oid into the
+ * previously allocated request message.
+ */
+void ceph_osdc_build_request(struct ceph_osd_request *req,
+			     u64 off, u64 *plen,
+			     struct ceph_osd_req_op *src_ops,
+			     struct ceph_snap_context *snapc,
+			     struct timespec *mtime,
+			     const char *oid,
+			     int oid_len)
+{
+	struct ceph_msg *msg = req->r_request;
+	struct ceph_osd_request_head *head;
+	struct ceph_osd_req_op *src_op;
+	struct ceph_osd_op *op;
+	void *p;
+	int num_op = get_num_ops(src_ops, NULL);
+	size_t msg_size = sizeof(*head) + num_op*sizeof(*op);
+	int flags = req->r_flags;
+	u64 data_len = 0;
+	int i;
+
 	head = msg->front.iov_base;
 	op = (void *)(head + 1);
 	p = (void *)(op + num_op);
 
-	req->r_request = msg;
 	req->r_snapc = ceph_get_snap_context(snapc);
 
 	head->client_inc = cpu_to_le32(1); /* always, for now. */
@@ -197,29 +356,23 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	if (flags & CEPH_OSD_FLAG_WRITE)
 		ceph_encode_timespec(&head->mtime, mtime);
 	head->num_ops = cpu_to_le16(num_op);
-	op->op = cpu_to_le16(opcode);
 
-	/* calculate max write size */
-	calc_layout(osdc, vino, layout, off, plen, req);
-	req->r_file_layout = *layout;  /* keep a copy */
-
-	if (flags & CEPH_OSD_FLAG_WRITE) {
-		req->r_request->hdr.data_off = cpu_to_le16(off);
-		req->r_request->hdr.data_len = cpu_to_le32(*plen);
-		op->payload_len = cpu_to_le32(*plen);
-	}
-	op->extent.truncate_size = cpu_to_le64(truncate_size);
-	op->extent.truncate_seq = cpu_to_le32(truncate_seq);
 
 	/* fill in oid */
-	head->object_len = cpu_to_le32(req->r_oid_len);
-	memcpy(p, req->r_oid, req->r_oid_len);
-	p += req->r_oid_len;
-
-	if (do_sync) {
+	head->object_len = cpu_to_le32(oid_len);
+	memcpy(p, oid, oid_len);
+	p += oid_len;
+
+	src_op = src_ops;
+	while (src_op->op) {
+		osd_req_encode_op(req, op, src_op);
+		src_op++;
 		op++;
-		op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC);
 	}
+
+	if (req->r_trail)
+		data_len += req->r_trail->length;
+
 	if (snapc) {
 		head->snap_seq = cpu_to_le64(snapc->seq);
 		head->num_snaps = cpu_to_le32(snapc->num_snaps);
@@ -229,12 +382,79 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 		}
 	}
 
+	if (flags & CEPH_OSD_FLAG_WRITE) {
+		req->r_request->hdr.data_off = cpu_to_le16(off);
+		req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len);
+	} else if (data_len) {
+		req->r_request->hdr.data_off = 0;
+		req->r_request->hdr.data_len = cpu_to_le32(data_len);
+	}
+
 	BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
 	msg_size = p - msg->front.iov_base;
 	msg->front.iov_len = msg_size;
 	msg->hdr.front_len = cpu_to_le32(msg_size);
+	return;
+}
+EXPORT_SYMBOL(ceph_osdc_build_request);
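
Continuing the hypothetical caller sketched after ceph_osdc_alloc_request():
once the request exists, the second phase encodes the ops and object name
into the message front. The object name here is illustrative, and passing a
NULL mtime assumes a read-only request:

	/* hypothetical second phase: encode ops and oid into the message */
	static void demo_build(struct ceph_osd_request *req,
			       struct ceph_osd_req_op *ops,
			       struct ceph_snap_context *snapc,
			       u64 off, u64 len)
	{
		ceph_osdc_build_request(req, off, &len, ops, snapc,
					NULL /* mtime: reads only */,
					"demo_obj", sizeof("demo_obj") - 1);
	}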
+
+/*
+ * build new request AND message, calculate layout, and adjust file
+ * extent as needed.
+ *
+ * if the file was recently truncated, we include information about its
+ * old and new size so that the object can be updated appropriately.  (we
+ * avoid synchronously deleting truncated objects because it's slow.)
+ *
+ * if @do_sync, include a 'startsync' command so that the osd will flush
+ * data quickly.
+ */
+struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
+					       struct ceph_file_layout *layout,
+					       struct ceph_vino vino,
+					       u64 off, u64 *plen,
+					       int opcode, int flags,
+					       struct ceph_snap_context *snapc,
+					       int do_sync,
+					       u32 truncate_seq,
+					       u64 truncate_size,
+					       struct timespec *mtime,
+					       bool use_mempool, int num_reply)
+{
+	struct ceph_osd_req_op ops[3];
+	struct ceph_osd_request *req;
+
+	ops[0].op = opcode;
+	ops[0].extent.truncate_seq = truncate_seq;
+	ops[0].extent.truncate_size = truncate_size;
+	ops[0].payload_len = 0;
+
+	if (do_sync) {
+		ops[1].op = CEPH_OSD_OP_STARTSYNC;
+		ops[1].payload_len = 0;
+		ops[2].op = 0;
+	} else
+		ops[1].op = 0;
+
+	req = ceph_osdc_alloc_request(osdc, flags,
+					 snapc, ops,
+					 use_mempool,
+					 GFP_NOFS, NULL, NULL);
+	if (!req)
+		return NULL;
+
+	/* calculate max write size */
+	calc_layout(osdc, vino, layout, off, plen, req, ops);
+	req->r_file_layout = *layout;  /* keep a copy */
+
+	ceph_osdc_build_request(req, off, plen, ops,
+				snapc,
+				mtime,
+				req->r_oid, req->r_oid_len);
+
 	return req;
 }
+EXPORT_SYMBOL(ceph_osdc_new_request);
 
 /*
  * We keep osd requests in an rbtree, sorted by ->r_tid.
@@ -389,7 +609,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
 	dout("__move_osd_to_lru %p\n", osd);
 	BUG_ON(!list_empty(&osd->o_osd_lru));
 	list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
-	osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ;
+	osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
 }
 
 static void __remove_osd_from_lru(struct ceph_osd *osd)
@@ -483,7 +703,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
 static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
 {
 	schedule_delayed_work(&osdc->timeout_work,
-			osdc->client->mount_args->osd_keepalive_timeout * HZ);
+			osdc->client->options->osd_keepalive_timeout * HZ);
 }
 
 static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -684,9 +904,9 @@ static void handle_timeout(struct work_struct *work)
 		container_of(work, struct ceph_osd_client, timeout_work.work);
 	struct ceph_osd_request *req, *last_req = NULL;
 	struct ceph_osd *osd;
-	unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
+	unsigned long timeout = osdc->client->options->osd_timeout * HZ;
 	unsigned long keepalive =
-		osdc->client->mount_args->osd_keepalive_timeout * HZ;
+		osdc->client->options->osd_keepalive_timeout * HZ;
 	unsigned long last_stamp = 0;
 	struct rb_node *p;
 	struct list_head slow_osds;
@@ -773,7 +993,7 @@ static void handle_osds_timeout(struct work_struct *work)
 		container_of(work, struct ceph_osd_client,
 			     osds_timeout_work.work);
 	unsigned long delay =
-		osdc->client->mount_args->osd_idle_ttl * HZ >> 2;
+		osdc->client->options->osd_idle_ttl * HZ >> 2;
 
 	dout("osds timeout\n");
 	down_read(&osdc->map_sem);
@@ -1104,6 +1324,10 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 
 	req->r_request->pages = req->r_pages;
 	req->r_request->nr_pages = req->r_num_pages;
+#ifdef CONFIG_BLOCK
+	req->r_request->bio = req->r_bio;
+#endif
+	req->r_request->trail = req->r_trail;
 
 	register_request(osdc, req);
 
@@ -1131,6 +1355,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 	up_read(&osdc->map_sem);
 	return rc;
 }
+EXPORT_SYMBOL(ceph_osdc_start_request);
 
 /*
  * wait for a request to complete
@@ -1153,6 +1378,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
 	dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result);
 	return req->r_result;
 }
+EXPORT_SYMBOL(ceph_osdc_wait_request);
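
Taken together, submission is a start/wait/put sequence. A sketch under the
assumption that ceph_osdc_start_request() keeps its (osdc, req, nofail)
signature and that ceph_osdc_put_request() drops the caller's reference:

	/* hypothetical synchronous round trip for an already-built request */
	static int demo_submit(struct ceph_osd_client *osdc,
			       struct ceph_osd_request *req)
	{
		int rc;

		rc = ceph_osdc_start_request(osdc, req, false);
		if (!rc)
			rc = ceph_osdc_wait_request(osdc, req);
		ceph_osdc_put_request(req);
		return rc;
	}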
 
 /*
  * sync - wait for all in-flight requests to flush.  avoid starvation.
@@ -1186,6 +1412,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
 	mutex_unlock(&osdc->request_mutex);
 	dout("sync done (thru tid %llu)\n", last_tid);
 }
+EXPORT_SYMBOL(ceph_osdc_sync);
 
 /*
  * init, shutdown
@@ -1211,7 +1438,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
 
 	schedule_delayed_work(&osdc->osds_timeout_work,
-	   round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ));
+	   round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
 
 	err = -ENOMEM;
 	osdc->req_mempool = mempool_create_kmalloc_pool(10,
@@ -1237,6 +1464,7 @@ out_mempool:
 out:
 	return err;
 }
+EXPORT_SYMBOL(ceph_osdc_init);
 
 void ceph_osdc_stop(struct ceph_osd_client *osdc)
 {
@@ -1251,6 +1479,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
 	ceph_msgpool_destroy(&osdc->msgpool_op);
 	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
 }
+EXPORT_SYMBOL(ceph_osdc_stop);
 
 /*
  * Read some contiguous pages.  If we cross a stripe boundary, shorten
@@ -1288,6 +1517,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 	dout("readpages result %d\n", rc);
 	return rc;
 }
+EXPORT_SYMBOL(ceph_osdc_readpages);
 
 /*
  * do a synchronous write on N pages
@@ -1330,6 +1560,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 	dout("writepages result %d\n", rc);
 	return rc;
 }
+EXPORT_SYMBOL(ceph_osdc_writepages);
 
 /*
  * handle incoming message
@@ -1420,6 +1651,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 		}
 		m->pages = req->r_pages;
 		m->nr_pages = req->r_num_pages;
+#ifdef CONFIG_BLOCK
+		m->bio = req->r_bio;
+#endif
 	}
 	*skip = 0;
 	req->r_con_filling_msg = ceph_con_get(con);

+ 24 - 6
fs/ceph/osdmap.c → net/ceph/osdmap.c

@@ -1,14 +1,15 @@
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <asm/div64.h>
 
-#include "super.h"
-#include "osdmap.h"
-#include "crush/hash.h"
-#include "crush/mapper.h"
-#include "decode.h"
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/decode.h>
+#include <linux/crush/hash.h>
+#include <linux/crush/mapper.h>
 
 char *ceph_osdmap_state_str(char *str, int len, int state)
 {
@@ -417,6 +418,20 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
 	return NULL;
 }
 
+int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
+{
+	struct rb_node *rbp;
+
+	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
+		struct ceph_pg_pool_info *pi =
+			rb_entry(rbp, struct ceph_pg_pool_info, node);
+		if (pi->name && strcmp(pi->name, name) == 0)
+			return pi->id;
+	}
+	return -ENOENT;
+}
+EXPORT_SYMBOL(ceph_pg_poolid_by_name);
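
A hedged usage sketch for the new export; it assumes the caller holds
osdc->map_sem for read so the osdmap cannot change underneath the walk:

	/* hypothetical lookup of a pool id for a user-supplied pool name */
	static int demo_pool_id(struct ceph_osd_client *osdc, const char *name)
	{
		return ceph_pg_poolid_by_name(osdc->osdmap, name); /* -ENOENT on miss */
	}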
+
 static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 {
 	rb_erase(&pi->node, root);
@@ -966,6 +981,7 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 
 	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
 }
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
 
 /*
  * calculate an object layout (i.e. pgid) from an oid,
@@ -1011,6 +1027,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 	ol->ol_stripe_unit = fl->fl_object_stripe_unit;
 	return 0;
 }
+EXPORT_SYMBOL(ceph_calc_object_layout);
 
 /*
  * Calculate raw osd vector for the given pgid.  Return pointer to osd
@@ -1108,3 +1125,4 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
 			return osds[i];
 	return -1;
 }
+EXPORT_SYMBOL(ceph_calc_pg_primary);

+ 154 - 0
net/ceph/pagelist.c

@@ -0,0 +1,154 @@
+
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/ceph/pagelist.h>
+
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+	if (pl->mapped_tail) {
+		struct page *page = list_entry(pl->head.prev, struct page, lru);
+		kunmap(page);
+		pl->mapped_tail = NULL;
+	}
+}
+
+int ceph_pagelist_release(struct ceph_pagelist *pl)
+{
+	ceph_pagelist_unmap_tail(pl);
+	while (!list_empty(&pl->head)) {
+		struct page *page = list_first_entry(&pl->head, struct page,
+						     lru);
+		list_del(&page->lru);
+		__free_page(page);
+	}
+	ceph_pagelist_free_reserve(pl);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_release);
+
+static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
+{
+	struct page *page;
+
+	if (!pl->num_pages_free) {
+		page = __page_cache_alloc(GFP_NOFS);
+	} else {
+		page = list_first_entry(&pl->free_list, struct page, lru);
+		list_del(&page->lru);
+		--pl->num_pages_free;
+	}
+	if (!page)
+		return -ENOMEM;
+	pl->room += PAGE_SIZE;
+	ceph_pagelist_unmap_tail(pl);
+	list_add_tail(&page->lru, &pl->head);
+	pl->mapped_tail = kmap(page);
+	return 0;
+}
+
+int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
+{
+	while (pl->room < len) {
+		size_t bit = pl->room;
+		int ret;
+
+		memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK),
+		       buf, bit);
+		pl->length += bit;
+		pl->room -= bit;
+		buf += bit;
+		len -= bit;
+		ret = ceph_pagelist_addpage(pl);
+		if (ret)
+			return ret;
+	}
+
+	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len);
+	pl->length += len;
+	pl->room -= len;
+	return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_append);
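
The pl->length & ~PAGE_CACHE_MASK expression above recurs throughout this
file: PAGE_CACHE_MASK clears the low bits, so its complement keeps them,
yielding the write offset within the tail page. A standalone userspace
check of the arithmetic, assuming 4 KiB pages:

	#include <stdio.h>

	#define PAGE_CACHE_SIZE	4096UL
	#define PAGE_CACHE_MASK	(~(PAGE_CACHE_SIZE - 1))

	int main(void)
	{
		unsigned long length = 10000;	/* two full pages + 1808 bytes */

		/* low bits of length = offset of the next byte in the tail page */
		printf("tail offset: %lu\n", length & ~PAGE_CACHE_MASK); /* 1808 */
		return 0;
	}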
+
+/**
+ * Allocate enough pages for a pagelist to append the given amount
+ * of data without without allocating.
+ * Returns: 0 on success, -ENOMEM on error.
+ */
+int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
+{
+	if (space <= pl->room)
+		return 0;
+	space -= pl->room;
+	space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT;   /* conv to num pages */
+
+	while (space > pl->num_pages_free) {
+		struct page *page = __page_cache_alloc(GFP_NOFS);
+		if (!page)
+			return -ENOMEM;
+		list_add_tail(&page->lru, &pl->free_list);
+		++pl->num_pages_free;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_reserve);
+
+/**
+ * Free any pages that have been preallocated.
+ */
+int ceph_pagelist_free_reserve(struct ceph_pagelist *pl)
+{
+	while (!list_empty(&pl->free_list)) {
+		struct page *page = list_first_entry(&pl->free_list,
+						     struct page, lru);
+		list_del(&page->lru);
+		__free_page(page);
+		--pl->num_pages_free;
+	}
+	BUG_ON(pl->num_pages_free);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_free_reserve);
+
+/**
+ * Create a truncation point.
+ */
+void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
+			      struct ceph_pagelist_cursor *c)
+{
+	c->pl = pl;
+	c->page_lru = pl->head.prev;
+	c->room = pl->room;
+}
+EXPORT_SYMBOL(ceph_pagelist_set_cursor);
+
+/**
+ * Truncate a pagelist to the given point. Move extra pages to reserve.
+ * This won't sleep.
+ * Returns: 0 on success,
+ *          -EINVAL if the pagelist doesn't match the trunc point pagelist
+ */
+int ceph_pagelist_truncate(struct ceph_pagelist *pl,
+			   struct ceph_pagelist_cursor *c)
+{
+	struct page *page;
+
+	if (pl != c->pl)
+		return -EINVAL;
+	ceph_pagelist_unmap_tail(pl);
+	while (pl->head.prev != c->page_lru) {
+		page = list_entry(pl->head.prev, struct page, lru);
+		list_del(&page->lru);                /* remove from pagelist */
+		list_add_tail(&page->lru, &pl->free_list); /* add to reserve */
+		++pl->num_pages_free;
+	}
+	pl->room = c->room;
+	if (!list_empty(&pl->head)) {
+		page = list_entry(pl->head.prev, struct page, lru);
+		pl->mapped_tail = kmap(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_truncate);
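
The three new entry points combine into a reserve/append/rollback pattern:
reserve pages while sleeping is still allowed, record a cursor, then append
under a lock and truncate back to the cursor on failure. A hypothetical
sketch:

	/* hypothetical atomic append: preallocate, mark, append, roll back */
	static int demo_append_atomic(struct ceph_pagelist *pl,
				      const void *buf, size_t len)
	{
		struct ceph_pagelist_cursor c;
		int ret;

		ret = ceph_pagelist_reserve(pl, len);	/* may sleep */
		if (ret)
			return ret;

		ceph_pagelist_set_cursor(pl, &c);	/* remember current end */
		ret = ceph_pagelist_append(pl, buf, len); /* draws on reserve */
		if (ret)
			ceph_pagelist_truncate(pl, &c);	/* won't sleep */
		return ret;
	}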

+ 223 - 0
net/ceph/pagevec.c

@@ -0,0 +1,223 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+
+#include <linux/ceph/libceph.h>
+
+/*
+ * build a vector of user pages
+ */
+struct page **ceph_get_direct_page_vector(const char __user *data,
+						 int num_pages,
+						 loff_t off, size_t len)
+{
+	struct page **pages;
+	int rc;
+
+	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+
+	down_read(&current->mm->mmap_sem);
+	rc = get_user_pages(current, current->mm, (unsigned long)data,
+			    num_pages, 0, 0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (rc < 0)
+		goto fail;
+	return pages;
+
+fail:
+	kfree(pages);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(ceph_get_direct_page_vector);
+
+void ceph_put_page_vector(struct page **pages, int num_pages)
+{
+	int i;
+
+	for (i = 0; i < num_pages; i++)
+		put_page(pages[i]);
+	kfree(pages);
+}
+EXPORT_SYMBOL(ceph_put_page_vector);
+
+void ceph_release_page_vector(struct page **pages, int num_pages)
+{
+	int i;
+
+	for (i = 0; i < num_pages; i++)
+		__free_pages(pages[i], 0);
+	kfree(pages);
+}
+EXPORT_SYMBOL(ceph_release_page_vector);
+
+/*
+ * allocate a vector of new pages
+ */
+struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
+{
+	struct page **pages;
+	int i;
+
+	pages = kmalloc(sizeof(*pages) * num_pages, flags);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+	for (i = 0; i < num_pages; i++) {
+		pages[i] = __page_cache_alloc(flags);
+		if (pages[i] == NULL) {
+			ceph_release_page_vector(pages, i);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+	return pages;
+}
+EXPORT_SYMBOL(ceph_alloc_page_vector);
+
+/*
+ * copy user data into a page vector
+ */
+int ceph_copy_user_to_page_vector(struct page **pages,
+					 const char __user *data,
+					 loff_t off, size_t len)
+{
+	int i = 0;
+	int po = off & ~PAGE_CACHE_MASK;
+	int left = len;
+	int l, bad;
+
+	while (left > 0) {
+		l = min_t(int, PAGE_CACHE_SIZE-po, left);
+		bad = copy_from_user(page_address(pages[i]) + po, data, l);
+		if (bad == l)
+			return -EFAULT;
+		data += l - bad;
+		left -= l - bad;
+		po += l - bad;
+		if (po == PAGE_CACHE_SIZE) {
+			po = 0;
+			i++;
+		}
+	}
+	return len;
+}
+EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
+
+int ceph_copy_to_page_vector(struct page **pages,
+				    const char *data,
+				    loff_t off, size_t len)
+{
+	int i = 0;
+	size_t po = off & ~PAGE_CACHE_MASK;
+	size_t left = len;
+	size_t l;
+
+	while (left > 0) {
+		l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+		memcpy(page_address(pages[i]) + po, data, l);
+		data += l;
+		left -= l;
+		po += l;
+		if (po == PAGE_CACHE_SIZE) {
+			po = 0;
+			i++;
+		}
+	}
+	return len;
+}
+EXPORT_SYMBOL(ceph_copy_to_page_vector);
+
+int ceph_copy_from_page_vector(struct page **pages,
+				    char *data,
+				    loff_t off, size_t len)
+{
+	int i = 0;
+	size_t po = off & ~PAGE_CACHE_MASK;
+	size_t left = len;
+	size_t l;
+
+	while (left > 0) {
+		l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+		memcpy(data, page_address(pages[i]) + po, l);
+		data += l;
+		left -= l;
+		po += l;
+		if (po == PAGE_CACHE_SIZE) {
+			po = 0;
+			i++;
+		}
+	}
+	return len;
+}
+EXPORT_SYMBOL(ceph_copy_from_page_vector);
+
+/*
+ * copy data from a page vector into a user pointer
+ */
+int ceph_copy_page_vector_to_user(struct page **pages,
+					 char __user *data,
+					 loff_t off, size_t len)
+{
+	int i = 0;
+	int po = off & ~PAGE_CACHE_MASK;
+	int left = len;
+	int l, bad;
+
+	while (left > 0) {
+		l = min_t(int, left, PAGE_CACHE_SIZE-po);
+		bad = copy_to_user(data, page_address(pages[i]) + po, l);
+		if (bad == l)
+			return -EFAULT;
+		data += l - bad;
+		left -= l - bad;
+		if (po) {
+			po += l - bad;
+			if (po == PAGE_CACHE_SIZE)
+				po = 0;
+		}
+		i++;
+	}
+	return len;
+}
+EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
+
+/*
+ * Zero an extent within a page vector.  Offset is relative to the
+ * start of the first page.
+ */
+void ceph_zero_page_vector_range(int off, int len, struct page **pages)
+{
+	int i = off >> PAGE_CACHE_SHIFT;
+
+	off &= ~PAGE_CACHE_MASK;
+
+	dout("zero_page_vector_page %u~%u\n", off, len);
+
+	/* leading partial page? */
+	if (off) {
+		int end = min((int)PAGE_CACHE_SIZE, off + len);
+		dout("zeroing %d %p head from %d\n", i, pages[i],
+		     (int)off);
+		zero_user_segment(pages[i], off, end);
+		len -= (end - off);
+		i++;
+	}
+	while (len >= PAGE_CACHE_SIZE) {
+		dout("zeroing %d %p len=%d\n", i, pages[i], len);
+		zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+		len -= PAGE_CACHE_SIZE;
+		i++;
+	}
+	/* trailing partial page? */
+	if (len) {
+		dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
+		zero_user_segment(pages[i], 0, len);
+	}
+}
+EXPORT_SYMBOL(ceph_zero_page_vector_range);
+
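
A hedged round trip through the allocation and copy helpers above; the
page-count arithmetic assumes the data starts at offset zero:

	#include <linux/pagemap.h>
	#include <linux/ceph/libceph.h>

	/* hypothetical round trip: allocate a vector, copy data in,
	 * copy it back out, and free everything */
	static int demo_roundtrip(const char *src, char *dst, size_t len)
	{
		int num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
		struct page **pages;
		int ret;

		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
		if (IS_ERR(pages))
			return PTR_ERR(pages);

		ret = ceph_copy_to_page_vector(pages, src, 0, len);
		if (ret >= 0)
			ret = ceph_copy_from_page_vector(pages, dst, 0, len);

		ceph_release_page_vector(pages, num_pages);
		return ret < 0 ? ret : 0;
	}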

Some files were not shown because too many files changed in this diff