@@ -621,18 +621,6 @@ static void rbd_put_client(struct rbd_client *rbdc)
         kref_put(&rbdc->kref, rbd_client_release);
 }
 
-/*
- * Destroy requests collection
- */
-static void rbd_coll_release(struct kref *kref)
-{
-        struct rbd_req_coll *coll =
-                container_of(kref, struct rbd_req_coll, kref);
-
-        dout("rbd_coll_release %p\n", coll);
-        kfree(coll);
-}
-
 static bool rbd_image_format_valid(u32 image_format)
 {
         return image_format == 1 || image_format == 2;
@@ -876,28 +864,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev,
         return length;
 }
 
-static int rbd_get_num_segments(struct rbd_image_header *header,
-                                u64 ofs, u64 len)
-{
-        u64 start_seg;
-        u64 end_seg;
-        u64 result;
-
-        if (!len)
-                return 0;
-        if (len - 1 > U64_MAX - ofs)
-                return -ERANGE;
-
-        start_seg = ofs >> header->obj_order;
-        end_seg = (ofs + len - 1) >> header->obj_order;
-
-        result = end_seg - start_seg + 1;
-        if (result > (u64) INT_MAX)
-                return -ERANGE;
-
-        return (int) result;
-}
-
 /*
  * returns the size of an object in the image
  */
@@ -1216,52 +1182,6 @@ static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op)
         kfree(op);
 }
 
-static void rbd_coll_end_req_index(struct request *rq,
-                                   struct rbd_req_coll *coll,
-                                   int index,
-                                   s32 ret, u64 len)
-{
-        struct request_queue *q;
-        int min, max, i;
-
-        dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n",
-             coll, index, (int)ret, (unsigned long long)len);
-
-        if (!rq)
-                return;
-
-        if (!coll) {
-                blk_end_request(rq, ret, len);
-                return;
-        }
-
-        q = rq->q;
-
-        spin_lock_irq(q->queue_lock);
-        coll->status[index].done = 1;
-        coll->status[index].rc = ret;
-        coll->status[index].bytes = len;
-        max = min = coll->num_done;
-        while (max < coll->total && coll->status[max].done)
-                max++;
-
-        for (i = min; i<max; i++) {
-                __blk_end_request(rq, (int)coll->status[i].rc,
-                                  coll->status[i].bytes);
-                coll->num_done++;
-                kref_put(&coll->kref, rbd_coll_release);
-        }
-        spin_unlock_irq(q->queue_lock);
-}
-
-static void rbd_coll_end_req(struct rbd_request *rbd_req,
-                             s32 ret, u64 len)
-{
-        rbd_coll_end_req_index(rbd_req->rq,
-                               rbd_req->coll, rbd_req->coll_index,
-                               ret, len);
-}
-
 /*
  * Send ceph osd request
  */
@@ -1361,46 +1281,6 @@ done_osd_req:
         return ret;
 }
 
-/*
- * Ceph osd op callback
- */
-static void rbd_req_cb(struct ceph_osd_request *osd_req, struct ceph_msg *msg)
-{
-        struct rbd_request *rbd_req = osd_req->r_priv;
-        struct ceph_osd_reply_head *replyhead;
-        struct ceph_osd_op *op;
-        s32 rc;
-        u64 bytes;
-        int read_op;
-
-        /* parse reply */
-        replyhead = msg->front.iov_base;
-        WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-        op = (void *)(replyhead + 1);
-        rc = (s32)le32_to_cpu(replyhead->result);
-        bytes = le64_to_cpu(op->extent.length);
-        read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
-
-        dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n",
-                (unsigned long long) bytes, read_op, (int) rc);
-
-        if (rc == (s32)-ENOENT && read_op) {
-                zero_bio_chain(rbd_req->bio, 0);
-                rc = 0;
-        } else if (rc == 0 && read_op && bytes < rbd_req->len) {
-                zero_bio_chain(rbd_req->bio, bytes);
-                bytes = rbd_req->len;
-        }
-
-        rbd_coll_end_req(rbd_req, rc, bytes);
-
-        if (rbd_req->bio)
-                bio_chain_put(rbd_req->bio);
-
-        ceph_osdc_put_request(osd_req);
-        kfree(rbd_req);
-}
-
 static void rbd_simple_req_cb(struct ceph_osd_request *osd_req,
                               struct ceph_msg *msg)
 {
@@ -1448,70 +1328,6 @@ done:
         return ret;
 }
 
-/*
- * Do an asynchronous ceph osd operation
- */
-static int rbd_do_op(struct request *rq,
-                     struct rbd_device *rbd_dev,
-                     struct ceph_snap_context *snapc,
-                     u64 ofs, u64 len,
-                     struct bio *bio,
-                     struct rbd_req_coll *coll,
-                     int coll_index)
-{
-        const char *seg_name;
-        u64 seg_ofs;
-        u64 seg_len;
-        int ret;
-        struct ceph_osd_req_op *op;
-        int opcode;
-        int flags;
-        u64 snapid;
-
-        seg_name = rbd_segment_name(rbd_dev, ofs);
-        if (!seg_name)
-                return -ENOMEM;
-        seg_len = rbd_segment_length(rbd_dev, ofs, len);
-        seg_ofs = rbd_segment_offset(rbd_dev, ofs);
-
-        if (rq_data_dir(rq) == WRITE) {
-                opcode = CEPH_OSD_OP_WRITE;
-                flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK;
-                snapid = CEPH_NOSNAP;
-        } else {
-                opcode = CEPH_OSD_OP_READ;
-                flags = CEPH_OSD_FLAG_READ;
-                rbd_assert(!snapc);
-                snapid = rbd_dev->spec->snap_id;
-        }
-
-        ret = -ENOMEM;
-        op = rbd_osd_req_op_create(opcode, seg_ofs, seg_len);
-        if (!op)
-                goto done;
-
-        /* we've taken care of segment sizes earlier when we
-           cloned the bios. We should never have a segment
-           truncated at this point */
-        rbd_assert(seg_len == len);
-
-        ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
-                             seg_name, seg_ofs, seg_len,
-                             bio,
-                             NULL, 0,
-                             flags,
-                             op,
-                             coll, coll_index,
-                             rbd_req_cb, NULL);
-        if (ret < 0)
-                rbd_coll_end_req_index(rq, coll, coll_index,
-                                       (s32)ret, seg_len);
-        rbd_osd_req_op_destroy(op);
-done:
-        kfree(seg_name);
-        return ret;
-}
-
 static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
                                   struct rbd_obj_request *obj_request)
 {
@@ -1683,78 +1499,6 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
         return ret;
 }
 
-static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
-{
-        struct rbd_req_coll *coll =
-                        kzalloc(sizeof(struct rbd_req_coll) +
-                                sizeof(struct rbd_req_status) * num_reqs,
-                                GFP_ATOMIC);
-
-        if (!coll)
-                return NULL;
-        coll->total = num_reqs;
-        kref_init(&coll->kref);
-        return coll;
-}
-
-static int rbd_dev_do_request(struct request *rq,
-                              struct rbd_device *rbd_dev,
-                              struct ceph_snap_context *snapc,
-                              u64 ofs, unsigned int size,
-                              struct bio *bio_chain)
-{
-        int num_segs;
-        struct rbd_req_coll *coll;
-        unsigned int bio_offset;
-        int cur_seg = 0;
-
-        dout("%s 0x%x bytes at 0x%llx\n",
-                rq_data_dir(rq) == WRITE ? "write" : "read",
-                size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
-
-        num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
-        if (num_segs <= 0)
-                return num_segs;
-
-        coll = rbd_alloc_coll(num_segs);
-        if (!coll)
-                return -ENOMEM;
-
-        bio_offset = 0;
-        do {
-                u64 limit = rbd_segment_length(rbd_dev, ofs, size);
-                unsigned int clone_size;
-                struct bio *bio_clone;
-
-                BUG_ON(limit > (u64)UINT_MAX);
-                clone_size = (unsigned int)limit;
-                dout("bio_chain->bi_vcnt=%hu\n", bio_chain->bi_vcnt);
-
-                kref_get(&coll->kref);
-
-                /* Pass a cloned bio chain via an osd request */
-
-                bio_clone = bio_chain_clone_range(&bio_chain,
-                                &bio_offset, clone_size,
-                                GFP_ATOMIC);
-                if (bio_clone)
-                        (void)rbd_do_op(rq, rbd_dev, snapc,
-                                        ofs, clone_size,
-                                        bio_clone, coll, cur_seg);
-                else
-                        rbd_coll_end_req_index(rq, coll, cur_seg,
-                                               (s32)-ENOMEM,
-                                               clone_size);
-                size -= clone_size;
-                ofs += clone_size;
-
-                cur_seg++;
-        } while (size > 0);
-        kref_put(&coll->kref, rbd_coll_release);
-
-        return 0;
-}
-
 static void rbd_osd_read_callback(struct rbd_obj_request *obj_request,
                                   struct ceph_osd_op *op)
 {
@@ -2235,68 +1979,6 @@ end_request:
         }
 }
 
-/*
- * block device queue callback
- */
-static void rbd_rq_fn(struct request_queue *q)
-{
-        struct rbd_device *rbd_dev = q->queuedata;
-        bool read_only = rbd_dev->mapping.read_only;
-        struct request *rq;
-
-        while ((rq = blk_fetch_request(q))) {
-                struct ceph_snap_context *snapc = NULL;
-                unsigned int size = 0;
-                int result;
-
-                dout("fetched request\n");
-
-                /* Filter out block requests we don't understand */
-
-                if ((rq->cmd_type != REQ_TYPE_FS)) {
-                        __blk_end_request_all(rq, 0);
-                        continue;
-                }
-                spin_unlock_irq(q->queue_lock);
-
-                /* Write requests need a reference to the snapshot context */
-
-                if (rq_data_dir(rq) == WRITE) {
-                        result = -EROFS;
-                        if (read_only) /* Can't write to a read-only device */
-                                goto out_end_request;
-
-                        /*
-                         * Note that each osd request will take its
-                         * own reference to the snapshot context
-                         * supplied. The reference we take here
-                         * just guarantees the one we provide stays
-                         * valid.
-                         */
-                        down_read(&rbd_dev->header_rwsem);
-                        snapc = ceph_get_snap_context(rbd_dev->header.snapc);
-                        up_read(&rbd_dev->header_rwsem);
-                        rbd_assert(snapc != NULL);
-                } else if (!atomic_read(&rbd_dev->exists)) {
-                        rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
-                        dout("request for non-existent snapshot");
-                        result = -ENXIO;
-                        goto out_end_request;
-                }
-
-                size = blk_rq_bytes(rq);
-                result = rbd_dev_do_request(rq, rbd_dev, snapc,
-                                blk_rq_pos(rq) * SECTOR_SIZE,
-                                size, rq->bio);
-out_end_request:
-                if (snapc)
-                        ceph_put_snap_context(snapc);
-                spin_lock_irq(q->queue_lock);
-                if (!size || result < 0)
-                        __blk_end_request_all(rq, result);
-        }
-}
-
 /*
  * a queue callback. Makes sure that we don't create a bio that spans across
  * multiple osd objects. One exception would be with a single page bios,
@@ -2546,7 +2228,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
         disk->fops = &rbd_bd_ops;
         disk->private_data = rbd_dev;
 
-        (void) rbd_rq_fn;               /* avoid a warning */
         q = blk_init_queue(rbd_request_fn, &rbd_dev->lock);
         if (!q)
                 goto out_disk;