
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  block: Skip I/O merges when disabled
  block: add large command support
  block: replace sizeof(rq->cmd) with BLK_MAX_CDB
  ide: use blk_rq_init() to initialize the request
  block: use blk_rq_init() to initialize the request
  block: rename and export rq_init()
  block: no need to initialize rq->cmd with blk_get_request
  block: no need to initialize rq->cmd in prepare_flush_fn hook
  block/blk-barrier.c:blk_ordered_cur_seq() mustn't be inline
  block/elevator.c:elv_rq_merge_ok() mustn't be inline
  block: make queue flags non-atomic
  block: add dma alignment and padding support to blk_rq_map_kern
  unexport blk_max_pfn
  ps3disk: Remove superfluous cast
  block: make rq_init() do a full memset()
  relay: fix splice problem
commit bd5d435a96
Author: Linus Torvalds

+ 3 - 8
block/blk-barrier.c

@@ -53,7 +53,7 @@ EXPORT_SYMBOL(blk_queue_ordered);
 /*
  * Cache flushing for ordered writes handling
  */
-inline unsigned blk_ordered_cur_seq(struct request_queue *q)
+unsigned blk_ordered_cur_seq(struct request_queue *q)
 {
 	if (!q->ordseq)
 		return 0;
@@ -143,10 +143,8 @@ static void queue_flush(struct request_queue *q, unsigned which)
 		end_io = post_flush_end_io;
 	}
 
+	blk_rq_init(q, rq);
 	rq->cmd_flags = REQ_HARDBARRIER;
-	rq_init(q, rq);
-	rq->elevator_private = NULL;
-	rq->elevator_private2 = NULL;
 	rq->rq_disk = q->bar_rq.rq_disk;
 	rq->end_io = end_io;
 	q->prepare_flush_fn(q, rq);
@@ -167,14 +165,11 @@ static inline struct request *start_ordered(struct request_queue *q,
 	blkdev_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = &q->bar_rq;
-	rq->cmd_flags = 0;
-	rq_init(q, rq);
+	blk_rq_init(q, rq);
 	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
 		rq->cmd_flags |= REQ_RW;
 	if (q->ordered & QUEUE_ORDERED_FUA)
 		rq->cmd_flags |= REQ_FUA;
-	rq->elevator_private = NULL;
-	rq->elevator_private2 = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
 	rq->end_io = bar_end_io;
 

+ 34 - 41
block/blk-core.c

@@ -107,41 +107,21 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
 
-/*
- * We can't just memset() the structure, since the allocation path
- * already stored some information in the request.
- */
-void rq_init(struct request_queue *q, struct request *rq)
+void blk_rq_init(struct request_queue *q, struct request *rq)
 {
+	memset(rq, 0, sizeof(*rq));
+
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
-	rq->nr_sectors = rq->hard_nr_sectors = 0;
-	rq->current_nr_sectors = rq->hard_cur_sectors = 0;
-	rq->bio = rq->biotail = NULL;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
-	rq->rq_disk = NULL;
-	rq->nr_phys_segments = 0;
-	rq->nr_hw_segments = 0;
-	rq->ioprio = 0;
-	rq->special = NULL;
-	rq->buffer = NULL;
+	rq->cmd = rq->__cmd;
 	rq->tag = -1;
-	rq->errors = 0;
 	rq->ref_count = 1;
-	rq->cmd_len = 0;
-	memset(rq->cmd, 0, sizeof(rq->cmd));
-	rq->data_len = 0;
-	rq->extra_len = 0;
-	rq->sense_len = 0;
-	rq->data = NULL;
-	rq->sense = NULL;
-	rq->end_io = NULL;
-	rq->end_io_data = NULL;
-	rq->next_rq = NULL;
 }
+EXPORT_SYMBOL(blk_rq_init);
 
 static void req_bio_endio(struct request *rq, struct bio *bio,
 			  unsigned int nbytes, int error)
@@ -194,7 +174,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 
 	if (blk_pc_request(rq)) {
 		printk(KERN_INFO "  cdb: ");
-		for (bit = 0; bit < sizeof(rq->cmd); bit++)
+		for (bit = 0; bit < BLK_MAX_CDB; bit++)
 			printk("%02x ", rq->cmd[bit]);
 		printk("\n");
 	}
@@ -220,7 +200,8 @@ void blk_plug_device(struct request_queue *q)
 	if (blk_queue_stopped(q))
 		return;
 
-	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
+	if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
+		__set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
 		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
 		blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
 	}
@@ -235,9 +216,10 @@ int blk_remove_plug(struct request_queue *q)
 {
 	WARN_ON(!irqs_disabled());
 
-	if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+	if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
 		return 0;
 
+	queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
 	del_timer(&q->unplug_timer);
 	return 1;
 }
@@ -333,15 +315,16 @@ void blk_start_queue(struct request_queue *q)
 {
 	WARN_ON(!irqs_disabled());
 
-	clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
+	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 
 	/*
 	 * one level of recursion is ok and is much faster than kicking
 	 * the unplug handling
 	 */
-	if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+	if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+		queue_flag_set(QUEUE_FLAG_REENTER, q);
 		q->request_fn(q);
-		clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
 	} else {
 		blk_plug_device(q);
 		kblockd_schedule_work(&q->unplug_work);
@@ -366,7 +349,7 @@ EXPORT_SYMBOL(blk_start_queue);
 void blk_stop_queue(struct request_queue *q)
 {
 	blk_remove_plug(q);
-	set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
+	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
 
@@ -395,11 +378,8 @@ EXPORT_SYMBOL(blk_sync_queue);
  * blk_run_queue - run a single device queue
  * @q:	The queue to run
  */
-void blk_run_queue(struct request_queue *q)
+void __blk_run_queue(struct request_queue *q)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(q->queue_lock, flags);
 	blk_remove_plug(q);
 
 	/*
@@ -407,15 +387,28 @@ void blk_run_queue(struct request_queue *q)
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
 	if (!elv_queue_empty(q)) {
-		if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+		if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
+			queue_flag_set(QUEUE_FLAG_REENTER, q);
 			q->request_fn(q);
-			clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
+			queue_flag_clear(QUEUE_FLAG_REENTER, q);
 		} else {
 			blk_plug_device(q);
 			kblockd_schedule_work(&q->unplug_work);
 		}
 	}
+}
+EXPORT_SYMBOL(__blk_run_queue);
+
+/**
+ * blk_run_queue - run a single device queue
+ * @q: The queue to run
+ */
+void blk_run_queue(struct request_queue *q)
+{
+	unsigned long flags;
 
+	spin_lock_irqsave(q->queue_lock, flags);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
@@ -428,7 +421,7 @@ void blk_put_queue(struct request_queue *q)
 void blk_cleanup_queue(struct request_queue *q)
 {
 	mutex_lock(&q->sysfs_lock);
-	set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
+	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 	mutex_unlock(&q->sysfs_lock);
 
 	if (q->elevator)
@@ -607,6 +600,8 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
 	if (!rq)
 		return NULL;
 
+	blk_rq_init(q, rq);
+
 	/*
 	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
@@ -789,8 +784,6 @@ rq_starved:
 	if (ioc_batching(q, ioc))
 		ioc->nr_batch_requests--;
 
-	rq_init(q, rq);
-
 	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
 out:
 	return rq;
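
With rq_init() renamed to blk_rq_init(), exported, and changed to do a full memset(), drivers that build requests on the stack no longer clear individual fields by hand. A minimal sketch of the pattern the driver conversions later in this commit follow (the REQ_TYPE_SPECIAL setup and disk assignment are illustrative, not part of the commit):

/*
 * Sketch: initializing a stack-allocated request after this change.
 * Mirrors the nbd/paride/ide conversions below.
 */
#include <linux/blkdev.h>

static void example_init_special_rq(struct request *rq, struct gendisk *disk)
{
	blk_rq_init(NULL, rq);		/* zero everything, then set defaults:
					 * ref_count = 1, tag = -1, cmd = __cmd */
	rq->cmd_type = REQ_TYPE_SPECIAL;
	rq->rq_disk  = disk;		/* caller-specific setup follows */
}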

+ 20 - 1
block/blk-map.c

@@ -255,10 +255,18 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
  * @kbuf:	the kernel buffer
  * @len:	length of user data
  * @gfp_mask:	memory allocation flags
+ *
+ * Description:
+ *    Data will be mapped directly if possible. Otherwise a bounce
+ *    buffer is used.
  */
 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		    unsigned int len, gfp_t gfp_mask)
 {
+	unsigned long kaddr;
+	unsigned int alignment;
+	int reading = rq_data_dir(rq) == READ;
+	int do_copy = 0;
 	struct bio *bio;
 
 	if (len > (q->max_hw_sectors << 9))
@@ -266,13 +274,24 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	if (!len || !kbuf)
 		return -EINVAL;
 
-	bio = bio_map_kern(q, kbuf, len, gfp_mask);
+	kaddr = (unsigned long)kbuf;
+	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+	do_copy = ((kaddr & alignment) || (len & alignment));
+
+	if (do_copy)
+		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
+	else
+		bio = bio_map_kern(q, kbuf, len, gfp_mask);
+
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
 	if (rq_data_dir(rq) == WRITE)
 		bio->bi_rw |= (1 << BIO_RW);
 
+	if (do_copy)
+		rq->cmd_flags |= REQ_COPY_USER;
+
 	blk_rq_bio_prep(q, rq, bio);
 	blk_queue_bounce(q, &rq->bio);
 	rq->buffer = rq->data = NULL;
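
For callers the blk_rq_map_kern() interface is unchanged; buffers that violate the queue's DMA alignment or padding mask are now bounced through bio_copy_kern() instead of being mapped directly. A hedged sketch of a typical caller, assuming a scsi_ioctl-style user of the API (the CDB bytes and timeout are illustrative):

/*
 * Sketch: issue a BLOCK_PC request backed by a kernel buffer. If kbuf is
 * not aligned to queue_dma_alignment(q) | q->dma_pad_mask, blk_rq_map_kern()
 * now transparently copies through a bounce bio and sets REQ_COPY_USER.
 */
#include <linux/blkdev.h>

static int example_send_cmd(struct request_queue *q, struct gendisk *disk,
			    void *kbuf, unsigned int len)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd[0] = 0x12;			/* illustrative CDB (INQUIRY) */
	rq->cmd[4] = len;
	rq->cmd_len = 6;
	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;

	err = blk_rq_map_kern(q, rq, kbuf, len, GFP_KERNEL);
	if (!err)
		err = blk_execute_rq(q, disk, rq, 0);

	blk_put_request(rq);
	return err;
}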

+ 3 - 3
block/blk-merge.c

@@ -55,7 +55,7 @@ void blk_recalc_rq_segments(struct request *rq)
 	if (!rq->bio)
 		return;
 
-	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
+	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 	hw_seg_size = seg_size = 0;
 	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
 	rq_for_each_segment(bv, rq, iter) {
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(blk_recount_segments);
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 				   struct bio *nxt)
 {
-	if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
+	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
 		return 0;
 
 	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
@@ -175,7 +175,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 	int nsegs, cluster;
 
 	nsegs = 0;
-	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
+	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 
 	/*
 	 * for each bio in rq

+ 1 - 2
block/blk-settings.c

@@ -14,7 +14,6 @@ unsigned long blk_max_low_pfn;
 EXPORT_SYMBOL(blk_max_low_pfn);
 
 unsigned long blk_max_pfn;
-EXPORT_SYMBOL(blk_max_pfn);
 
 /**
  * blk_queue_prep_rq - set a prepare_request function for queue
@@ -288,7 +287,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 	t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
 	t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
 	if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
-		clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
+		queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 

+ 26 - 0
block/blk-sysfs.c

@@ -135,6 +135,25 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
 
+static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_nomerges(q), page);
+}
+
+static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
+				    size_t count)
+{
+	unsigned long nm;
+	ssize_t ret = queue_var_store(&nm, page, count);
+
+	if (nm)
+	       set_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
+	else
+	       clear_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
+
+	return ret;
+}
+
 
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -170,6 +189,12 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
 	.show = queue_hw_sector_size_show,
 };
 
+static struct queue_sysfs_entry queue_nomerges_entry = {
+	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_nomerges_show,
+	.store = queue_nomerges_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -177,6 +202,7 @@ static struct attribute *default_attrs[] = {
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
+	&queue_nomerges_entry.attr,
 	NULL,
 };
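
The new knob is exposed to userspace as a read/write sysfs attribute under the queue directory. A minimal userspace sketch of toggling it (the device name is an example):

/*
 * Sketch (userspace, illustrative device name): disable I/O merging on a
 * queue via the new "nomerges" attribute added above.
 */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/block/sda/queue/nomerges", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, "1", 1) != 1) {	/* "0" re-enables merge attempts */
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}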
 

+ 4 - 4
block/blk-tag.c

@@ -70,7 +70,7 @@ void __blk_queue_free_tags(struct request_queue *q)
 	__blk_free_tags(bqt);
 
 	q->queue_tags = NULL;
-	q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
+	queue_flag_clear(QUEUE_FLAG_QUEUED, q);
 }
 
 /**
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(blk_free_tags);
  **/
 void blk_queue_free_tags(struct request_queue *q)
 {
-	clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+	queue_flag_clear(QUEUE_FLAG_QUEUED, q);
 }
 EXPORT_SYMBOL(blk_queue_free_tags);
 
@@ -188,7 +188,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
 		rc = blk_queue_resize_tags(q, depth);
 		if (rc)
 			return rc;
-		set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+		queue_flag_set(QUEUE_FLAG_QUEUED, q);
 		return 0;
 	} else
 		atomic_inc(&tags->refcnt);
@@ -197,7 +197,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
 	 * assign it, all done
 	 */
 	q->queue_tags = tags;
-	q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
+	queue_flag_set(QUEUE_FLAG_QUEUED, q);
 	INIT_LIST_HEAD(&q->tag_busy_list);
 	return 0;
 fail:

+ 0 - 1
block/blk.h

@@ -10,7 +10,6 @@
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kobj_type blk_queue_ktype;
 
-void rq_init(struct request_queue *q, struct request *rq);
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);

+ 14 - 4
block/elevator.c

@@ -69,7 +69,7 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 /*
  * can we safely merge with this request?
  */
-inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
+int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 {
 	if (!rq_mergeable(rq))
 		return 0;
@@ -488,6 +488,9 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 		}
 	}
 
+	if (blk_queue_nomerges(q))
+		return ELEVATOR_NO_MERGE;
+
 	/*
 	 * See if our hash lookup can find a potential backmerge.
 	 */
@@ -1070,7 +1073,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 */
 	spin_lock_irq(q->queue_lock);
 
-	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
 
 	elv_drain_elevator(q);
 
@@ -1104,7 +1107,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 * finally exit old elevator and turn off BYPASS.
 	 */
 	elevator_exit(old_elevator);
-	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	spin_lock_irq(q->queue_lock);
+	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+	spin_unlock_irq(q->queue_lock);
+
 	return 1;
 
 fail_register:
@@ -1115,7 +1121,11 @@ fail_register:
 	elevator_exit(e);
 	q->elevator = old_elevator;
 	elv_register_queue(q);
-	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+
+	spin_lock_irq(q->queue_lock);
+	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+	spin_unlock_irq(q->queue_lock);
+
 	return 0;
 }
 

+ 0 - 3
block/scsi_ioctl.c

@@ -217,8 +217,6 @@ EXPORT_SYMBOL_GPL(blk_verify_command);
 static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 			     struct sg_io_hdr *hdr, int has_write_perm)
 {
-	memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
-
 	if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
 		return -EFAULT;
 	if (blk_verify_command(rq->cmd, has_write_perm))
@@ -531,7 +529,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	rq->data_len = 0;
 	rq->extra_len = 0;
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
-	memset(rq->cmd, 0, sizeof(rq->cmd));
 	rq->cmd[0] = cmd;
 	rq->cmd[4] = data;
 	rq->cmd_len = 6;

+ 1 - 1
drivers/block/loop.c

@@ -546,7 +546,7 @@ static void loop_unplug(struct request_queue *q)
 {
 	struct loop_device *lo = q->queuedata;
 
-	clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+	queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
 	blk_run_address_space(lo->lo_backing_file->f_mapping);
 }
 

+ 1 - 0
drivers/block/nbd.c

@@ -577,6 +577,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
 	switch (cmd) {
 	case NBD_DISCONNECT:
 	        printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
+		blk_rq_init(NULL, &sreq);
 		sreq.cmd_type = REQ_TYPE_SPECIAL;
 		nbd_cmd(&sreq) = NBD_CMD_DISC;
 		/*

+ 1 - 3
drivers/block/paride/pd.c

@@ -716,10 +716,8 @@ static int pd_special_command(struct pd_unit *disk,
 	struct request rq;
 	int err = 0;
 
-	memset(&rq, 0, sizeof(rq));
-	rq.errors = 0;
+	blk_rq_init(NULL, &rq);
 	rq.rq_disk = disk->gd;
-	rq.ref_count = 1;
 	rq.end_io_data = &wait;
 	rq.end_io = blk_end_sync_rq;
 	blk_insert_request(disk->gd->queue, &rq, 0, func);

+ 0 - 2
drivers/block/pktcdvd.c

@@ -776,8 +776,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 
 	rq->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
-	if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
-		memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
 
 	rq->timeout = 60*HZ;
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;

+ 1 - 3
drivers/block/ps3disk.c

@@ -102,8 +102,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
 		dev_dbg(&dev->sbd.core,
 			"%s:%u: bio %u: %u segs %u sectors from %lu\n",
 			__func__, __LINE__, i, bio_segments(iter.bio),
-			bio_sectors(iter.bio),
-			(unsigned long)iter.bio->bi_sector);
+			bio_sectors(iter.bio), iter.bio->bi_sector);
 
 		size = bvec->bv_len;
 		buf = bvec_kmap_irq(bvec, &flags);
@@ -406,7 +405,6 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req)
 
 	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
-	memset(req->cmd, 0, sizeof(req->cmd));
 	req->cmd_type = REQ_TYPE_FLUSH;
 }
 

+ 1 - 1
drivers/block/ub.c

@@ -2399,7 +2399,7 @@ static void ub_disconnect(struct usb_interface *intf)
 		del_gendisk(lun->disk);
 		/*
 		 * I wish I could do:
-		 *    set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
+		 *    queue_flag_set(QUEUE_FLAG_DEAD, q);
 		 * As it is, we rely on our internal poisoning and let
 		 * the upper levels to spin furiously failing all the I/O.
 		 */

+ 0 - 1
drivers/cdrom/cdrom.c

@@ -2194,7 +2194,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		if (ret)
 			break;
 
-		memset(rq->cmd, 0, sizeof(rq->cmd));
 		rq->cmd[0] = GPCMD_READ_CD;
 		rq->cmd[1] = 1 << 2;
 		rq->cmd[2] = (lba >> 24) & 0xff;

+ 2 - 2
drivers/ide/ide-cd.c

@@ -782,7 +782,7 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive)
 
 	sector_div(frame, queue_hardsect_size(drive->queue) >> SECTOR_BITS);
 
-	memset(rq->cmd, 0, sizeof(rq->cmd));
+	memset(rq->cmd, 0, BLK_MAX_CDB);
 	rq->cmd[0] = GPCMD_SEEK;
 	put_unaligned(cpu_to_be32(frame), (unsigned int *) &rq->cmd[2]);
 
@@ -1694,7 +1694,7 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
 	long block = (long)rq->hard_sector / (hard_sect >> 9);
 	unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9);
 
-	memset(rq->cmd, 0, sizeof(rq->cmd));
+	memset(rq->cmd, 0, BLK_MAX_CDB);
 
 	if (rq_data_dir(rq) == READ)
 		rq->cmd[0] = GPCMD_READ_10;

+ 1 - 1
drivers/ide/ide-cd_verbose.c

@@ -326,7 +326,7 @@ void ide_cd_log_error(const char *name, struct request *failed_command,
 
 		printk(KERN_ERR "  The failed \"%s\" packet command "
 				"was: \n  \"", s);
-		for (i = 0; i < sizeof(failed_command->cmd); i++)
+		for (i = 0; i < BLK_MAX_CDB; i++)
 			printk(KERN_CONT "%02x ", failed_command->cmd[i]);
 		printk(KERN_CONT "\"\n");
 	}

+ 1 - 2
drivers/ide/ide-io.c

@@ -1550,8 +1550,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 
 void ide_init_drive_cmd (struct request *rq)
 {
-	memset(rq, 0, sizeof(*rq));
-	rq->ref_count = 1;
+	blk_rq_init(NULL, rq);
 }
 
 EXPORT_SYMBOL(ide_init_drive_cmd);

+ 1 - 1
drivers/ide/ide-tape.c

@@ -662,7 +662,7 @@ static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc)
 
 static void idetape_init_rq(struct request *rq, u8 cmd)
 {
-	memset(rq, 0, sizeof(*rq));
+	blk_rq_init(NULL, rq);
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[0] = cmd;
 }

+ 1 - 2
drivers/ide/ide-taskfile.c

@@ -494,8 +494,7 @@ int ide_raw_taskfile(ide_drive_t *drive, ide_task_t *task, u8 *buf, u16 nsect)
 {
 	struct request rq;
 
-	memset(&rq, 0, sizeof(rq));
-	rq.ref_count = 1;
+	blk_rq_init(NULL, &rq);
 	rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
 	rq.buffer = buf;
 

+ 2 - 2
drivers/ide/ide.c

@@ -564,7 +564,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	if (!(drive->dn % 2))
 		ide_acpi_get_timing(hwif);
 
-	memset(&rq, 0, sizeof(rq));
+	blk_rq_init(NULL, &rq);
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
 	rq.cmd_type = REQ_TYPE_PM_SUSPEND;
@@ -602,7 +602,7 @@ static int generic_ide_resume(struct device *dev)
 
 	ide_acpi_exec_tfs(drive);
 
-	memset(&rq, 0, sizeof(rq));
+	blk_rq_init(NULL, &rq);
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
 	rq.cmd_type = REQ_TYPE_PM_RESUME;

+ 0 - 2
drivers/md/dm-emc.c

@@ -110,8 +110,6 @@ static struct request *get_failover_req(struct emc_handler *h,
 	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
 	rq->sense_len = 0;
 
-	memset(&rq->cmd, 0, BLK_MAX_CDB);
-
 	rq->timeout = EMC_FAILOVER_TIMEOUT;
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;

+ 0 - 1
drivers/md/dm-mpath-hp-sw.c

@@ -137,7 +137,6 @@ static struct request *hp_sw_get_request(struct dm_path *path)
 	req->sense = h->sense;
 	memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
 
-	memset(&req->cmd, 0, BLK_MAX_CDB);
 	req->cmd[0] = START_STOP;
 	req->cmd[4] = 1;
 	req->cmd_len = COMMAND_SIZE(req->cmd[0]);

+ 0 - 1
drivers/md/dm-mpath-rdac.c

@@ -284,7 +284,6 @@ static struct request *get_rdac_req(struct rdac_handler *h,
 		return NULL;
 	}
 
- 	memset(&rq->cmd, 0, BLK_MAX_CDB);
 	rq->sense = h->sense;
 	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
 	rq->sense_len = 0;

+ 5 - 2
drivers/md/dm-table.c

@@ -873,10 +873,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	q->max_hw_sectors = t->limits.max_hw_sectors;
 	q->seg_boundary_mask = t->limits.seg_boundary_mask;
 	q->bounce_pfn = t->limits.bounce_pfn;
+	/* XXX: the below will probably go bug. must ensure there can be no
+	 * concurrency on queue_flags, and use the unlocked versions...
+	 */
 	if (t->limits.no_cluster)
-		q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
+		queue_flag_clear(QUEUE_FLAG_CLUSTER, q);
 	else
-		q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
+		queue_flag_set(QUEUE_FLAG_CLUSTER, q);
 
 }
 

+ 2 - 1
drivers/md/md.c

@@ -282,7 +282,8 @@ static mddev_t * mddev_find(dev_t unit)
 		kfree(new);
 		return NULL;
 	}
-	set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
+	/* Can be unlocked because the queue is new: no concurrency */
+	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
 
 	blk_queue_make_request(new->queue, md_fail_request);
 

+ 1 - 1
drivers/scsi/scsi_debug.c

@@ -1773,7 +1773,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp)
 	if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
 		printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n",
 		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
-	set_bit(QUEUE_FLAG_BIDI, &sdp->request_queue->queue_flags);
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue);
 	return 0;
 }
 

+ 1 - 0
drivers/scsi/scsi_error.c

@@ -1771,6 +1771,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
 	unsigned long flags;
 	int rtn;
 
+	blk_rq_init(NULL, &req);
 	scmd->request = &req;
 	memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
 

+ 18 - 13
drivers/scsi/scsi_lib.c

@@ -536,6 +536,9 @@ static void scsi_run_queue(struct request_queue *q)
 	       !shost->host_blocked && !shost->host_self_blocked &&
 		!((shost->can_queue > 0) &&
 		  (shost->host_busy >= shost->can_queue))) {
+
+		int flagset;
+
 		/*
 		 * As long as shost is accepting commands and we have
 		 * starved queues, call blk_run_queue. scsi_request_fn
@@ -549,19 +552,20 @@ static void scsi_run_queue(struct request_queue *q)
 		sdev = list_entry(shost->starved_list.next,
 					  struct scsi_device, starved_entry);
 		list_del_init(&sdev->starved_entry);
-		spin_unlock_irqrestore(shost->host_lock, flags);
-
+		spin_unlock(shost->host_lock);
+
+		spin_lock(sdev->request_queue->queue_lock);
+		flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
+				!test_bit(QUEUE_FLAG_REENTER,
+					&sdev->request_queue->queue_flags);
+		if (flagset)
+			queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
+		__blk_run_queue(sdev->request_queue);
+		if (flagset)
+			queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
+		spin_unlock(sdev->request_queue->queue_lock);
 
-		if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
-		    !test_and_set_bit(QUEUE_FLAG_REENTER,
-				      &sdev->request_queue->queue_flags)) {
-			blk_run_queue(sdev->request_queue);
-			clear_bit(QUEUE_FLAG_REENTER,
-				  &sdev->request_queue->queue_flags);
-		} else
-			blk_run_queue(sdev->request_queue);
-
-		spin_lock_irqsave(shost->host_lock, flags);
+		spin_lock(shost->host_lock);
 		if (unlikely(!list_empty(&sdev->starved_entry)))
 			/*
 			 * sdev lost a race, and was put back on the
@@ -1585,8 +1589,9 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 
 	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
+	/* New queue, no concurrency on queue_flags */
 	if (!shost->use_clustering)
-		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 
 	/*
 	 * set a reasonable default alignment on word boundaries: the

+ 1 - 2
drivers/scsi/scsi_transport_sas.c

@@ -248,8 +248,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 	else
 		q->queuedata = shost;
 
-	set_bit(QUEUE_FLAG_BIDI, &q->queue_flags);
-
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
 	return 0;
 }
 

+ 0 - 1
drivers/scsi/sd.c

@@ -860,7 +860,6 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 
 static void sd_prepare_flush(struct request_queue *q, struct request *rq)
 {
-	memset(rq->cmd, 0, sizeof(rq->cmd));
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->timeout = SD_TIMEOUT;
 	rq->cmd[0] = SYNCHRONIZE_CACHE;

+ 90 - 0
fs/bio.c

@@ -937,6 +937,95 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 	return ERR_PTR(-EINVAL);
 }
 
+static void bio_copy_kern_endio(struct bio *bio, int err)
+{
+	struct bio_vec *bvec;
+	const int read = bio_data_dir(bio) == READ;
+	char *p = bio->bi_private;
+	int i;
+
+	__bio_for_each_segment(bvec, bio, i, 0) {
+		char *addr = page_address(bvec->bv_page);
+
+		if (read && !err)
+			memcpy(p, addr, bvec->bv_len);
+
+		__free_page(bvec->bv_page);
+		p += bvec->bv_len;
+	}
+
+	bio_put(bio);
+}
+
+/**
+ *	bio_copy_kern	-	copy kernel address into bio
+ *	@q: the struct request_queue for the bio
+ *	@data: pointer to buffer to copy
+ *	@len: length in bytes
+ *	@gfp_mask: allocation flags for bio and page allocation
+ *
+ *	copy the kernel address into a bio suitable for io to a block
+ *	device. Returns an error pointer in case of error.
+ */
+struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
+			  gfp_t gfp_mask, int reading)
+{
+	unsigned long kaddr = (unsigned long)data;
+	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = kaddr >> PAGE_SHIFT;
+	const int nr_pages = end - start;
+	struct bio *bio;
+	struct bio_vec *bvec;
+	int i, ret;
+
+	bio = bio_alloc(gfp_mask, nr_pages);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	while (len) {
+		struct page *page;
+		unsigned int bytes = PAGE_SIZE;
+
+		if (bytes > len)
+			bytes = len;
+
+		page = alloc_page(q->bounce_gfp | gfp_mask);
+		if (!page) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+
+		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
+			ret = -EINVAL;
+			goto cleanup;
+		}
+
+		len -= bytes;
+	}
+
+	if (!reading) {
+		void *p = data;
+
+		bio_for_each_segment(bvec, bio, i) {
+			char *addr = page_address(bvec->bv_page);
+
+			memcpy(addr, p, bvec->bv_len);
+			p += bvec->bv_len;
+		}
+	}
+
+	bio->bi_private = data;
+	bio->bi_end_io = bio_copy_kern_endio;
+	return bio;
+cleanup:
+	bio_for_each_segment(bvec, bio, i)
+		__free_page(bvec->bv_page);
+
+	bio_put(bio);
+
+	return ERR_PTR(ret);
+}
+
 /*
  * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
  * for performing direct-IO in BIOs.
@@ -1273,6 +1362,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
 EXPORT_SYMBOL(bio_map_user);
 EXPORT_SYMBOL(bio_unmap_user);
 EXPORT_SYMBOL(bio_map_kern);
+EXPORT_SYMBOL(bio_copy_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
 EXPORT_SYMBOL(bio_split_pool);

+ 1 - 1
fs/splice.c

@@ -1075,7 +1075,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 	ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
 	if (ret > 0)
-		*ppos += ret;
+		*ppos = sd.pos;
 
 	return ret;
 }

+ 2 - 0
include/linux/bio.h

@@ -324,6 +324,8 @@ extern struct bio *bio_map_user_iov(struct request_queue *,
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
 				gfp_t);
+extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
+				 gfp_t, int);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);

+ 35 - 6
include/linux/blkdev.h

@@ -215,8 +215,9 @@ struct request {
 	/*
 	 * when request is used as a packet command carrier
 	 */
-	unsigned int cmd_len;
-	unsigned char cmd[BLK_MAX_CDB];
+	unsigned short cmd_len;
+	unsigned char __cmd[BLK_MAX_CDB];
+	unsigned char *cmd;
 
 	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
@@ -407,6 +408,31 @@ struct request_queue
 #define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES    10	/* disable merge attempts */
+
+static inline void queue_flag_set_unlocked(unsigned int flag,
+					   struct request_queue *q)
+{
+	__set_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
+{
+	WARN_ON_ONCE(!spin_is_locked(q->queue_lock));
+	__set_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_clear_unlocked(unsigned int flag,
+					     struct request_queue *q)
+{
+	__clear_bit(flag, &q->queue_flags);
+}
+
+static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
+{
+	WARN_ON_ONCE(!spin_is_locked(q->queue_lock));
+	__clear_bit(flag, &q->queue_flags);
+}
 
 enum {
 	/*
@@ -451,6 +477,7 @@ enum {
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
 
 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
@@ -496,17 +523,17 @@ static inline int blk_queue_full(struct request_queue *q, int rw)
 static inline void blk_set_queue_full(struct request_queue *q, int rw)
 {
 	if (rw == READ)
-		set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+		queue_flag_set(QUEUE_FLAG_READFULL, q);
 	else
-		set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+		queue_flag_set(QUEUE_FLAG_WRITEFULL, q);
 }
 
 static inline void blk_clear_queue_full(struct request_queue *q, int rw)
 {
 	if (rw == READ)
-		clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+		queue_flag_clear(QUEUE_FLAG_READFULL, q);
 	else
-		clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+		queue_flag_clear(QUEUE_FLAG_WRITEFULL, q);
 }
 
 
@@ -583,6 +610,7 @@ extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
 extern void generic_make_request(struct bio *bio);
+extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern void blk_end_sync_rq(struct request *rq, int error);
@@ -626,6 +654,7 @@ extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
+extern void __blk_run_queue(struct request_queue *);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_start_queueing(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long);
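
Since queue_flags is no longer manipulated atomically, the helpers above encode the locking rule: the plain variants assert that queue_lock is held, while the *_unlocked variants are only for contexts with no possible concurrency (typically queue setup). A brief sketch of both paths, following the md.c and blk_stop_queue() usage in this commit (the specific flags are just examples):

/*
 * Sketch: the two legitimate ways to flip a queue flag after this change.
 */
#include <linux/blkdev.h>

static void example_queue_flag_usage(struct request_queue *q)
{
	/* init path: queue not yet visible to anyone else */
	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);

	/* run-time path: queue_lock must be held (WARN_ON_ONCE otherwise) */
	spin_lock_irq(q->queue_lock);
	queue_flag_set(QUEUE_FLAG_STOPPED, q);
	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
	spin_unlock_irq(q->queue_lock);
}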

+ 1 - 1
kernel/relay.c

@@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in,
 	ret = 0;
 	spliced = 0;
 
-	while (len) {
+	while (len && !spliced) {
 		ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
 		if (ret < 0)
 			break;