@@ -22,12 +22,14 @@
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 #include <linux/virtio_scsi.h>
+#include <linux/cpu.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_cmnd.h>
 
 #define VIRTIO_SCSI_MEMPOOL_SZ 64
 #define VIRTIO_SCSI_EVENT_LEN 8
+#define VIRTIO_SCSI_VQ_BASE 2
 
 /* Command queue element */
 struct virtio_scsi_cmd {
@@ -59,22 +61,58 @@ struct virtio_scsi_vq {
         struct virtqueue *vq;
 };
 
-/* Per-target queue state */
+/*
+ * Per-target queue state.
+ *
+ * This struct holds the data needed by the queue steering policy.  When a
+ * target is sent multiple requests, we need to drive them to the same queue so
+ * that FIFO processing order is kept.  However, if a target was idle, we can
+ * choose a queue arbitrarily.  In this case the queue is chosen according to
+ * the current VCPU, so the driver expects the number of request queues to be
+ * equal to the number of VCPUs.  This makes it easy and fast to select the
+ * queue, and also lets the driver optimize the IRQ affinity for the virtqueues
+ * (each virtqueue's affinity is set to the CPU that "owns" the queue).
+ *
+ * An interesting effect of this policy is that only writes to req_vq need to
+ * take the tgt_lock.  Reads can be done outside the lock because:
+ *
+ * - writes of req_vq only occur when atomic_inc_return(&tgt->reqs) returns 1.
+ *   In that case, no other CPU is reading req_vq: even if they were in
+ *   virtscsi_queuecommand_multi, they would be spinning on tgt_lock.
+ *
+ * - reads of req_vq only occur when the target is not idle (reqs != 0).
+ *   A CPU that enters virtscsi_queuecommand_multi will not modify req_vq.
+ *
+ * Similarly, decrements of reqs are never concurrent with writes of req_vq.
+ * Thus they can happen outside the tgt_lock, provided of course we make reqs
+ * an atomic_t.
+ */
 struct virtio_scsi_target_state {
-        /* Never held at the same time as vq_lock. */
+        /* This spinlock is never held at the same time as vq_lock. */
         spinlock_t tgt_lock;
+
+        /* Count of outstanding requests. */
+        atomic_t reqs;
+
+        /* Currently active virtqueue for requests sent to this target. */
+        struct virtio_scsi_vq *req_vq;
 };
 
 /* Driver instance state */
 struct virtio_scsi {
         struct virtio_device *vdev;
 
-        struct virtio_scsi_vq ctrl_vq;
-        struct virtio_scsi_vq event_vq;
-        struct virtio_scsi_vq req_vq;
-
         /* Get some buffers ready for event vq */
         struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN];
+
+        u32 num_queues;
+
+        /* If the affinity hint is set for virtqueues */
+        bool affinity_hint_set;
+
+        struct virtio_scsi_vq ctrl_vq;
+        struct virtio_scsi_vq event_vq;
+        struct virtio_scsi_vq req_vqs[];
 };
 
 static struct kmem_cache *virtscsi_cmd_cache;
@@ -109,6 +147,8 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
         struct virtio_scsi_cmd *cmd = buf;
         struct scsi_cmnd *sc = cmd->sc;
         struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
+        struct virtio_scsi_target_state *tgt =
+                                scsi_target(sc->device)->hostdata;
 
         dev_dbg(&sc->device->sdev_gendev,
                 "cmd %p response %u status %#02x sense_len %u\n",
@@ -163,6 +203,8 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
 
         mempool_free(cmd, virtscsi_cmd_pool);
         sc->scsi_done(sc);
+
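+        /* Pairs with the increment in virtscsi_queuecommand_single and
+         * virtscsi_pick_vq.
+         */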
+        atomic_dec(&tgt->reqs);
 }
 
 static void virtscsi_vq_done(struct virtio_scsi *vscsi,
@@ -187,8 +229,42 @@ static void virtscsi_req_done(struct virtqueue *vq)
 {
         struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
         struct virtio_scsi *vscsi = shost_priv(sh);
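+        /* vq->index counts all of the device's virtqueues; request queues
+         * start after the control and event queues, at VIRTIO_SCSI_VQ_BASE.
+         */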
+        int index = vq->index - VIRTIO_SCSI_VQ_BASE;
+        struct virtio_scsi_vq *req_vq = &vscsi->req_vqs[index];
 
-        virtscsi_vq_done(vscsi, &vscsi->req_vq, virtscsi_complete_cmd);
+        /*
+         * Read req_vq before decrementing the reqs field in
+         * virtscsi_complete_cmd.
+         *
+         * With barriers:
+         *
+         *      CPU #0                  virtscsi_queuecommand_multi (CPU #1)
+         *      ------------------------------------------------------------
+         *      lock vq_lock
+         *      read req_vq
+         *      read reqs (reqs = 1)
+         *      write reqs (reqs = 0)
+         *                              increment reqs (reqs = 1)
+         *                              write req_vq
+         *
+         * Possible reordering without barriers:
+         *
+         *      CPU #0                  virtscsi_queuecommand_multi (CPU #1)
+         *      ------------------------------------------------------------
+         *      lock vq_lock
+         *      read reqs (reqs = 1)
+         *      write reqs (reqs = 0)
+         *                              increment reqs (reqs = 1)
+         *                              write req_vq
+         *      read (wrong) req_vq
+         *
+         * We do not need a full smp_rmb, because req_vq is required to get
+         * to tgt->reqs: tgt is scsi_target(sc->device)->hostdata, where sc
+         * is stored in the virtqueue as the user token.
+         */
+        smp_read_barrier_depends();
+
+        virtscsi_vq_done(vscsi, req_vq, virtscsi_complete_cmd);
 };
 
 static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf)
@@ -251,7 +327,7 @@ static void virtscsi_cancel_event_work(struct virtio_scsi *vscsi)
 }
 
 static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
-                                        struct virtio_scsi_event *event)
+                                            struct virtio_scsi_event *event)
 {
         struct scsi_device *sdev;
         struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
@@ -410,9 +486,10 @@ static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq,
         return err;
 }
 
-static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
+static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
+                                 struct virtio_scsi_vq *req_vq,
+                                 struct scsi_cmnd *sc)
 {
-        struct virtio_scsi *vscsi = shost_priv(sh);
         struct virtio_scsi_cmd *cmd;
         int ret;
 
@@ -446,7 +523,7 @@ static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
         BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
         memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
 
-        if (virtscsi_kick_cmd(&vscsi->req_vq, cmd,
+        if (virtscsi_kick_cmd(req_vq, cmd,
                               sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
                               GFP_ATOMIC) == 0)
                 ret = 0;
@@ -457,6 +534,55 @@ out:
         return ret;
 }
 
+static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
+                                        struct scsi_cmnd *sc)
+{
+        struct virtio_scsi *vscsi = shost_priv(sh);
+        struct virtio_scsi_target_state *tgt =
+                                scsi_target(sc->device)->hostdata;
+
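+        /* Single queue mode: every request goes to req_vqs[0], but reqs is
+         * still counted because virtscsi_complete_cmd decrements it for
+         * every completion.
+         */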
+        atomic_inc(&tgt->reqs);
+        return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc);
+}
+
+static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi,
+                                               struct virtio_scsi_target_state *tgt)
+{
+        struct virtio_scsi_vq *vq;
+        unsigned long flags;
+        u32 queue_num;
+
+        spin_lock_irqsave(&tgt->tgt_lock, flags);
+
+        /*
+         * The memory barrier after atomic_inc_return matches
+         * the smp_read_barrier_depends() in virtscsi_req_done.
+         */
+        if (atomic_inc_return(&tgt->reqs) > 1) {
+                vq = ACCESS_ONCE(tgt->req_vq);
+        } else {
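+                /*
+                 * Target was idle: pick the queue of the current CPU and
+                 * fold the CPU number into range by subtraction, in case
+                 * there are more CPUs than request queues.
+                 */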
+                queue_num = smp_processor_id();
+                while (unlikely(queue_num >= vscsi->num_queues))
+                        queue_num -= vscsi->num_queues;
+
+                tgt->req_vq = vq = &vscsi->req_vqs[queue_num];
+        }
+
+        spin_unlock_irqrestore(&tgt->tgt_lock, flags);
+        return vq;
+}
+
+static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
+                                       struct scsi_cmnd *sc)
+{
+        struct virtio_scsi *vscsi = shost_priv(sh);
+        struct virtio_scsi_target_state *tgt =
+                                scsi_target(sc->device)->hostdata;
+        struct virtio_scsi_vq *req_vq = virtscsi_pick_vq(vscsi, tgt);
+
+        return virtscsi_queuecommand(vscsi, req_vq, sc);
+}
+
 static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
 {
         DECLARE_COMPLETION_ONSTACK(comp);
@@ -533,6 +659,8 @@ static int virtscsi_target_alloc(struct scsi_target *starget)
                 return -ENOMEM;
 
         spin_lock_init(&tgt->tgt_lock);
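+        /* The target starts idle; its request queue is chosen when the
+         * first command arrives.
+         */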
+        atomic_set(&tgt->reqs, 0);
+        tgt->req_vq = NULL;
 
         starget->hostdata = tgt;
         return 0;
@@ -544,12 +672,28 @@ static void virtscsi_target_destroy(struct scsi_target *starget)
         kfree(tgt);
 }
 
-static struct scsi_host_template virtscsi_host_template = {
+static struct scsi_host_template virtscsi_host_template_single = {
         .module = THIS_MODULE,
         .name = "Virtio SCSI HBA",
         .proc_name = "virtio_scsi",
-        .queuecommand = virtscsi_queuecommand,
         .this_id = -1,
+        .queuecommand = virtscsi_queuecommand_single,
+        .eh_abort_handler = virtscsi_abort,
+        .eh_device_reset_handler = virtscsi_device_reset,
+
+        .can_queue = 1024,
+        .dma_boundary = UINT_MAX,
+        .use_clustering = ENABLE_CLUSTERING,
+        .target_alloc = virtscsi_target_alloc,
+        .target_destroy = virtscsi_target_destroy,
+};
+
+static struct scsi_host_template virtscsi_host_template_multi = {
+        .module = THIS_MODULE,
+        .name = "Virtio SCSI HBA",
+        .proc_name = "virtio_scsi",
+        .this_id = -1,
+        .queuecommand = virtscsi_queuecommand_multi,
         .eh_abort_handler = virtscsi_abort,
         .eh_device_reset_handler = virtscsi_device_reset,
 
@@ -577,6 +721,47 @@ static struct scsi_host_template virtscsi_host_template = {
                                   &__val, sizeof(__val)); \
         })
 
+static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
+{
+        int i;
+        int cpu;
+
+        /* In multiqueue mode, when the number of CPUs equals the number
+         * of request queues, we make each queue private to one CPU by
+         * setting the affinity hint, eliminating contention.  If the
+         * counts do not match, any previously set hints are cleared.
+         */
+        if ((vscsi->num_queues == 1 ||
+             vscsi->num_queues != num_online_cpus()) && affinity) {
+                if (vscsi->affinity_hint_set)
+                        affinity = false;
+                else
+                        return;
+        }
+
+        if (affinity) {
+                i = 0;
+                for_each_online_cpu(cpu) {
+                        virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu);
+                        i++;
+                }
+
+                vscsi->affinity_hint_set = true;
+        } else {
+                for (i = 0; i < vscsi->num_queues; i++)
+                        virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1);
+
+                vscsi->affinity_hint_set = false;
+        }
+}
+
+static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
+{
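+        /* Block CPU hotplug while the affinity hints are updated. */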
+        get_online_cpus();
+        __virtscsi_set_affinity(vscsi, affinity);
+        put_online_cpus();
+}
+
 static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
                              struct virtqueue *vq)
 {
@@ -593,6 +778,11 @@ static void virtscsi_scan(struct virtio_device *vdev)
 
 static void virtscsi_remove_vqs(struct virtio_device *vdev)
 {
+        struct Scsi_Host *sh = virtio_scsi_host(vdev);
+        struct virtio_scsi *vscsi = shost_priv(sh);
+
+        virtscsi_set_affinity(vscsi, false);
+
         /* Stop all the virtqueues. */
         vdev->config->reset(vdev);
 
@@ -603,27 +793,43 @@ static int virtscsi_init(struct virtio_device *vdev,
                          struct virtio_scsi *vscsi)
 {
         int err;
-        struct virtqueue *vqs[3];
+        u32 i;
+        u32 num_vqs;
+        vq_callback_t **callbacks;
+        const char **names;
+        struct virtqueue **vqs;
+
+        num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE;
+        vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL);
+        callbacks = kmalloc(num_vqs * sizeof(vq_callback_t *), GFP_KERNEL);
+        names = kmalloc(num_vqs * sizeof(char *), GFP_KERNEL);
+
+        if (!callbacks || !vqs || !names) {
+                err = -ENOMEM;
+                goto out;
+        }
 
-        vq_callback_t *callbacks[] = {
-                virtscsi_ctrl_done,
-                virtscsi_event_done,
-                virtscsi_req_done
-        };
-        const char *names[] = {
-                "control",
-                "event",
-                "request"
-        };
+        callbacks[0] = virtscsi_ctrl_done;
+        callbacks[1] = virtscsi_event_done;
+        names[0] = "control";
+        names[1] = "event";
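+        /* All remaining virtqueues, from VIRTIO_SCSI_VQ_BASE on, carry
+         * requests.
+         */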
+        for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++) {
+                callbacks[i] = virtscsi_req_done;
+                names[i] = "request";
+        }
 
         /* Discover virtqueues and write information to configuration. */
-        err = vdev->config->find_vqs(vdev, 3, vqs, callbacks, names);
+        err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
         if (err)
-                return err;
+                goto out;
 
         virtscsi_init_vq(&vscsi->ctrl_vq, vqs[0]);
         virtscsi_init_vq(&vscsi->event_vq, vqs[1]);
-        virtscsi_init_vq(&vscsi->req_vq, vqs[2]);
+        for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++)
+                virtscsi_init_vq(&vscsi->req_vqs[i - VIRTIO_SCSI_VQ_BASE],
+                                 vqs[i]);
+
+        virtscsi_set_affinity(vscsi, true);
 
         virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
         virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
@@ -631,6 +837,14 @@ static int virtscsi_init(struct virtio_device *vdev,
         if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
                 virtscsi_kick_event_all(vscsi);
 
+        err = 0;
+
+out:
+        kfree(names);
+        kfree(callbacks);
+        kfree(vqs);
+        if (err)
+                virtscsi_remove_vqs(vdev);
         return err;
 }
 
@@ -641,10 +855,21 @@ static int virtscsi_probe(struct virtio_device *vdev)
         int err;
         u32 sg_elems, num_targets;
         u32 cmd_per_lun;
+        u32 num_queues;
+        struct scsi_host_template *hostt;
+
+        /* We need to know how many queues before we allocate; fall back
+         * to a single request queue if the device reports none.
+         */
+        num_queues = virtscsi_config_get(vdev, num_queues) ? : 1;
 
         num_targets = virtscsi_config_get(vdev, max_target) + 1;
 
-        shost = scsi_host_alloc(&virtscsi_host_template, sizeof(*vscsi));
+        if (num_queues == 1)
+                hostt = &virtscsi_host_template_single;
+        else
+                hostt = &virtscsi_host_template_multi;
+
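+        /* req_vqs[] is a flexible array member, so its per-queue entries
+         * must be allocated together with the host's private data.
+         */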
+        shost = scsi_host_alloc(hostt,
+                sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues);
         if (!shost)
                 return -ENOMEM;
 
@@ -652,6 +877,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
         shost->sg_tablesize = sg_elems;
         vscsi = shost_priv(shost);
         vscsi->vdev = vdev;
+        vscsi->num_queues = num_queues;
         vdev->priv = shost;
 
         err = virtscsi_init(vdev, vscsi);