소스 검색

Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus:
  lguest: make Launcher see device status updates
  lguest: remove bogus NULL cpu check
  lguest: avoid using NR_CPUS as a bounds check.
  virtio: add virtio disk geometry feature
  virtio: explicit advertisement of driver features
  virtio: change config to guest endian.
  virtio: finer-grained features for virtio_net
  virtio: wean net driver off NETDEV_TX_BUSY
  virtio-blk: fix remove oops
  virtio: fix scatterlist sizing in net driver.
  virtio: de-structify virtio_block status byte
  virtio: export more headers to userspace
  virtio: fix sparse return void-valued expression warnings
  virtio: fix tx_ stats in virtio_net
  virtio: ignore corrupted virtqueues rather than spinning.
Linus Torvalds 17 년 전
부모
커밋
02cdf0827b

+ 41 - 21
Documentation/lguest/lguest.c

@@ -131,6 +131,9 @@ struct device
 	/* Any queues attached to this device */
 	struct virtqueue *vq;
 
+	/* Handle status being finalized (ie. feature bits stable). */
+	void (*ready)(struct device *me);
+
 	/* Device-specific data. */
 	void *priv;
 };
@@ -925,24 +928,40 @@ static void enable_fd(int fd, struct virtqueue *vq)
 	write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
 }
 
-/* When the Guest asks us to reset a device, it's is fairly easy. */
-static void reset_device(struct device *dev)
+/* When the Guest tells us they updated the status field, we handle it. */
+static void update_device_status(struct device *dev)
 {
 	struct virtqueue *vq;
 
-	verbose("Resetting device %s\n", dev->name);
-	/* Clear the status. */
-	dev->desc->status = 0;
+	/* This is a reset. */
+	if (dev->desc->status == 0) {
+		verbose("Resetting device %s\n", dev->name);
 
-	/* Clear any features they've acked. */
-	memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
-	       dev->desc->feature_len);
+		/* Clear any features they've acked. */
+		memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
+		       dev->desc->feature_len);
 
-	/* Zero out the virtqueues. */
-	for (vq = dev->vq; vq; vq = vq->next) {
-		memset(vq->vring.desc, 0,
-		       vring_size(vq->config.num, getpagesize()));
-		vq->last_avail_idx = 0;
+		/* Zero out the virtqueues. */
+		for (vq = dev->vq; vq; vq = vq->next) {
+			memset(vq->vring.desc, 0,
+			       vring_size(vq->config.num, getpagesize()));
+			vq->last_avail_idx = 0;
+		}
+	} else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
+		warnx("Device %s configuration FAILED", dev->name);
+	} else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
+		unsigned int i;
+
+		verbose("Device %s OK: offered", dev->name);
+		for (i = 0; i < dev->desc->feature_len; i++)
+			verbose(" %08x", get_feature_bits(dev)[i]);
+		verbose(", accepted");
+		for (i = 0; i < dev->desc->feature_len; i++)
+			verbose(" %08x", get_feature_bits(dev)
+				[dev->desc->feature_len+i]);
+
+		if (dev->ready)
+			dev->ready(dev);
 	}
 }
 
@@ -954,9 +973,9 @@ static void handle_output(int fd, unsigned long addr)
 
 	/* Check each device and virtqueue. */
 	for (i = devices.dev; i; i = i->next) {
-		/* Notifications to device descriptors reset the device. */
+		/* Notifications to device descriptors update device status. */
 		if (from_guest_phys(addr) == i->desc) {
-			reset_device(i);
+			update_device_status(i);
 			return;
 		}
 
@@ -1170,6 +1189,7 @@ static struct device *new_device(const char *name, u16 type, int fd,
 	dev->handle_input = handle_input;
 	dev->name = name;
 	dev->vq = NULL;
+	dev->ready = NULL;
 
 	/* Append to device list.  Prepending to a single-linked list is
 	 * easier, but the user expects the devices to be arranged on the bus
@@ -1398,7 +1418,7 @@ static bool service_io(struct device *dev)
 	struct vblk_info *vblk = dev->priv;
 	unsigned int head, out_num, in_num, wlen;
 	int ret;
-	struct virtio_blk_inhdr *in;
+	u8 *in;
 	struct virtio_blk_outhdr *out;
 	struct iovec iov[dev->vq->vring.num];
 	off64_t off;
@@ -1416,7 +1436,7 @@ static bool service_io(struct device *dev)
 		     head, out_num, in_num);
 
 	out = convert(&iov[0], struct virtio_blk_outhdr);
-	in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr);
+	in = convert(&iov[out_num+in_num-1], u8);
 	off = out->sector * 512;
 
 	/* The block device implements "barriers", where the Guest indicates
@@ -1430,7 +1450,7 @@ static bool service_io(struct device *dev)
 	 * It'd be nice if we supported eject, for example, but we don't. */
 	if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
 		fprintf(stderr, "Scsi commands unsupported\n");
-		in->status = VIRTIO_BLK_S_UNSUPP;
+		*in = VIRTIO_BLK_S_UNSUPP;
 		wlen = sizeof(*in);
 	} else if (out->type & VIRTIO_BLK_T_OUT) {
 		/* Write */
@@ -1453,7 +1473,7 @@ static bool service_io(struct device *dev)
 			errx(1, "Write past end %llu+%u", off, ret);
 		}
 		wlen = sizeof(*in);
-		in->status = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
+		*in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
 	} else {
 		/* Read */
 
@@ -1466,10 +1486,10 @@ static bool service_io(struct device *dev)
 		verbose("READ from sector %llu: %i\n", out->sector, ret);
 		if (ret >= 0) {
 			wlen = sizeof(*in) + ret;
-			in->status = VIRTIO_BLK_S_OK;
+			*in = VIRTIO_BLK_S_OK;
 		} else {
 			wlen = sizeof(*in);
-			in->status = VIRTIO_BLK_S_IOERR;
+			*in = VIRTIO_BLK_S_IOERR;
 		}
 	}
 

+ 32 - 12
drivers/block/virtio_blk.c

@@ -35,7 +35,7 @@ struct virtblk_req
 	struct list_head list;
 	struct request *req;
 	struct virtio_blk_outhdr out_hdr;
-	struct virtio_blk_inhdr in_hdr;
+	u8 status;
 };
 
 static void blk_done(struct virtqueue *vq)
@@ -48,7 +48,7 @@ static void blk_done(struct virtqueue *vq)
 	spin_lock_irqsave(&vblk->lock, flags);
 	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
 		int uptodate;
-		switch (vbr->in_hdr.status) {
+		switch (vbr->status) {
 		case VIRTIO_BLK_S_OK:
 			uptodate = 1;
 			break;
@@ -101,7 +101,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	sg_init_table(vblk->sg, VIRTIO_MAX_SG);
 	sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
 	num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
-	sg_set_buf(&vblk->sg[num+1], &vbr->in_hdr, sizeof(vbr->in_hdr));
+	sg_set_buf(&vblk->sg[num+1], &vbr->status, sizeof(vbr->status));
 
 	if (rq_data_dir(vbr->req) == WRITE) {
 		vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -157,10 +157,25 @@ static int virtblk_ioctl(struct inode *inode, struct file *filp,
 /* We provide getgeo only to please some old bootloader/partitioning tools */
 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 {
-	/* some standard values, similar to sd */
-	geo->heads = 1 << 6;
-	geo->sectors = 1 << 5;
-	geo->cylinders = get_capacity(bd->bd_disk) >> 11;
+	struct virtio_blk *vblk = bd->bd_disk->private_data;
+	struct virtio_blk_geometry vgeo;
+	int err;
+
+	/* see if the host passed in geometry config */
+	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
+				offsetof(struct virtio_blk_config, geometry),
+				&vgeo);
+
+	if (!err) {
+		geo->heads = vgeo.heads;
+		geo->sectors = vgeo.sectors;
+		geo->cylinders = vgeo.cylinders;
+	} else {
+		/* some standard values, similar to sd */
+		geo->heads = 1 << 6;
+		geo->sectors = 1 << 5;
+		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
+	}
 	return 0;
 }
 
@@ -242,12 +257,12 @@ static int virtblk_probe(struct virtio_device *vdev)
 	index++;
 
 	/* If barriers are supported, tell block layer that queue is ordered */
-	if (vdev->config->feature(vdev, VIRTIO_BLK_F_BARRIER))
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
 		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
 
 	/* Host must always specify the capacity. */
-	__virtio_config_val(vdev, offsetof(struct virtio_blk_config, capacity),
-			    &cap);
+	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
+			  &cap, sizeof(cap));
 
 	/* If capacity is too big, truncate with warning. */
 	if ((sector_t)cap != cap) {
@@ -289,7 +304,6 @@ out:
 static void virtblk_remove(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk = vdev->priv;
-	int major = vblk->disk->major;
 
 	/* Nothing should be pending. */
 	BUG_ON(!list_empty(&vblk->reqs));
@@ -299,7 +313,6 @@ static void virtblk_remove(struct virtio_device *vdev)
 
 	blk_cleanup_queue(vblk->disk->queue);
 	put_disk(vblk->disk);
-	unregister_blkdev(major, "virtblk");
 	mempool_destroy(vblk->pool);
 	vdev->config->del_vq(vblk->vq);
 	kfree(vblk);
@@ -310,7 +323,14 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+static unsigned int features[] = {
+	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
+	VIRTIO_BLK_F_GEOMETRY,
+};
+
 static struct virtio_driver virtio_blk = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,

+ 41 - 27
drivers/lguest/lguest_device.c

@@ -85,27 +85,34 @@ static unsigned desc_size(const struct lguest_device_desc *desc)
 		+ desc->config_len;
 }
 
-/* This tests (and acknowleges) a feature bit. */
-static bool lg_feature(struct virtio_device *vdev, unsigned fbit)
+/* This gets the device's feature bits. */
+static u32 lg_get_features(struct virtio_device *vdev)
 {
+	unsigned int i;
+	u32 features = 0;
 	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
-	u8 *features;
-
-	/* Obviously if they ask for a feature off the end of our feature
-	 * bitmap, it's not set. */
-	if (fbit / 8 > desc->feature_len)
-		return false;
-
-	/* The feature bitmap comes after the virtqueues. */
-	features = lg_features(desc);
-	if (!(features[fbit / 8] & (1 << (fbit % 8))))
-		return false;
-
-	/* We set the matching bit in the other half of the bitmap to tell the
-	 * Host we want to use this feature.  We don't use this yet, but we
-	 * could in future. */
-	features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8));
-	return true;
+	u8 *in_features = lg_features(desc);
+
+	/* We do this the slow but generic way. */
+	for (i = 0; i < min(desc->feature_len * 8, 32); i++)
+		if (in_features[i / 8] & (1 << (i % 8)))
+			features |= (1 << i);
+
+	return features;
+}
+
+static void lg_set_features(struct virtio_device *vdev, u32 features)
+{
+	unsigned int i;
+	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
+	/* Second half of bitmap is features we accept. */
+	u8 *out_features = lg_features(desc) + desc->feature_len;
+
+	memset(out_features, 0, desc->feature_len);
+	for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
+		if (features & (1 << i))
+			out_features[i / 8] |= (1 << (i % 8));
+	}
 }
 
 /* Once they've found a field, getting a copy of it is easy. */
@@ -137,20 +144,26 @@ static u8 lg_get_status(struct virtio_device *vdev)
 	return to_lgdev(vdev)->desc->status;
 }
 
+/* To notify on status updates, we (ab)use the NOTIFY hypercall, with the
+ * descriptor address of the device.  A zero status means "reset". */
+static void set_status(struct virtio_device *vdev, u8 status)
+{
+	unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
+
+	/* We set the status. */
+	to_lgdev(vdev)->desc->status = status;
+	hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0);
+}
+
 static void lg_set_status(struct virtio_device *vdev, u8 status)
 {
 	BUG_ON(!status);
-	to_lgdev(vdev)->desc->status = status;
+	set_status(vdev, status);
 }
 
-/* To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
- * address of the device.  The Host will zero the status and all the
- * features. */
 static void lg_reset(struct virtio_device *vdev)
 {
-	unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
-
-	hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0);
+	set_status(vdev, 0);
 }
 
 /*
@@ -286,7 +299,8 @@ static void lg_del_vq(struct virtqueue *vq)
 
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
-	.feature = lg_feature,
+	.get_features = lg_get_features,
+	.set_features = lg_set_features,
 	.get = lg_get,
 	.set = lg_set,
 	.get_status = lg_get_status,

+ 1 - 3
drivers/lguest/lguest_user.c

@@ -102,7 +102,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
 static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
 {
 	/* We have a limited number the number of CPUs in the lguest struct. */
-	if (id >= NR_CPUS)
+	if (id >= ARRAY_SIZE(cpu->lg->cpus))
 		return -EINVAL;
 
 	/* Set up this CPU's id, and pointer back to the lguest struct. */
@@ -251,8 +251,6 @@ static ssize_t write(struct file *file, const char __user *in,
 		if (!lg || (cpu_id >= lg->nr_cpus))
 			return -EINVAL;
 		cpu = &lg->cpus[cpu_id];
-		if (!cpu)
-			return -EINVAL;
 
 		/* Once the Guest is dead, you can only read() why it died. */
 		if (lg->dead)

+ 68 - 28
drivers/net/virtio_net.c

@@ -41,6 +41,9 @@ struct virtnet_info
 	struct net_device *dev;
 	struct napi_struct napi;
 
+	/* The skb we couldn't send because buffers were full. */
+	struct sk_buff *last_xmit_skb;
+
 	/* Number of input buffers, and max we've ever had. */
 	unsigned int num, max;
 
@@ -142,10 +145,10 @@ drop:
 static void try_fill_recv(struct virtnet_info *vi)
 {
 	struct sk_buff *skb;
-	struct scatterlist sg[1+MAX_SKB_FRAGS];
+	struct scatterlist sg[2+MAX_SKB_FRAGS];
 	int num, err;
 
-	sg_init_table(sg, 1+MAX_SKB_FRAGS);
+	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 	for (;;) {
 		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
 		if (unlikely(!skb))
@@ -221,23 +224,22 @@ static void free_old_xmit_skbs(struct virtnet_info *vi)
 	while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
 		__skb_unlink(skb, &vi->send);
-		vi->dev->stats.tx_bytes += len;
+		vi->dev->stats.tx_bytes += skb->len;
 		vi->dev->stats.tx_packets++;
 		kfree_skb(skb);
 	}
 }
 
-static int start_xmit(struct sk_buff *skb, struct net_device *dev)
+static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 {
-	struct virtnet_info *vi = netdev_priv(dev);
-	int num, err;
-	struct scatterlist sg[1+MAX_SKB_FRAGS];
+	int num;
+	struct scatterlist sg[2+MAX_SKB_FRAGS];
 	struct virtio_net_hdr *hdr;
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 
-	sg_init_table(sg, 1+MAX_SKB_FRAGS);
+	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 
-	pr_debug("%s: xmit %p " MAC_FMT "\n", dev->name, skb,
+	pr_debug("%s: xmit %p " MAC_FMT "\n", vi->dev->name, skb,
 		 dest[0], dest[1], dest[2],
 		 dest[3], dest[4], dest[5]);
 
@@ -272,30 +274,51 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	vnet_hdr_to_sg(sg, skb);
 	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
-	__skb_queue_head(&vi->send, skb);
+
+	return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
+}
+
+static int start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
 
 again:
 	/* Free up any pending old buffers before queueing new ones. */
 	free_old_xmit_skbs(vi);
-	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
-	if (err) {
-		pr_debug("%s: virtio not prepared to send\n", dev->name);
-		netif_stop_queue(dev);
-
-		/* Activate callback for using skbs: if this returns false it
-		 * means some were used in the meantime. */
-		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
-			vi->svq->vq_ops->disable_cb(vi->svq);
-			netif_start_queue(dev);
-			goto again;
+
+	/* If we has a buffer left over from last time, send it now. */
+	if (vi->last_xmit_skb) {
+		if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
+			/* Drop this skb: we only queue one. */
+			vi->dev->stats.tx_dropped++;
+			kfree_skb(skb);
+			goto stop_queue;
 		}
-		__skb_unlink(skb, &vi->send);
+		vi->last_xmit_skb = NULL;
+	}
 
-		return NETDEV_TX_BUSY;
+	/* Put new one in send queue and do transmit */
+	__skb_queue_head(&vi->send, skb);
+	if (xmit_skb(vi, skb) != 0) {
+		vi->last_xmit_skb = skb;
+		goto stop_queue;
 	}
+done:
 	vi->svq->vq_ops->kick(vi->svq);
-
-	return 0;
+	return NETDEV_TX_OK;
+
+stop_queue:
+	pr_debug("%s: virtio not prepared to send\n", dev->name);
+	netif_stop_queue(dev);
+
+	/* Activate callback for using skbs: if this returns false it
+	 * means some were used in the meantime. */
+	if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
+		vi->svq->vq_ops->disable_cb(vi->svq);
+		netif_start_queue(dev);
+		goto again;
+	}
+	goto done;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -355,17 +378,26 @@ static int virtnet_probe(struct virtio_device *vdev)
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */
-	if (csum && vdev->config->feature(vdev, VIRTIO_NET_F_CSUM)) {
+	if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
 		/* This opens up the world of extra features. */
 		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
-		if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_GSO)) {
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
 			dev->features |= NETIF_F_TSO | NETIF_F_UFO
 				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
 		}
+		/* Individual feature bits: what can host handle? */
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
+			dev->features |= NETIF_F_TSO;
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
+			dev->features |= NETIF_F_TSO6;
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
+			dev->features |= NETIF_F_TSO_ECN;
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
+			dev->features |= NETIF_F_UFO;
 	}
 
 	/* Configuration may specify what MAC to use.  Otherwise random. */
-	if (vdev->config->feature(vdev, VIRTIO_NET_F_MAC)) {
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
 		vdev->config->get(vdev,
 				  offsetof(struct virtio_net_config, mac),
 				  dev->dev_addr, dev->addr_len);
@@ -454,7 +486,15 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+static unsigned int features[] = {
+	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
+	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
+	VIRTIO_NET_F_HOST_ECN,
+};
+
 static struct virtio_driver virtio_net = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,

+ 36 - 2
drivers/virtio/virtio.c

@@ -80,19 +80,51 @@ static void add_status(struct virtio_device *dev, unsigned status)
 	dev->config->set_status(dev, dev->config->get_status(dev) | status);
 }
 
+void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
+					 unsigned int fbit)
+{
+	unsigned int i;
+	struct virtio_driver *drv = container_of(vdev->dev.driver,
+						 struct virtio_driver, driver);
+
+	for (i = 0; i < drv->feature_table_size; i++)
+		if (drv->feature_table[i] == fbit)
+			return;
+	BUG();
+}
+EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);
+
 static int virtio_dev_probe(struct device *_d)
 {
-	int err;
+	int err, i;
 	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
 	struct virtio_driver *drv = container_of(dev->dev.driver,
 						 struct virtio_driver, driver);
+	u32 device_features;
 
+	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+
+	/* Figure out what features the device supports. */
+	device_features = dev->config->get_features(dev);
+
+	/* Features supported by both device and driver into dev->features. */
+	memset(dev->features, 0, sizeof(dev->features));
+	for (i = 0; i < drv->feature_table_size; i++) {
+		unsigned int f = drv->feature_table[i];
+		BUG_ON(f >= 32);
+		if (device_features & (1 << f))
+			set_bit(f, dev->features);
+	}
+
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	else
+	else {
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+		/* They should never have set feature bits beyond 32 */
+		dev->config->set_features(dev, dev->features[0]);
+	}
 	return err;
 }
 
@@ -114,6 +146,8 @@ static int virtio_dev_remove(struct device *_d)
 
 int register_virtio_driver(struct virtio_driver *driver)
 {
+	/* Catch this early. */
+	BUG_ON(driver->feature_table_size && !driver->feature_table);
 	driver->driver.bus = &virtio_bus;
 	driver->driver.probe = virtio_dev_probe;
 	driver->driver.remove = virtio_dev_remove;

+ 8 - 4
drivers/virtio/virtio_balloon.c

@@ -155,9 +155,9 @@ static void virtballoon_changed(struct virtio_device *vdev)
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
 	u32 v;
-	__virtio_config_val(vb->vdev,
-			    offsetof(struct virtio_balloon_config, num_pages),
-			    &v);
+	vb->vdev->config->get(vb->vdev,
+			      offsetof(struct virtio_balloon_config, num_pages),
+			      &v, sizeof(v));
 	return v - vb->num_pages;
 }
 
@@ -227,7 +227,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	}
 
 	vb->tell_host_first
-		= vdev->config->feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
+		= virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
 
 	return 0;
 
@@ -259,7 +259,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	kfree(vb);
 }
 
+static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
+
 static struct virtio_driver virtio_balloon = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,

+ 17 - 17
drivers/virtio/virtio_pci.c

@@ -87,23 +87,22 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
 	return container_of(vdev, struct virtio_pci_device, vdev);
 }
 
-/* virtio config->feature() implementation */
-static bool vp_feature(struct virtio_device *vdev, unsigned bit)
+/* virtio config->get_features() implementation */
+static u32 vp_get_features(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	/* When someone needs more than 32 feature bits, we'll need to
+	 * steal a bit to indicate that the rest are somewhere else. */
+	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
+}
+
+/* virtio config->set_features() implementation */
+static void vp_set_features(struct virtio_device *vdev, u32 features)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	u32 mask;
-
-	/* Since this function is supposed to have the side effect of
-	 * enabling a queried feature, we simulate that by doing a read
-	 * from the host feature bitmask and then writing to the guest
-	 * feature bitmask */
-	mask = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
-	if (mask & (1 << bit)) {
-		mask |= (1 << bit);
-		iowrite32(mask, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
-	}
 
-	return !!(mask & (1 << bit));
+	iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
 }
 
 /* virtio config->get() implementation */
@@ -145,14 +144,14 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* We should never be setting status to 0. */
 	BUG_ON(status == 0);
-	return iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 }
 
 static void vp_reset(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* 0 status means a reset. */
-	return iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 }
 
 /* the notify function used when creating a virt queue */
@@ -293,7 +292,6 @@ static void vp_del_vq(struct virtqueue *vq)
 }
 
 static struct virtio_config_ops virtio_pci_config_ops = {
-	.feature	= vp_feature,
 	.get		= vp_get,
 	.set		= vp_set,
 	.get_status	= vp_get_status,
@@ -301,6 +299,8 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.reset		= vp_reset,
 	.find_vq	= vp_find_vq,
 	.del_vq		= vp_del_vq,
+	.get_features	= vp_get_features,
+	.set_features	= vp_set_features,
 };
 
 /* the PCI probing function */

+ 5 - 0
drivers/virtio/virtio_ring.c

@@ -184,6 +184,11 @@ static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len)
 
 	START_USE(vq);
 
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return NULL;
+	}
+
 	if (!more_used(vq)) {
 		pr_debug("No more buffers in queue\n");
 		END_USE(vq);

+ 5 - 0
include/linux/Kbuild

@@ -346,6 +346,11 @@ unifdef-y += videodev.h
 unifdef-y += virtio_config.h
 unifdef-y += virtio_blk.h
 unifdef-y += virtio_net.h
+unifdef-y += virtio_9p.h
+unifdef-y += virtio_balloon.h
+unifdef-y += virtio_console.h
+unifdef-y += virtio_pci.h
+unifdef-y += virtio_ring.h
 unifdef-y += vt.h
 unifdef-y += wait.h
 unifdef-y += wanrouter.h

+ 7 - 0
include/linux/virtio.h

@@ -76,6 +76,7 @@ struct virtqueue_ops {
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
+ * @features: the features supported by both driver and device.
  * @priv: private pointer for the driver's use.
  */
 struct virtio_device
@@ -84,6 +85,8 @@ struct virtio_device
 	struct device dev;
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
+	/* Note that this is a Linux set_bit-style bitmap. */
+	unsigned long features[1];
 	void *priv;
 };
 
@@ -94,6 +97,8 @@ void unregister_virtio_device(struct virtio_device *dev);
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
  * @id_table: the ids serviced by this driver.
+ * @feature_table: an array of feature numbers supported by this device.
+ * @feature_table_size: number of entries in the feature table array.
  * @probe: the function to call when a device is found.  Returns a token for
  *    remove, or PTR_ERR().
  * @remove: the function when a device is removed.
@@ -103,6 +108,8 @@ void unregister_virtio_device(struct virtio_device *dev);
 struct virtio_driver {
 	struct device_driver driver;
 	const struct virtio_device_id *id_table;
+	const unsigned int *feature_table;
+	unsigned int feature_table_size;
 	int (*probe)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);
 	void (*config_changed)(struct virtio_device *dev);

+ 8 - 6
include/linux/virtio_blk.h

@@ -9,6 +9,7 @@
 #define VIRTIO_BLK_F_BARRIER	0	/* Does host support barriers? */
 #define VIRTIO_BLK_F_SIZE_MAX	1	/* Indicates maximum segment size */
 #define VIRTIO_BLK_F_SEG_MAX	2	/* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY	4	/* Legacy geometry available  */
 
 struct virtio_blk_config
 {
@@ -18,6 +19,12 @@ struct virtio_blk_config
 	__le32 size_max;
 	/* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
 	__le32 seg_max;
+	/* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */
+	struct virtio_blk_geometry {
+		__le16 cylinders;
+		__u8 heads;
+		__u8 sectors;
+	} geometry;
 } __attribute__((packed));
 
 /* These two define direction. */
@@ -41,13 +48,8 @@ struct virtio_blk_outhdr
 	__u64 sector;
 };
 
+/* And this is the final byte of the write scatter-gather list. */
 #define VIRTIO_BLK_S_OK		0
 #define VIRTIO_BLK_S_IOERR	1
 #define VIRTIO_BLK_S_UNSUPP	2
-
-/* This is the first element of the write scatter-gather list */
-struct virtio_blk_inhdr
-{
-	unsigned char status;
-};
 #endif /* _LINUX_VIRTIO_BLK_H */

+ 44 - 37
include/linux/virtio_config.h

@@ -16,27 +16,20 @@
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
 #ifdef __KERNEL__
-struct virtio_device;
+#include <linux/virtio.h>
 
 /**
  * virtio_config_ops - operations for configuring a virtio device
- * @feature: search for a feature in this config
- *	vdev: the virtio_device
- *	bit: the feature bit
- *	Returns true if the feature is supported.  Acknowledges the feature
- *	so the host can see it.
  * @get: read the value of a configuration field
  *	vdev: the virtio_device
  *	offset: the offset of the configuration field
  *	buf: the buffer to write the field value into.
  *	len: the length of the buffer
- *	Note that contents are conventionally little-endian.
  * @set: write the value of a configuration field
  *	vdev: the virtio_device
  *	offset: the offset of the configuration field
  *	buf: the buffer to read the field value from.
  *	len: the length of the buffer
- *	Note that contents are conventionally little-endian.
  * @get_status: read the status byte
  *	vdev: the virtio_device
  *	Returns the status byte
@@ -52,10 +45,15 @@ struct virtio_device;
  *	callback: the virqtueue callback
  *	Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
  * @del_vq: free a virtqueue found by find_vq().
+ * @get_features: get the array of feature bits for this device.
+ *	vdev: the virtio_device
+ *	Returns the first 32 feature bits (all we currently need).
+ * @set_features: confirm what device features we'll be using.
+ *	vdev: the virtio_device
+ *	feature: the first 32 feature bits
  */
 struct virtio_config_ops
 {
-	bool (*feature)(struct virtio_device *vdev, unsigned bit);
 	void (*get)(struct virtio_device *vdev, unsigned offset,
 		    void *buf, unsigned len);
 	void (*set)(struct virtio_device *vdev, unsigned offset,
@@ -67,43 +65,52 @@ struct virtio_config_ops
 				     unsigned index,
 				     void (*callback)(struct virtqueue *));
 	void (*del_vq)(struct virtqueue *vq);
+	u32 (*get_features)(struct virtio_device *vdev);
+	void (*set_features)(struct virtio_device *vdev, u32 features);
 };
 
+/* If driver didn't advertise the feature, it will never appear. */
+void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
+					 unsigned int fbit);
+
 /**
- * virtio_config_val - look for a feature and get a single virtio config.
- * @vdev: the virtio device
+ * virtio_has_feature - helper to determine if this device has this feature.
+ * @vdev: the device
  * @fbit: the feature bit
- * @offset: the type to search for.
- * @val: a pointer to the value to fill in.
- *
- * The return value is -ENOENT if the feature doesn't exist.  Otherwise
- * the value is endian-corrected and returned in v. */
-#define virtio_config_val(vdev, fbit, offset, v) ({			\
-	int _err;							\
-	if ((vdev)->config->feature((vdev), (fbit))) {			\
-		__virtio_config_val((vdev), (offset), (v));		\
-		_err = 0;						\
-	} else								\
-		_err = -ENOENT;						\
-	_err;								\
-})
+ */
+static inline bool virtio_has_feature(const struct virtio_device *vdev,
+				      unsigned int fbit)
+{
+	/* Did you forget to fix assumptions on max features? */
+	if (__builtin_constant_p(fbit))
+		BUILD_BUG_ON(fbit >= 32);
+
+	virtio_check_driver_offered_feature(vdev, fbit);
+	return test_bit(fbit, vdev->features);
+}
 
 /**
- * __virtio_config_val - get a single virtio config without feature check.
+ * virtio_config_val - look for a feature and get a virtio config entry.
  * @vdev: the virtio device
+ * @fbit: the feature bit
  * @offset: the type to search for.
  * @val: a pointer to the value to fill in.
  *
- * The value is endian-corrected and returned in v. */
-#define __virtio_config_val(vdev, offset, v) do {			\
-	BUILD_BUG_ON(sizeof(*(v)) != 1 && sizeof(*(v)) != 2		\
-		     && sizeof(*(v)) != 4 && sizeof(*(v)) != 8);	\
-	(vdev)->config->get((vdev), (offset), (v), sizeof(*(v)));	\
-	switch (sizeof(*(v))) {						\
-	case 2: le16_to_cpus((__u16 *) v); break;			\
-	case 4: le32_to_cpus((__u32 *) v); break;			\
-	case 8: le64_to_cpus((__u64 *) v); break;			\
-	}								\
-} while(0)
+ * The return value is -ENOENT if the feature doesn't exist.  Otherwise
+ * the config value is copied into whatever is pointed to by v. */
+#define virtio_config_val(vdev, fbit, offset, v) \
+	virtio_config_buf((vdev), (fbit), (offset), (v), sizeof(v))
+
+static inline int virtio_config_buf(struct virtio_device *vdev,
+				    unsigned int fbit,
+				    unsigned int offset,
+				    void *buf, unsigned len)
+{
+	if (!virtio_has_feature(vdev, fbit))
+		return -ENOENT;
+
+	vdev->config->get(vdev, offset, buf, len);
+	return 0;
+}
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */

+ 11 - 2
include/linux/virtio_net.h

@@ -6,9 +6,18 @@
 #define VIRTIO_ID_NET	1
 
 /* The feature bitmap for virtio net */
-#define VIRTIO_NET_F_CSUM	0	/* Can handle pkts w/ partial csum */
+#define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
 #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO	6	/* Can handle pkts w/ any GSO type */
+#define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6	8	/* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN	9	/* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO	10	/* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4	11	/* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6	12	/* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN	13	/* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
 
 struct virtio_net_config
 {