Browse Source

Merge tag 'vfio-v3.9-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:
 - Fixes PCIe v1 extended capability support

 - Cleans up read/write access functions

 - Fix Removal test to properly wait until devices are unused

 - Enable pcieport driver usage for non-accessible devices w/in groups

 - Extensions for PCI VGA support

* tag 'vfio-v3.9-rc1' of git://github.com/awilliam/linux-vfio:
  drivers/vfio: remove depends on CONFIG_EXPERIMENTAL
  vfio-pci: Add support for VGA region access
  vfio-pci: Manage user power state transitions
  vfio: whitelist pcieport
  vfio: Protect vfio_dev_present against device_del
  vfio-pci: Cleanup BAR access
  vfio-pci: Cleanup read/write functions
  vfio-pci: Enable PCIe extended capabilities on v1
Linus Torvalds 12 years ago
parent
commit
515d01f772

+ 10 - 0
drivers/vfio/pci/Kconfig

@@ -6,3 +6,13 @@ config VFIO_PCI
 	  use of PCI drivers using the VFIO framework.
 
 	  If you don't know what to do here, say N.
+
+config VFIO_PCI_VGA
+	bool "VFIO PCI support for VGA devices"
+	depends on VFIO_PCI && X86 && VGA_ARB
+	help
+	  Support for VGA extension to VFIO PCI.  This exposes an additional
+	  region on VGA devices for accessing legacy VGA addresses used by
+	  BIOS and generic video drivers.
+
+	  If you don't know what to do here, say N.

+ 44 - 31
drivers/vfio/pci/vfio_pci.c

@@ -84,6 +84,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 	} else
 		vdev->msix_bar = 0xFF;
 
+#ifdef CONFIG_VFIO_PCI_VGA
+	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+		vdev->has_vga = true;
+#endif
+
 	return 0;
 }
 
@@ -285,6 +290,16 @@ static long vfio_pci_ioctl(void *device_data,
 			info.flags = VFIO_REGION_INFO_FLAG_READ;
 			break;
 		}
+		case VFIO_PCI_VGA_REGION_INDEX:
+			if (!vdev->has_vga)
+				return -EINVAL;
+
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = 0xc0000;
+			info.flags = VFIO_REGION_INFO_FLAG_READ |
+				     VFIO_REGION_INFO_FLAG_WRITE;
+
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -366,52 +381,50 @@ static long vfio_pci_ioctl(void *device_data,
 	return -ENOTTY;
 }
 
-static ssize_t vfio_pci_read(void *device_data, char __user *buf,
-			     size_t count, loff_t *ppos)
+static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
+			   size_t count, loff_t *ppos, bool iswrite)
 {
 	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 	struct vfio_pci_device *vdev = device_data;
-	struct pci_dev *pdev = vdev->pdev;
 
 	if (index >= VFIO_PCI_NUM_REGIONS)
 		return -EINVAL;
 
-	if (index == VFIO_PCI_CONFIG_REGION_INDEX)
-		return vfio_pci_config_readwrite(vdev, buf, count, ppos, false);
-	else if (index == VFIO_PCI_ROM_REGION_INDEX)
-		return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false);
-	else if (pci_resource_flags(pdev, index) & IORESOURCE_IO)
-		return vfio_pci_io_readwrite(vdev, buf, count, ppos, false);
-	else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM)
-		return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false);
+	switch (index) {
+	case VFIO_PCI_CONFIG_REGION_INDEX:
+		return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
+
+	case VFIO_PCI_ROM_REGION_INDEX:
+		if (iswrite)
+			return -EINVAL;
+		return vfio_pci_bar_rw(vdev, buf, count, ppos, false);
+
+	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+		return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
+
+	case VFIO_PCI_VGA_REGION_INDEX:
+		return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
+	}
 
 	return -EINVAL;
 }
 
-static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
-			      size_t count, loff_t *ppos)
+static ssize_t vfio_pci_read(void *device_data, char __user *buf,
+			     size_t count, loff_t *ppos)
 {
-	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
-	struct vfio_pci_device *vdev = device_data;
-	struct pci_dev *pdev = vdev->pdev;
+	if (!count)
+		return 0;
 
-	if (index >= VFIO_PCI_NUM_REGIONS)
-		return -EINVAL;
+	return vfio_pci_rw(device_data, buf, count, ppos, false);
+}
 
-	if (index == VFIO_PCI_CONFIG_REGION_INDEX)
-		return vfio_pci_config_readwrite(vdev, (char __user *)buf,
-						 count, ppos, true);
-	else if (index == VFIO_PCI_ROM_REGION_INDEX)
-		return -EINVAL;
-	else if (pci_resource_flags(pdev, index) & IORESOURCE_IO)
-		return vfio_pci_io_readwrite(vdev, (char __user *)buf,
-					     count, ppos, true);
-	else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM) {
-		return vfio_pci_mem_readwrite(vdev, (char __user *)buf,
-					      count, ppos, true);
-	}
+static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	if (!count)
+		return 0;
 
-	return -EINVAL;
+	return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
 }
 
 static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)

+ 43 - 9
drivers/vfio/pci/vfio_pci_config.c

@@ -587,12 +587,46 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
 	return 0;
 }
 
+static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos,
+				int count, struct perm_bits *perm,
+				int offset, __le32 val)
+{
+	count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
+	if (count < 0)
+		return count;
+
+	if (offset == PCI_PM_CTRL) {
+		pci_power_t state;
+
+		switch (le32_to_cpu(val) & PCI_PM_CTRL_STATE_MASK) {
+		case 0:
+			state = PCI_D0;
+			break;
+		case 1:
+			state = PCI_D1;
+			break;
+		case 2:
+			state = PCI_D2;
+			break;
+		case 3:
+			state = PCI_D3hot;
+			break;
+		}
+
+		pci_set_power_state(vdev->pdev, state);
+	}
+
+	return count;
+}
+
 /* Permissions for the Power Management capability */
 static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
 {
 	if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_PM]))
 		return -ENOMEM;
 
+	perm->writefn = vfio_pm_config_write;
+
 	/*
 	 * We always virtualize the next field so we can remove
 	 * capabilities from the chain if we want to.
@@ -600,10 +634,11 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
 	p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
 
 	/*
-	 * Power management is defined *per function*,
-	 * so we let the user write this
+	 * Power management is defined *per function*, so we can let
+	 * the user change power state, but we trap and initiate the
+	 * change ourselves, so the state bits are read-only.
 	 */
-	p_setd(perm, PCI_PM_CTRL, NO_VIRT, ALL_WRITE);
+	p_setd(perm, PCI_PM_CTRL, NO_VIRT, ~PCI_PM_CTRL_STATE_MASK);
 	return 0;
 }
 
@@ -985,12 +1020,12 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)
 		if (ret)
 			return pcibios_err_to_errno(ret);
 
+		vdev->extended_caps = true;
+
 		if ((word & PCI_EXP_FLAGS_VERS) == 1)
 			return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1;
-		else {
-			vdev->extended_caps = true;
+		else
 			return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2;
-		}
 	case PCI_CAP_ID_HT:
 		ret = pci_read_config_byte(pdev, pos + 3, &byte);
 		if (ret)
@@ -1501,9 +1536,8 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
 	return ret;
 }
 
-ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev,
-				  char __user *buf, size_t count,
-				  loff_t *ppos, bool iswrite)
+ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf,
+			   size_t count, loff_t *ppos, bool iswrite)
 {
 	size_t done = 0;
 	int ret = 0;

+ 10 - 9
drivers/vfio/pci/vfio_pci_private.h

@@ -53,6 +53,7 @@ struct vfio_pci_device {
 	bool			reset_works;
 	bool			extended_caps;
 	bool			bardirty;
+	bool			has_vga;
 	struct pci_saved_state	*pci_saved_state;
 	atomic_t		refcnt;
 };
@@ -70,15 +71,15 @@ extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
 				   uint32_t flags, unsigned index,
 				   unsigned start, unsigned count, void *data);
 
-extern ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev,
-					 char __user *buf, size_t count,
-					 loff_t *ppos, bool iswrite);
-extern ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev,
-				      char __user *buf, size_t count,
-				      loff_t *ppos, bool iswrite);
-extern ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev,
-				     char __user *buf, size_t count,
-				     loff_t *ppos, bool iswrite);
+extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev,
+				  char __user *buf, size_t count,
+				  loff_t *ppos, bool iswrite);
+
+extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
+			       size_t count, loff_t *ppos, bool iswrite);
+
+extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
+			       size_t count, loff_t *ppos, bool iswrite);
 
 extern int vfio_pci_init_perm_bits(void);
 extern void vfio_pci_uninit_perm_bits(void);

+ 125 - 156
drivers/vfio/pci/vfio_pci_rdwr.c

@@ -17,253 +17,222 @@
 #include <linux/pci.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/vgaarb.h>
 
 #include "vfio_pci_private.h"
 
-/* I/O Port BAR access */
-ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, char __user *buf,
-			      size_t count, loff_t *ppos, bool iswrite)
+/*
+ * Read or write from an __iomem region (MMIO or I/O port) with an excluded
+ * range which is inaccessible.  The excluded range drops writes and fills
+ * reads with -1.  This is intended for handling MSI-X vector tables and
+ * leftover space for ROM BARs.
+ */
+static ssize_t do_io_rw(void __iomem *io, char __user *buf,
+			loff_t off, size_t count, size_t x_start,
+			size_t x_end, bool iswrite)
 {
-	struct pci_dev *pdev = vdev->pdev;
-	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
-	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
-	void __iomem *io;
-	size_t done = 0;
-
-	if (!pci_resource_start(pdev, bar))
-		return -EINVAL;
-
-	if (pos + count > pci_resource_len(pdev, bar))
-		return -EINVAL;
-
-	if (!vdev->barmap[bar]) {
-		int ret;
-
-		ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
-		if (ret)
-			return ret;
-
-		vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
-
-		if (!vdev->barmap[bar]) {
-			pci_release_selected_regions(pdev, 1 << bar);
-			return -EINVAL;
-		}
-	}
-
-	io = vdev->barmap[bar];
+	ssize_t done = 0;
 
 	while (count) {
-		int filled;
+		size_t fillable, filled;
+
+		if (off < x_start)
+			fillable = min(count, (size_t)(x_start - off));
+		else if (off >= x_end)
+			fillable = count;
+		else
+			fillable = 0;
 
-		if (count >= 3 && !(pos % 4)) {
+		if (fillable >= 4 && !(off % 4)) {
 			__le32 val;
 
 			if (iswrite) {
 				if (copy_from_user(&val, buf, 4))
 					return -EFAULT;
 
-				iowrite32(le32_to_cpu(val), io + pos);
+				iowrite32(le32_to_cpu(val), io + off);
 			} else {
-				val = cpu_to_le32(ioread32(io + pos));
+				val = cpu_to_le32(ioread32(io + off));
 
 				if (copy_to_user(buf, &val, 4))
 					return -EFAULT;
 			}
 
 			filled = 4;
-
-		} else if ((pos % 2) == 0 && count >= 2) {
+		} else if (fillable >= 2 && !(off % 2)) {
 			__le16 val;
 
 			if (iswrite) {
 				if (copy_from_user(&val, buf, 2))
 					return -EFAULT;
 
-				iowrite16(le16_to_cpu(val), io + pos);
+				iowrite16(le16_to_cpu(val), io + off);
 			} else {
-				val = cpu_to_le16(ioread16(io + pos));
+				val = cpu_to_le16(ioread16(io + off));
 
 				if (copy_to_user(buf, &val, 2))
 					return -EFAULT;
 			}
 
 			filled = 2;
-		} else {
+		} else if (fillable) {
 			u8 val;
 
 			if (iswrite) {
 				if (copy_from_user(&val, buf, 1))
 					return -EFAULT;
 
-				iowrite8(val, io + pos);
+				iowrite8(val, io + off);
 			} else {
-				val = ioread8(io + pos);
+				val = ioread8(io + off);
 
 				if (copy_to_user(buf, &val, 1))
 					return -EFAULT;
 			}
 
 			filled = 1;
+		} else {
+			/* Fill reads with -1, drop writes */
+			filled = min(count, (size_t)(x_end - off));
+			if (!iswrite) {
+				u8 val = 0xFF;
+				size_t i;
+
+				for (i = 0; i < filled; i++)
+					if (copy_to_user(buf + i, &val, 1))
+						return -EFAULT;
+			}
 		}
 
 		count -= filled;
 		done += filled;
+		off += filled;
 		buf += filled;
-		pos += filled;
 	}
 
-	*ppos += done;
-
 	return done;
 }
 
-/*
- * MMIO BAR access
- * We handle two excluded ranges here as well, if the user tries to read
- * the ROM beyond what PCI tells us is available or the MSI-X table region,
- * we return 0xFF and writes are dropped.
- */
-ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, char __user *buf,
-			       size_t count, loff_t *ppos, bool iswrite)
+ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
+			size_t count, loff_t *ppos, bool iswrite)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
 	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
-	void __iomem *io;
+	size_t x_start = 0, x_end = 0;
 	resource_size_t end;
-	size_t done = 0;
-	size_t x_start = 0, x_end = 0; /* excluded range */
+	void __iomem *io;
+	ssize_t done;
 
 	if (!pci_resource_start(pdev, bar))
 		return -EINVAL;
 
 	end = pci_resource_len(pdev, bar);
 
-	if (pos > end)
+	if (pos >= end)
 		return -EINVAL;
 
-	if (pos == end)
-		return 0;
-
-	if (pos + count > end)
-		count = end - pos;
+	count = min(count, (size_t)(end - pos));
 
 	if (bar == PCI_ROM_RESOURCE) {
+		/*
+		 * The ROM can fill less space than the BAR, so we start the
+		 * excluded range at the end of the actual ROM.  This makes
+		 * filling large ROM BARs much faster.
+		 */
 		io = pci_map_rom(pdev, &x_start);
+		if (!io)
+			return -ENOMEM;
 		x_end = end;
-	} else {
-		if (!vdev->barmap[bar]) {
-			int ret;
-
-			ret = pci_request_selected_regions(pdev, 1 << bar,
-							   "vfio");
-			if (ret)
-				return ret;
+	} else if (!vdev->barmap[bar]) {
+		int ret;
 
-			vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
+		ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
+		if (ret)
+			return ret;
 
-			if (!vdev->barmap[bar]) {
-				pci_release_selected_regions(pdev, 1 << bar);
-				return -EINVAL;
-			}
+		io = pci_iomap(pdev, bar, 0);
+		if (!io) {
+			pci_release_selected_regions(pdev, 1 << bar);
+			return -ENOMEM;
 		}
 
+		vdev->barmap[bar] = io;
+	} else
 		io = vdev->barmap[bar];
 
-		if (bar == vdev->msix_bar) {
-			x_start = vdev->msix_offset;
-			x_end = vdev->msix_offset + vdev->msix_size;
-		}
+	if (bar == vdev->msix_bar) {
+		x_start = vdev->msix_offset;
+		x_end = vdev->msix_offset + vdev->msix_size;
 	}
 
-	if (!io)
-		return -EINVAL;
-
-	while (count) {
-		size_t fillable, filled;
-
-		if (pos < x_start)
-			fillable = x_start - pos;
-		else if (pos >= x_end)
-			fillable = end - pos;
-		else
-			fillable = 0;
-
-		if (fillable >= 4 && !(pos % 4) && (count >= 4)) {
-			__le32 val;
-
-			if (iswrite) {
-				if (copy_from_user(&val, buf, 4))
-					goto out;
-
-				iowrite32(le32_to_cpu(val), io + pos);
-			} else {
-				val = cpu_to_le32(ioread32(io + pos));
+	done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
 
-				if (copy_to_user(buf, &val, 4))
-					goto out;
-			}
+	if (done >= 0)
+		*ppos += done;
 
-			filled = 4;
-		} else if (fillable >= 2 && !(pos % 2) && (count >= 2)) {
-			__le16 val;
-
-			if (iswrite) {
-				if (copy_from_user(&val, buf, 2))
-					goto out;
-
-				iowrite16(le16_to_cpu(val), io + pos);
-			} else {
-				val = cpu_to_le16(ioread16(io + pos));
-
-				if (copy_to_user(buf, &val, 2))
-					goto out;
-			}
-
-			filled = 2;
-		} else if (fillable) {
-			u8 val;
+	if (bar == PCI_ROM_RESOURCE)
+		pci_unmap_rom(pdev, io);
 
-			if (iswrite) {
-				if (copy_from_user(&val, buf, 1))
-					goto out;
+	return done;
+}
 
-				iowrite8(val, io + pos);
-			} else {
-				val = ioread8(io + pos);
+ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
+			       size_t count, loff_t *ppos, bool iswrite)
+{
+	int ret;
+	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	void __iomem *iomem = NULL;
+	unsigned int rsrc;
+	bool is_ioport;
+	ssize_t done;
+
+	if (!vdev->has_vga)
+		return -EINVAL;
 
-				if (copy_to_user(buf, &val, 1))
-					goto out;
-			}
+	switch (pos) {
+	case 0xa0000 ... 0xbffff:
+		count = min(count, (size_t)(0xc0000 - pos));
+		iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1);
+		off = pos - 0xa0000;
+		rsrc = VGA_RSRC_LEGACY_MEM;
+		is_ioport = false;
+		break;
+	case 0x3b0 ... 0x3bb:
+		count = min(count, (size_t)(0x3bc - pos));
+		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
+		off = pos - 0x3b0;
+		rsrc = VGA_RSRC_LEGACY_IO;
+		is_ioport = true;
+		break;
+	case 0x3c0 ... 0x3df:
+		count = min(count, (size_t)(0x3e0 - pos));
+		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
+		off = pos - 0x3c0;
+		rsrc = VGA_RSRC_LEGACY_IO;
+		is_ioport = true;
+		break;
+	default:
+		return -EINVAL;
+	}
 
-			filled = 1;
-		} else {
-			/* Drop writes, fill reads with FF */
-			filled = min((size_t)(x_end - pos), count);
-			if (!iswrite) {
-				char val = 0xFF;
-				size_t i;
+	if (!iomem)
+		return -ENOMEM;
 
-				for (i = 0; i < filled; i++) {
-					if (put_user(val, buf + i))
-						goto out;
-				}
-			}
+	ret = vga_get_interruptible(vdev->pdev, rsrc);
+	if (ret) {
+		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
+		return ret;
+	}
 
-		}
+	done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
 
-		count -= filled;
-		done += filled;
-		buf += filled;
-		pos += filled;
-	}
+	vga_put(vdev->pdev, rsrc);
 
-	*ppos += done;
+	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
 
-out:
-	if (bar == PCI_ROM_RESOURCE)
-		pci_unmap_rom(pdev, io);
+	if (done >= 0)
+		*ppos += done;
 
-	return count ? -EFAULT : done;
+	return done;
 }

+ 13 - 22
drivers/vfio/vfio.c

@@ -442,7 +442,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
  * a device.  It's not always practical to leave a device within a group
  * driverless as it could get re-bound to something unsafe.
  */
-static const char * const vfio_driver_whitelist[] = { "pci-stub" };
+static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" };
 
 static bool vfio_whitelisted_driver(struct device_driver *drv)
 {
@@ -642,33 +642,16 @@ int vfio_add_group_dev(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(vfio_add_group_dev);
 
-/* Test whether a struct device is present in our tracking */
-static bool vfio_dev_present(struct device *dev)
+/* Given a referenced group, check if it contains the device */
+static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
 {
-	struct iommu_group *iommu_group;
-	struct vfio_group *group;
 	struct vfio_device *device;
 
-	iommu_group = iommu_group_get(dev);
-	if (!iommu_group)
-		return false;
-
-	group = vfio_group_get_from_iommu(iommu_group);
-	if (!group) {
-		iommu_group_put(iommu_group);
-		return false;
-	}
-
 	device = vfio_group_get_device(group, dev);
-	if (!device) {
-		vfio_group_put(group);
-		iommu_group_put(iommu_group);
+	if (!device)
 		return false;
-	}
 
 	vfio_device_put(device);
-	vfio_group_put(group);
-	iommu_group_put(iommu_group);
 	return true;
 }
 
@@ -682,10 +665,18 @@ void *vfio_del_group_dev(struct device *dev)
 	struct iommu_group *iommu_group = group->iommu_group;
 	void *device_data = device->device_data;
 
+	/*
+	 * The group exists so long as we have a device reference.  Get
+	 * a group reference and use it to scan for the device going away.
+	 */
+	vfio_group_get(group);
+
 	vfio_device_put(device);
 
 	/* TODO send a signal to encourage this to be released */
-	wait_event(vfio.release_q, !vfio_dev_present(dev));
+	wait_event(vfio.release_q, !vfio_dev_present(group, dev));
+
+	vfio_group_put(group);
 
 	iommu_group_put(iommu_group);
 

+ 9 - 0
include/uapi/linux/vfio.h

@@ -303,6 +303,15 @@ enum {
 	VFIO_PCI_BAR5_REGION_INDEX,
 	VFIO_PCI_ROM_REGION_INDEX,
 	VFIO_PCI_CONFIG_REGION_INDEX,
+	/*
+	 * Expose VGA regions defined for PCI base class 03, subclass 00.
+	 * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df
+	 * as well as the MMIO range 0xa0000 to 0xbffff.  Each implemented
+	 * range is found at it's identity mapped offset from the region
+	 * offset, for example 0x3b0 is region_info.offset + 0x3b0.  Areas
+	 * between described ranges are unimplemented.
+	 */
+	VFIO_PCI_VGA_REGION_INDEX,
 	VFIO_PCI_NUM_REGIONS
 };