Merge branches 'stable/drivers-3.2', 'stable/drivers.bugfixes-3.2' and 'stable/pci.fixes-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

* 'stable/drivers-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xenbus: don't rely on xen_initial_domain to detect local xenstore
  xenbus: Fix loopback event channel assuming domain 0
  xen/pv-on-hvm:kexec: Fix implicit declaration of function 'xen_hvm_domain'
  xen/pv-on-hvm kexec: add xs_reset_watches to shutdown watches from old kernel
  xen/pv-on-hvm kexec: update xs_wire.h:xsd_sockmsg_type from xen-unstable
  xen/pv-on-hvm kexec+kdump: reset PV devices in kexec or crash kernel
  xen/pv-on-hvm kexec: rebind virqs to existing eventchannel ports
  xen/pv-on-hvm kexec: prevent crash in xenwatch_thread() when stale watch events arrive

* 'stable/drivers.bugfixes-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/pciback: Check if the device is found instead of blindly assuming so.
  xen/pciback: Do not dereference psdev during printk when it is NULL.
  xen: remove XEN_PLATFORM_PCI config option
  xen: XEN_PVHVM depends on PCI
  xen/pciback: double lock typo
  xen/pciback: use mutex rather than spinlock in vpci backend
  xen/pciback: Use mutexes when working with Xenbus state transitions.
  xen/pciback: miscellaneous adjustments
  xen/pciback: use mutex rather than spinlock in passthrough backend
  xen/pciback: use resource_size()

* 'stable/pci.fixes-3.2' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/pci: support multi-segment systems
  xen-swiotlb: When doing coherent alloc/dealloc check before swizzling the MFNs.
  xen/pci: make bus notifier handler return sane values
  xen-swiotlb: fix printk and panic args
  xen-swiotlb: Fix wrong panic.
  xen-swiotlb: Retry up three times to allocate Xen-SWIOTLB
  xen-pcifront: Update warning comment to use 'e820_host' option.
Linus Torvalds
commit 04a8752485

+ 19 - 3
arch/x86/pci/xen.c

@@ -252,6 +252,8 @@ error:
 }
 
 #ifdef CONFIG_XEN_DOM0
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	int ret = 0;
@@ -269,10 +271,11 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 		memset(&map_irq, 0, sizeof(map_irq));
 		map_irq.domid = domid;
-		map_irq.type = MAP_PIRQ_TYPE_MSI;
+		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
 		map_irq.index = -1;
 		map_irq.pirq = -1;
-		map_irq.bus = dev->bus->number;
+		map_irq.bus = dev->bus->number |
+			      (pci_domain_nr(dev->bus) << 16);
 		map_irq.devfn = dev->devfn;
 
 		if (type == PCI_CAP_ID_MSIX) {
@@ -289,7 +292,20 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
 		}
 
-		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+		ret = -EINVAL;
+		if (pci_seg_supported)
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
+			map_irq.type = MAP_PIRQ_TYPE_MSI;
+			map_irq.index = -1;
+			map_irq.pirq = -1;
+			map_irq.bus = dev->bus->number;
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+			if (ret != -EINVAL)
+				pci_seg_supported = false;
+		}
 		if (ret) {
 			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
 				 ret, domid);

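The hunk above introduces a probe-and-latch fallback: the segment-aware MAP_PIRQ_TYPE_MSI_SEG is tried first, and if the hypervisor rejects it with -EINVAL (and the device sits on segment 0) the legacy MSI type is retried; if that call is understood (anything but -EINVAL), pci_seg_supported is cleared so later mappings go straight to the legacy form. A minimal userspace sketch of that pattern, with map_pirq_stub() as a hypothetical stand-in for the PHYSDEVOP_map_pirq hypercall:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define MAP_PIRQ_TYPE_MSI	0x0
#define MAP_PIRQ_TYPE_MSI_SEG	0x3

static bool pci_seg_supported = true;

/* Hypothetical stand-in for the hypercall; this "old" hypervisor only
 * understands the legacy MSI type. */
static int map_pirq_stub(int type, int bus)
{
	return type == MAP_PIRQ_TYPE_MSI_SEG ? -EINVAL : 0;
}

static int map_msi(int seg, int bus)
{
	int ret = -EINVAL;

	if (pci_seg_supported)
		ret = map_pirq_stub(MAP_PIRQ_TYPE_MSI_SEG, bus | (seg << 16));
	if (ret == -EINVAL && !seg) {
		ret = map_pirq_stub(MAP_PIRQ_TYPE_MSI, bus);
		if (ret != -EINVAL)
			pci_seg_supported = false;	/* latch: don't retry the new type again */
	}
	return ret;
}

int main(void)
{
	printf("map 0000:05: %d\n", map_msi(0, 5));
	printf("seg support: %d\n", pci_seg_supported);
	return 0;
}
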
+ 1 - 2
arch/x86/xen/Kconfig

@@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST
 
 config XEN_PVHVM
 	def_bool y
-	depends on XEN
-	depends on X86_LOCAL_APIC
+	depends on XEN && PCI && X86_LOCAL_APIC
 
 config XEN_MAX_DOMAIN_MEMORY
        int

+ 2 - 3
drivers/pci/xen-pcifront.c

@@ -400,9 +400,8 @@ static int pcifront_claim_resource(struct pci_dev *dev, void *data)
 			dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
 				pci_name(dev), i);
 			if (pci_claim_resource(dev, i)) {
-				dev_err(&pdev->xdev->dev, "Could not claim "
-					"resource %s/%d! Device offline. Try "
-					"giving less than 4GB to domain.\n",
+				dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! "
+					"Device offline. Try using e820_host=1 in the guest config.\n",
 					pci_name(dev), i);
 			}
 		}

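The reworded error now points at the e820_host option instead of the generic "give the domain less than 4GB" advice. For reference, a minimal (assumed) PV guest configuration using it might look like:

pci = [ '08:00.0' ]
e820_host = 1

With e820_host enabled the guest sees the host's E820 layout, so the passed-through device's BARs can be claimed in the host's MMIO holes instead of colliding with guest RAM.
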
+ 0 - 10
drivers/xen/Kconfig

@@ -137,16 +137,6 @@ config XEN_GRANT_DEV_ALLOC
 	  to other domains. This can be used to implement frontend drivers
 	  or as part of an inter-domain shared memory channel.
 
-config XEN_PLATFORM_PCI
-	tristate "xen platform pci device driver"
-	depends on XEN_PVHVM && PCI
-	default m
-	help
-	  Driver for the Xen PCI Platform device: it is responsible for
-	  initializing xenbus and grant_table when running in a Xen HVM
-	  domain. As a consequence this driver is required to run any Xen PV
-	  frontend on Xen HVM.
-
 config SWIOTLB_XEN
 	def_bool y
 	depends on PCI

+ 1 - 3
drivers/xen/Makefile

@@ -14,7 +14,7 @@ obj-$(CONFIG_XEN_GNTDEV)		+= xen-gntdev.o
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
+obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
 obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
@@ -23,5 +23,3 @@ obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
-
-xen-platform-pci-y			:= platform-pci.o

+ 32 - 5
drivers/xen/events.c

@@ -873,11 +873,32 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
 	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 }
 
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_status status;
+	int port, rc = -ENOENT;
+
+	memset(&status, 0, sizeof(status));
+	for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+		status.dom = DOMID_SELF;
+		status.port = port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+		if (rc < 0)
+			continue;
+		if (status.status != EVTCHNSTAT_virq)
+			continue;
+		if (status.u.virq == virq && status.vcpu == cpu) {
+			rc = port;
+			break;
+		}
+	}
+	return rc;
+}
 
 int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
 	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
+	int evtchn, irq, ret;
 
 	mutex_lock(&irq_mapping_update_lock);
 
@@ -893,10 +914,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 
 		bind_virq.virq = virq;
 		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
+		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq);
+		if (ret == 0)
+			evtchn = bind_virq.port;
+		else {
+			if (ret == -EEXIST)
+				ret = find_virq(virq, cpu);
+			BUG_ON(ret < 0);
+			evtchn = ret;
+		}
 
 		xen_irq_info_virq_init(cpu, irq, evtchn, virq);
 

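The new find_virq() covers the kexec case where a VIRQ is still bound from the previous kernel: EVTCHNOP_bind_virq then fails with -EEXIST and the existing port is recovered by scanning event-channel status. A small userspace sketch of that recovery path, with the two stub functions as hypothetical stand-ins for the hypercalls:

#include <errno.h>
#include <stdio.h>

#define NR_EVENT_CHANNELS 16

static int stale_virq_port = 7;	/* pretend the old kernel left VIRQ 2 bound here */

static int bind_virq_stub(unsigned int virq)
{
	return -EEXIST;			/* already bound by the previous kernel */
}

static int port_carries_virq_stub(int port, unsigned int virq)
{
	return port == stale_virq_port;
}

static int find_virq(unsigned int virq)
{
	int port, rc = -ENOENT;

	for (port = 0; port < NR_EVENT_CHANNELS; port++) {
		if (port_carries_virq_stub(port, virq)) {
			rc = port;
			break;
		}
	}
	return rc;
}

int main(void)
{
	unsigned int virq = 2;
	int ret = bind_virq_stub(virq);

	if (ret == -EEXIST)
		ret = find_virq(virq);
	printf("virq %u -> event channel %d\n", virq, ret);
	return 0;
}
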
+ 91 - 14
drivers/xen/pci.c

@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/acpi.h>
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
 #include <asm/xen/hypercall.h>
 #include "../pci/pci.h"
 
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_add_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+	if (pci_seg_supported) {
+		struct physdev_pci_device_add add = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+#ifdef CONFIG_ACPI
+		acpi_handle handle;
+#endif
+
+#ifdef CONFIG_PCI_IOV
+		if (pci_dev->is_virtfn) {
+			add.flags = XEN_PCI_DEV_VIRTFN;
+			add.physfn.bus = physfn->bus->number;
+			add.physfn.devfn = physfn->devfn;
+		} else
+#endif
+		if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+			add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+		if (!handle)
+			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+		if (!handle && pci_dev->is_virtfn)
+			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+		if (handle) {
+			acpi_status status;
+
+			do {
+				unsigned long long pxm;
+
+				status = acpi_evaluate_integer(handle, "_PXM",
+							       NULL, &pxm);
+				if (ACPI_SUCCESS(status)) {
+					add.optarr[0] = pxm;
+					add.flags |= XEN_PCI_DEV_PXM;
+					break;
+				}
+				status = acpi_get_parent(handle, &handle);
+			} while (ACPI_SUCCESS(status));
+		}
+#endif /* CONFIG_ACPI */
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+		if (r != -ENOSYS)
+			return r;
+		pci_seg_supported = false;
+	}
 
+	if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
 #ifdef CONFIG_PCI_IOV
-	if (pci_dev->is_virtfn) {
+	else if (pci_dev->is_virtfn) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
 			.is_virtfn 	= 1,
-			.physfn.bus	= pci_dev->physfn->bus->number,
-			.physfn.devfn	= pci_dev->physfn->devfn,
+			.physfn.bus	= physfn->bus->number,
+			.physfn.devfn	= physfn->devfn,
 		};
 
 		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
 			&manage_pci_ext);
-	} else
+	}
 #endif
-	if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+	else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
@@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct physdev_manage_pci manage_pci;
 
-	manage_pci.bus = pci_dev->bus->number;
-	manage_pci.devfn = pci_dev->devfn;
+	if (pci_seg_supported) {
+		struct physdev_pci_device device = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
 
-	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
-		&manage_pci);
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+					  &device);
+	} else if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+	else {
+		struct physdev_manage_pci manage_pci = {
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+					  &manage_pci);
+	}
 
 	return r;
 }
@@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb,
 		r = xen_remove_device(dev);
 		break;
 	default:
-		break;
+		return NOTIFY_DONE;
 	}
-
-	return r;
+	if (r)
+		dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n",
+			action == BUS_NOTIFY_ADD_DEVICE ? "add" :
+			(action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
+	return NOTIFY_OK;
 }
 
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
 	.notifier_call = xen_pci_notifier,
 };
 

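The notifier fix adopts the kernel convention that bus notifier callbacks return NOTIFY_* codes rather than errno values: actions the handler does not care about get NOTIFY_DONE, handled ones NOTIFY_OK, and hypercall failures are only logged. A hedged userspace illustration (constants redefined locally, the add/remove calls stubbed out):

#include <stdio.h>

#define NOTIFY_DONE		0x0000	/* event not of interest */
#define NOTIFY_OK		0x0001	/* event handled */

#define BUS_NOTIFY_ADD_DEVICE	1
#define BUS_NOTIFY_DEL_DEVICE	2

static int xen_add_device_stub(void)    { return -19; /* pretend -ENODEV */ }
static int xen_remove_device_stub(void) { return 0; }

static int xen_pci_notifier_sketch(unsigned long action)
{
	int r = 0;

	switch (action) {
	case BUS_NOTIFY_ADD_DEVICE:
		r = xen_add_device_stub();
		break;
	case BUS_NOTIFY_DEL_DEVICE:
		r = xen_remove_device_stub();
		break;
	default:
		return NOTIFY_DONE;	/* not our event: don't claim it */
	}
	if (r)
		fprintf(stderr, "xen: failed to %s device (%d), passthrough/MSI may break\n",
			action == BUS_NOTIFY_ADD_DEVICE ? "add" : "delete", r);
	return NOTIFY_OK;		/* never an errno */
}

int main(void)
{
	printf("add     -> %#x\n", xen_pci_notifier_sketch(BUS_NOTIFY_ADD_DEVICE));
	printf("unknown -> %#x\n", xen_pci_notifier_sketch(99));
	return 0;
}
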
+ 53 - 17
drivers/xen/swiotlb-xen.c

@@ -38,6 +38,7 @@
 #include <xen/swiotlb-xen.h>
 #include <xen/page.h>
 #include <xen/xen-ops.h>
+#include <xen/hvc-console.h>
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -146,8 +147,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 void __init xen_swiotlb_init(int verbose)
 {
 	unsigned long bytes;
-	int rc;
+	int rc = -ENOMEM;
 	unsigned long nr_tbl;
+	char *m = NULL;
+	unsigned int repeat = 3;
 
 	nr_tbl = swioltb_nr_tbl();
 	if (nr_tbl)
@@ -156,16 +159,17 @@ void __init xen_swiotlb_init(int verbose)
 		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
 		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
 	}
-
+retry:
 	bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/*
 	 * Get IO TLB memory from any location.
 	 */
 	xen_io_tlb_start = alloc_bootmem(bytes);
-	if (!xen_io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
-
+	if (!xen_io_tlb_start) {
+		m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+		goto error;
+	}
 	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	/*
 	 * And replace that memory with pages under 4GB.
@@ -173,17 +177,28 @@ void __init xen_swiotlb_init(int verbose)
 	rc = xen_swiotlb_fixup(xen_io_tlb_start,
 			       bytes,
 			       xen_io_tlb_nslabs);
-	if (rc)
+	if (rc) {
+		free_bootmem(__pa(xen_io_tlb_start), bytes);
+		m = "Failed to get contiguous memory for DMA from Xen!\n"\
+		    "You either: don't have the permissions, do not have"\
+		    " enough free memory under 4GB, or the hypervisor memory"\
+		    "is too fragmented!";
 		goto error;
-
+	}
 	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
 	swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
 
 	return;
 error:
-	panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
-	      "We either don't have the permission or you do not have enough"\
-	      "free memory under 4GB!\n", rc);
+	if (repeat--) {
+		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
+					(xen_io_tlb_nslabs >> 1));
+		printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
+		      (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+		goto retry;
+	}
+	xen_raw_printk("%s (rc:%d)", m, rc);
+	panic("%s (rc:%d)", m, rc);
 }
 
 void *
@@ -194,6 +209,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	int order = get_order(size);
 	u64 dma_mask = DMA_BIT_MASK(32);
 	unsigned long vstart;
+	phys_addr_t phys;
+	dma_addr_t dev_addr;
 
 	/*
 	* Ignore region specifiers - the kernel's ideas of
@@ -209,18 +226,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	vstart = __get_free_pages(flags, order);
 	ret = (void *)vstart;
 
+	if (!ret)
+		return ret;
+
 	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+		dma_mask = hwdev->coherent_dma_mask;
 
-	if (ret) {
+	phys = virt_to_phys(ret);
+	dev_addr = xen_phys_to_bus(phys);
+	if (((dev_addr + size - 1 <= dma_mask)) &&
+	    !range_straddles_page_boundary(phys, size))
+		*dma_handle = dev_addr;
+	else {
 		if (xen_create_contiguous_region(vstart, order,
 						 fls64(dma_mask)) != 0) {
 			free_pages(vstart, order);
 			return NULL;
 		}
-		memset(ret, 0, size);
 		*dma_handle = virt_to_machine(ret).maddr;
 	}
+	memset(ret, 0, size);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
@@ -230,11 +255,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 			  dma_addr_t dev_addr)
 {
 	int order = get_order(size);
+	phys_addr_t phys;
+	u64 dma_mask = DMA_BIT_MASK(32);
 
 	if (dma_release_from_coherent(hwdev, order, vaddr))
 		return;
 
-	xen_destroy_contiguous_region((unsigned long)vaddr, order);
+	if (hwdev && hwdev->coherent_dma_mask)
+		dma_mask = hwdev->coherent_dma_mask;
+
+	phys = virt_to_phys(vaddr);
+
+	if (((dev_addr + size - 1 > dma_mask)) ||
+	    range_straddles_page_boundary(phys, size))
+		xen_destroy_contiguous_region((unsigned long)vaddr, order);
+
 	free_pages((unsigned long)vaddr, order);
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -278,9 +313,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (!dma_capable(dev, dev_addr, size))
-		panic("map_single: bounce buffer is not DMA'ble");
-
+	if (!dma_capable(dev, dev_addr, size)) {
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		dev_addr = 0;
+	}
 	return dev_addr;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);

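The init path now retries instead of panicking on the first failure: the slab count is halved (but never below 1024 slabs, i.e. 2MB) and the setup is attempted up to three more times before the panic. A standalone sketch of that back-off loop, with try_alloc() as a hypothetical stand-in for alloc_bootmem() plus xen_swiotlb_fixup():

#include <stdio.h>

#define IO_TLB_SHIFT 11

static int try_alloc(unsigned long nslabs)
{
	/* Pretend anything above 16MB worth of slabs fails. */
	return (nslabs << IO_TLB_SHIFT) > (16UL << 20) ? -1 : 0;
}

int main(void)
{
	unsigned long nslabs = 64UL * 1024 * 1024 >> IO_TLB_SHIFT;	/* 64MB default */
	unsigned int repeat = 3;

	while (try_alloc(nslabs)) {
		if (!repeat--) {
			printf("giving up\n");
			return 1;
		}
		nslabs = nslabs >> 1 > 1024 ? nslabs >> 1 : 1024;	/* min 2MB */
		printf("Xen-SWIOTLB: lowering to %luMB\n",
		       (nslabs << IO_TLB_SHIFT) >> 20);
	}
	printf("allocated %luMB\n", (nslabs << IO_TLB_SHIFT) >> 20);
	return 0;
}
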
+ 0 - 1
drivers/xen/xen-pciback/conf_space.c

@@ -15,7 +15,6 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-#define DRV_NAME	"xen-pciback"
 static int permissive;
 module_param(permissive, bool, 0644);
 

+ 2 - 3
drivers/xen/xen-pciback/conf_space_header.c

@@ -15,7 +15,6 @@ struct pci_bar_info {
 	int which;
 };
 
-#define DRV_NAME	"xen-pciback"
 #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
 #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
 
@@ -25,7 +24,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
 	int ret;
 
 	ret = xen_pcibk_read_config_word(dev, offset, value, data);
-	if (!atomic_read(&dev->enable_cnt))
+	if (!pci_is_enabled(dev))
 		return ret;
 
 	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
@@ -187,7 +186,7 @@ static inline void read_dev_bar(struct pci_dev *dev,
 
 	bar_info->val = res[pos].start |
 			(res[pos].flags & PCI_REGION_FLAG_MASK);
-	bar_info->len_val = res[pos].end - res[pos].start + 1;
+	bar_info->len_val = resource_size(&res[pos]);
 }
 
 static void *bar_init(struct pci_dev *dev, int offset)

+ 1 - 2
drivers/xen/xen-pciback/conf_space_quirks.c

@@ -12,7 +12,6 @@
 #include "conf_space_quirks.h"
 
 LIST_HEAD(xen_pcibk_quirks);
-#define	DRV_NAME	"xen-pciback"
 static inline const struct pci_device_id *
 match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
 {
@@ -36,7 +35,7 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
 			goto out;
 	tmp_quirk = NULL;
 	printk(KERN_DEBUG DRV_NAME
-	       ":quirk didn't match any device xen_pciback knows about\n");
+	       ": quirk didn't match any device known\n");
 out:
 	return tmp_quirk;
 }

+ 14 - 20
drivers/xen/xen-pciback/passthrough.c

@@ -7,13 +7,13 @@
 
 #include <linux/list.h>
 #include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include "pciback.h"
 
 struct passthrough_dev_data {
 	/* Access to dev_list must be protected by lock */
 	struct list_head dev_list;
-	spinlock_t lock;
+	struct mutex lock;
 };
 
 static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
@@ -24,9 +24,8 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry;
 	struct pci_dev *dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 
 	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
 		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
@@ -37,7 +36,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
 		}
 	}
 
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	return dev;
 }
@@ -48,7 +47,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 {
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry;
-	unsigned long flags;
 	unsigned int domain, bus, devfn;
 	int err;
 
@@ -57,9 +55,9 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 		return -ENOMEM;
 	dev_entry->dev = dev;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 	list_add_tail(&dev_entry->list, &dev_data->dev_list);
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	/* Publish this device. */
 	domain = (unsigned int)pci_domain_nr(dev->bus);
@@ -76,9 +74,8 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry, *t;
 	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 
 	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
 		if (dev_entry->dev == dev) {
@@ -88,7 +85,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 		}
 	}
 
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	if (found_dev)
 		pcistub_put_pci_dev(found_dev);
@@ -102,7 +99,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
 	if (!dev_data)
 		return -ENOMEM;
 
-	spin_lock_init(&dev_data->lock);
+	mutex_init(&dev_data->lock);
 
 	INIT_LIST_HEAD(&dev_data->dev_list);
 
@@ -116,14 +113,14 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
 {
 	int err = 0;
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-	struct pci_dev_entry *dev_entry, *e, *tmp;
+	struct pci_dev_entry *dev_entry, *e;
 	struct pci_dev *dev;
 	int found;
 	unsigned int domain, bus;
 
-	spin_lock(&dev_data->lock);
+	mutex_lock(&dev_data->lock);
 
-	list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
 		/* Only publish this device as a root if none of its
 		 * parent bridges are exported
 		 */
@@ -142,16 +139,13 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
 		bus = (unsigned int)dev_entry->dev->bus->number;
 
 		if (!found) {
-			spin_unlock(&dev_data->lock);
 			err = publish_root_cb(pdev, domain, bus);
 			if (err)
 				break;
-			spin_lock(&dev_data->lock);
 		}
 	}
 
-	if (!err)
-		spin_unlock(&dev_data->lock);
+	mutex_unlock(&dev_data->lock);
 
 	return err;
 }
@@ -182,7 +176,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
 	return 1;
 }
 
-struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
+const struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
 	.name           = "passthrough",
 	.init           = __xen_pcibk_init_devices,
 	.free		= __xen_pcibk_release_devices,

+ 15 - 20
drivers/xen/xen-pciback/pci_stub.c

@@ -21,8 +21,6 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-#define DRV_NAME	"xen-pciback"
-
 static char *pci_devs_to_hide;
 wait_queue_head_t xen_pcibk_aer_wait_queue;
 /*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops,
@@ -222,6 +220,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
 	}
 
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	if (WARN_ON(!found_psdev))
+		return;
 
 	/*hold this lock for avoiding breaking link between
 	* pcistub and xen_pcibk when AER is in processing
@@ -514,12 +514,9 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
 	int err;
 	char nodename[PCI_NODENAME_MAX];
 
-	if (!psdev)
-		dev_err(&psdev->dev->dev,
-			"device is NULL when do AER recovery/kill_domain\n");
+	BUG_ON(!psdev);
 	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
 		psdev->pdev->xdev->otherend_id);
-	nodename[strlen(nodename)] = '\0';
 
 again:
 	err = xenbus_transaction_start(&xbt);
@@ -605,7 +602,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
 	if (test_bit(_XEN_PCIF_active,
 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 		dev_dbg(&psdev->dev->dev,
-			"schedule pci_conf service in xen_pcibk\n");
+			"schedule pci_conf service in " DRV_NAME "\n");
 		xen_pcibk_test_and_schedule_op(psdev->pdev);
 	}
 
@@ -995,8 +992,7 @@ out:
 		err = count;
 	return err;
 }
-
-DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
+static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
 
 static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
 				   size_t count)
@@ -1015,8 +1011,7 @@ out:
 		err = count;
 	return err;
 }
-
-DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
+static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
 
 static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
 {
@@ -1039,8 +1034,7 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
 
 	return count;
 }
-
-DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
 
 static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
 {
@@ -1069,8 +1063,7 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
 	return count;
 }
-
-DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
+static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
 
 static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
 					  const char *buf,
@@ -1106,7 +1099,8 @@ out:
 		err = count;
 	return err;
 }
-DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
+static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL,
+		   pcistub_irq_handler_switch);
 
 static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
 				 size_t count)
@@ -1170,8 +1164,8 @@ out:
 
 	return count;
 }
-
-DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
+static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show,
+		   pcistub_quirk_add);
 
 static ssize_t permissive_add(struct device_driver *drv, const char *buf,
 			      size_t count)
@@ -1236,8 +1230,8 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
 	return count;
 }
-
-DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show,
+		   permissive_add);
 
 static void pcistub_exit(void)
 {
@@ -1374,3 +1368,4 @@ module_init(xen_pcibk_init);
 module_exit(xen_pcibk_cleanup);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("xen-backend:pci");

+ 20 - 12
drivers/xen/xen-pciback/pciback.h

@@ -15,6 +15,8 @@
 #include <linux/atomic.h>
 #include <xen/interface/io/pciif.h>
 
+#define DRV_NAME	"xen-pciback"
+
 struct pci_dev_entry {
 	struct list_head list;
 	struct pci_dev *dev;
@@ -27,7 +29,7 @@ struct pci_dev_entry {
 
 struct xen_pcibk_device {
 	void *pci_dev_data;
-	spinlock_t dev_lock;
+	struct mutex dev_lock;
 	struct xenbus_device *xdev;
 	struct xenbus_watch be_watch;
 	u8 be_watching;
@@ -89,7 +91,7 @@ typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
  *  passthrough - BDFs are exactly like in the host.
  */
 struct xen_pcibk_backend {
-	char *name;
+	const char *name;
 	int (*init)(struct xen_pcibk_device *pdev);
 	void (*free)(struct xen_pcibk_device *pdev);
 	int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
@@ -104,9 +106,9 @@ struct xen_pcibk_backend {
 			       unsigned int devfn);
 };
 
-extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
-extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
-extern struct xen_pcibk_backend *xen_pcibk_backend;
+extern const struct xen_pcibk_backend xen_pcibk_vpci_backend;
+extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend;
+extern const struct xen_pcibk_backend *xen_pcibk_backend;
 
 static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 					struct pci_dev *dev,
@@ -116,13 +118,14 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 	if (xen_pcibk_backend && xen_pcibk_backend->add)
 		return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
 	return -1;
-};
+}
+
 static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 					     struct pci_dev *dev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->free)
 		return xen_pcibk_backend->release(pdev, dev);
-};
+}
 
 static inline struct pci_dev *
 xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
@@ -131,7 +134,8 @@ xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
 	if (xen_pcibk_backend && xen_pcibk_backend->get)
 		return xen_pcibk_backend->get(pdev, domain, bus, devfn);
 	return NULL;
-};
+}
+
 /**
 * Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk
 * before sending aer request to pcifront, so that guest could identify
@@ -148,25 +152,29 @@ static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
 		return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
 					       devfn);
 	return -1;
-};
+}
+
 static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->init)
 		return xen_pcibk_backend->init(pdev);
 	return -1;
-};
+}
+
 static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
 					      publish_pci_root_cb cb)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->publish)
 		return xen_pcibk_backend->publish(pdev, cb);
 	return -1;
-};
+}
+
 static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->free)
 		return xen_pcibk_backend->free(pdev);
-};
+}
+
 /* Handles events from front-end */
 irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
 void xen_pcibk_do_op(struct work_struct *data);

+ 0 - 1
drivers/xen/xen-pciback/pciback_ops.c

@@ -10,7 +10,6 @@
 #include <linux/sched.h>
 #include "pciback.h"
 
-#define DRV_NAME	"xen-pciback"
 int verbose_request;
 module_param(verbose_request, int, 0644);
 

+ 15 - 20
drivers/xen/xen-pciback/vpci.c

@@ -8,16 +8,15 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include "pciback.h"
 
 #define PCI_SLOT_MAX 32
-#define DRV_NAME	"xen-pciback"
 
 struct vpci_dev_data {
 	/* Access to dev_list must be protected by lock */
 	struct list_head dev_list[PCI_SLOT_MAX];
-	spinlock_t lock;
+	struct mutex lock;
 };
 
 static inline struct list_head *list_first(struct list_head *head)
@@ -33,13 +32,12 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
 	struct pci_dev_entry *entry;
 	struct pci_dev *dev = NULL;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 
 	if (domain != 0 || bus != 0)
 		return NULL;
 
 	if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
-		spin_lock_irqsave(&vpci_dev->lock, flags);
+		mutex_lock(&vpci_dev->lock);
 
 		list_for_each_entry(entry,
 				    &vpci_dev->dev_list[PCI_SLOT(devfn)],
@@ -50,7 +48,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
 			}
 		}
 
-		spin_unlock_irqrestore(&vpci_dev->lock, flags);
+		mutex_unlock(&vpci_dev->lock);
 	}
 	return dev;
 }
@@ -71,7 +69,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 	int err = 0, slot, func = -1;
 	struct pci_dev_entry *t, *dev_entry;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 
 	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
 		err = -EFAULT;
@@ -90,7 +87,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 
 	dev_entry->dev = dev;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 
 	/* Keep multi-function devices together on the virtual PCI bus */
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
@@ -129,7 +126,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 			 "No more space on root virtual PCI bus");
 
 unlock:
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 
 	/* Publish this device. */
 	if (!err)
@@ -145,14 +142,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 	int slot;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
 	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-		struct pci_dev_entry *e, *tmp;
-		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-					 list) {
+		struct pci_dev_entry *e;
+
+		list_for_each_entry(e, &vpci_dev->dev_list[slot], list) {
 			if (e->dev == dev) {
 				list_del(&e->list);
 				found_dev = e->dev;
@@ -163,7 +159,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 	}
 
 out:
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 
 	if (found_dev)
 		pcistub_put_pci_dev(found_dev);
@@ -178,7 +174,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
 	if (!vpci_dev)
 		return -ENOMEM;
 
-	spin_lock_init(&vpci_dev->lock);
+	mutex_init(&vpci_dev->lock);
 
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++)
 		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
@@ -222,10 +218,9 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
 	struct pci_dev_entry *entry;
 	struct pci_dev *dev = NULL;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 	int found = 0, slot;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
 		list_for_each_entry(entry,
 			    &vpci_dev->dev_list[slot],
@@ -243,11 +238,11 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
 			}
 		}
 	}
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 	return found;
 }
 
-struct xen_pcibk_backend xen_pcibk_vpci_backend = {
+const struct xen_pcibk_backend xen_pcibk_vpci_backend = {
 	.name		= "vpci",
 	.init		= __xen_pcibk_init_devices,
 	.free		= __xen_pcibk_release_devices,

+ 11 - 16
drivers/xen/xen-pciback/xenbus.c

@@ -13,7 +13,6 @@
 #include <asm/xen/pci.h>
 #include "pciback.h"
 
-#define	DRV_NAME	"xen-pciback"
 #define INVALID_EVTCHN_IRQ  (-1)
 struct workqueue_struct *xen_pcibk_wq;
 
@@ -44,7 +43,7 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
 	pdev->xdev = xdev;
 	dev_set_drvdata(&xdev->dev, pdev);
 
-	spin_lock_init(&pdev->dev_lock);
+	mutex_init(&pdev->dev_lock);
 
 	pdev->sh_info = NULL;
 	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
@@ -62,14 +61,12 @@ out:
 
 static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
 {
-	spin_lock(&pdev->dev_lock);
-
+	mutex_lock(&pdev->dev_lock);
 	/* Ensure the guest can't trigger our handler before removing devices */
 	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
 		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
 		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
 	}
-	spin_unlock(&pdev->dev_lock);
 
 	/* If the driver domain started an op, make sure we complete it
 	 * before releasing the shared memory */
@@ -77,13 +74,11 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
 	/* Note, the workqueue does not use spinlocks at all.*/
 	flush_workqueue(xen_pcibk_wq);
 
-	spin_lock(&pdev->dev_lock);
 	if (pdev->sh_info != NULL) {
 		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
 		pdev->sh_info = NULL;
 	}
-	spin_unlock(&pdev->dev_lock);
-
+	mutex_unlock(&pdev->dev_lock);
 }
 
 static void free_pdev(struct xen_pcibk_device *pdev)
@@ -120,9 +115,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
 		goto out;
 	}
 
-	spin_lock(&pdev->dev_lock);
 	pdev->sh_info = vaddr;
-	spin_unlock(&pdev->dev_lock);
 
 	err = bind_interdomain_evtchn_to_irqhandler(
 		pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
@@ -132,10 +125,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
 				 "Error binding event channel to IRQ");
 		goto out;
 	}
-
-	spin_lock(&pdev->dev_lock);
 	pdev->evtchn_irq = err;
-	spin_unlock(&pdev->dev_lock);
 	err = 0;
 
 	dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -150,6 +140,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
 	char *magic = NULL;
 
 
+	mutex_lock(&pdev->dev_lock);
 	/* Make sure we only do this setup once */
 	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 	    XenbusStateInitialised)
@@ -176,7 +167,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
 	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
 		xenbus_dev_fatal(pdev->xdev, -EFAULT,
 				 "version mismatch (%s/%s) with pcifront - "
-				 "halting xen_pcibk",
+				 "halting " DRV_NAME,
 				 magic, XEN_PCI_MAGIC);
 		goto out;
 	}
@@ -194,6 +185,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
 
 	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
 out:
+	mutex_unlock(&pdev->dev_lock);
 
 	kfree(magic);
 
@@ -369,6 +361,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
 
 	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
 
+	mutex_lock(&pdev->dev_lock);
 	/* Make sure we only reconfigure once */
 	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 	    XenbusStateReconfiguring)
@@ -506,6 +499,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
 	}
 
 out:
+	mutex_unlock(&pdev->dev_lock);
 	return 0;
 }
 
@@ -562,6 +556,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
 	char dev_str[64];
 	char state_str[64];
 
+	mutex_lock(&pdev->dev_lock);
 	/* It's possible we could get the call to setup twice, so make sure
 	 * we're not already connected.
 	 */
@@ -642,10 +637,10 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
 				 "Error switching to initialised state!");
 
 out:
+	mutex_unlock(&pdev->dev_lock);
 	if (!err)
 		/* see if pcifront is already configured (if not, we'll wait) */
 		xen_pcibk_attach(pdev);
-
 	return err;
 }
 
@@ -724,7 +719,7 @@ static struct xenbus_driver xenbus_xen_pcibk_driver = {
 	.otherend_changed	= xen_pcibk_frontend_changed,
 };
 
-struct xen_pcibk_backend *xen_pcibk_backend;
+const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
 
 int __init xen_pcibk_xenbus_register(void)
 {

+ 3 - 1
drivers/xen/xenbus/xenbus_comms.c

@@ -212,7 +212,9 @@ int xb_init_comms(void)
 		printk(KERN_WARNING "XENBUS response ring is not quiescent "
 		       "(%08x:%08x): fixing up\n",
 		       intf->rsp_cons, intf->rsp_prod);
-		intf->rsp_cons = intf->rsp_prod;
+		/* breaks kdump */
+		if (!reset_devices)
+			intf->rsp_cons = intf->rsp_prod;
 	}
 
 	if (xenbus_irq) {

+ 53 - 48
drivers/xen/xenbus/xenbus_probe.c

@@ -684,64 +684,74 @@ static int __init xenbus_probe_initcall(void)
 
 device_initcall(xenbus_probe_initcall);
 
-static int __init xenbus_init(void)
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
 {
 	int err = 0;
 	unsigned long page = 0;
+	struct evtchn_alloc_unbound alloc_unbound;
 
-	DPRINTK("");
+	/* Allocate Xenstore page */
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		goto out_err;
 
-	err = -ENODEV;
-	if (!xen_domain())
-		return err;
+	xen_store_mfn = xen_start_info->store_mfn =
+		pfn_to_mfn(virt_to_phys((void *)page) >>
+			   PAGE_SHIFT);
 
-	/*
-	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
-	 */
-	if (xen_initial_domain()) {
-		struct evtchn_alloc_unbound alloc_unbound;
+	/* Next allocate a local port which xenstored can bind to */
+	alloc_unbound.dom        = DOMID_SELF;
+	alloc_unbound.remote_dom = DOMID_SELF;
 
-		/* Allocate Xenstore page */
-		page = get_zeroed_page(GFP_KERNEL);
-		if (!page)
-			goto out_error;
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (err == -ENOSYS)
+		goto out_err;
 
-		xen_store_mfn = xen_start_info->store_mfn =
-			pfn_to_mfn(virt_to_phys((void *)page) >>
-				   PAGE_SHIFT);
+	BUG_ON(err);
+	xen_store_evtchn = xen_start_info->store_evtchn =
+		alloc_unbound.port;
 
-		/* Next allocate a local port which xenstored can bind to */
-		alloc_unbound.dom        = DOMID_SELF;
-		alloc_unbound.remote_dom = 0;
+	return 0;
 
-		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
-						  &alloc_unbound);
-		if (err == -ENOSYS)
-			goto out_error;
+ out_err:
+	if (page != 0)
+		free_page(page);
+	return err;
+}
 
-		BUG_ON(err);
-		xen_store_evtchn = xen_start_info->store_evtchn =
-			alloc_unbound.port;
+static int __init xenbus_init(void)
+{
+	int err = 0;
 
-		xen_store_interface = mfn_to_virt(xen_store_mfn);
+	if (!xen_domain())
+		return -ENODEV;
+
+	if (xen_hvm_domain()) {
+		uint64_t v = 0;
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (err)
+			goto out_error;
+		xen_store_evtchn = (int)v;
+		err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		if (err)
+			goto out_error;
+		xen_store_mfn = (unsigned long)v;
+		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
 	} else {
-		if (xen_hvm_domain()) {
-			uint64_t v = 0;
-			err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
-			if (err)
-				goto out_error;
-			xen_store_evtchn = (int)v;
-			err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+		if (xen_store_evtchn)
+			xenstored_ready = 1;
+		else {
+			err = xenstored_local_init();
 			if (err)
 				goto out_error;
-			xen_store_mfn = (unsigned long)v;
-			xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
-		} else {
-			xen_store_evtchn = xen_start_info->store_evtchn;
-			xen_store_mfn = xen_start_info->store_mfn;
-			xen_store_interface = mfn_to_virt(xen_store_mfn);
-			xenstored_ready = 1;
 		}
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	}
 
 	/* Initialize the interface to xenstore. */
@@ -760,12 +770,7 @@ static int __init xenbus_init(void)
 	proc_mkdir("xen", NULL);
 #endif
 
-	return 0;
-
-  out_error:
-	if (page != 0)
-		free_page(page);
-
+ out_error:
 	return err;
 }
 

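After the refactor, xenbus_init() picks the xenstore access path from the domain type rather than from xen_initial_domain(): HVM guests read the event channel and store page from HVM params, PV guests take them from start_info, and only a domain with no store event channel yet (a locally run xenstored, normally dom0) calls xenstored_local_init(). A compact sketch of that decision tree (userspace, all Xen calls stubbed, names hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct start_info { unsigned long store_evtchn, store_mfn; };

/* Hypothetical stand-in for allocating the xenstore page and an unbound
 * local event-channel port for a locally run xenstored. */
static int xenstored_local_init(struct start_info *si)
{
	si->store_mfn = 0x1234;
	si->store_evtchn = 42;
	return 0;
}

static int xenbus_init_sketch(bool hvm, struct start_info *si)
{
	if (hvm) {
		/* real code: hvm_get_parameter(HVM_PARAM_STORE_EVTCHN/_PFN) + ioremap */
		printf("HVM: store page via HVM params\n");
		return 0;
	}
	if (!si->store_evtchn) {
		int err = xenstored_local_init(si);
		if (err)
			return err;
	}
	printf("PV: evtchn %lu, mfn %#lx\n", si->store_evtchn, si->store_mfn);
	return 0;
}

int main(void)
{
	struct start_info local = { 0, 0 };		/* no store yet: local xenstored */
	struct start_info domU  = { 3, 0xabcd };	/* normal PV guest */

	xenbus_init_sketch(false, &local);
	xenbus_init_sketch(false, &domU);
	xenbus_init_sketch(true, &domU);
	return 0;
}
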
+ 121 - 0
drivers/xen/xenbus/xenbus_probe_frontend.c

@@ -248,10 +248,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv,
 }
 EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
 
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+					const char **v, unsigned int l)
+{
+	xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+	wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+	long timeout;
+	timeout = wait_event_interruptible_timeout(backend_state_wq,
+			backend_state == expected, 5 * HZ);
+	if (timeout <= 0)
+		printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+	struct xenbus_watch be_watch;
+
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			be, xenbus_strstate(be_state));
+
+	memset(&be_watch, 0, sizeof(be_watch));
+	be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+	if (!be_watch.node)
+		return;
+
+	be_watch.callback = xenbus_reset_backend_state_changed;
+	backend_state = XenbusStateUnknown;
+
+	printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+	register_xenbus_watch(&be_watch);
+
+	/* fall through to forward backend to state XenbusStateInitialising */
+	switch (be_state) {
+	case XenbusStateConnected:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+
+	case XenbusStateClosing:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+
+	case XenbusStateClosed:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+		xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+	}
+
+	unregister_xenbus_watch(&be_watch);
+	printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+	kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+	int be_state, fe_state, err;
+	char *backend, *frontend;
+
+	frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+	if (!frontend)
+		return;
+
+	err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+	if (err != 1)
+		goto out;
+
+	switch (fe_state) {
+	case XenbusStateConnected:
+	case XenbusStateClosed:
+		printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+				frontend, xenbus_strstate(fe_state));
+		backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+		if (!backend || IS_ERR(backend))
+			goto out;
+		err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+		if (err == 1)
+			xenbus_reset_frontend(frontend, backend, be_state);
+		kfree(backend);
+		break;
+	default:
+		break;
+	}
+out:
+	kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+	char **devclass, **dev;
+	int devclass_n, dev_n;
+	int i, j;
+
+	devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+	if (IS_ERR(devclass))
+		return;
+
+	for (i = 0; i < devclass_n; i++) {
+		dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+		if (IS_ERR(dev))
+			continue;
+		for (j = 0; j < dev_n; j++)
+			xenbus_check_frontend(devclass[i], dev[j]);
+		kfree(dev);
+	}
+	kfree(devclass);
+}
+
 static int frontend_probe_and_watch(struct notifier_block *notifier,
 				   unsigned long event,
 				   void *data)
 {
+	/* reset devices in Connected or Closed state */
+	if (xen_hvm_domain())
+		xenbus_reset_state();
 	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_frontend);
 	register_xenbus_watch(&fe_watch);

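The reset helper walks each frontend through a fixed ladder of states, waiting for the backend to follow after every step; the switch in xenbus_reset_frontend() deliberately falls through so a device found already Closing or Closed joins the ladder midway (Connected is the kdump case, Closed the kexec case). A hedged sketch of that ladder, with set_fe_state()/wait_for_be() as hypothetical stand-ins for the xenstore write and the watch-driven wait:

#include <stdio.h>

enum state { Initialising = 1, InitWait = 2, Connected = 4, Closing = 5, Closed = 6 };

static void set_fe_state(enum state s) { printf("  frontend -> %d\n", s); }
static void wait_for_be(enum state s)  { printf("  wait for backend == %d\n", s); }

static void reset_frontend(enum state be_state)
{
	switch (be_state) {
	case Connected:				/* kdump: still fully connected */
		set_fe_state(Closing);
		wait_for_be(Closing);
		/* fall through */
	case Closing:
		set_fe_state(Closed);
		wait_for_be(Closed);
		/* fall through */
	case Closed:				/* kexec: left closed by the old kernel */
		set_fe_state(Initialising);
		wait_for_be(InitWait);
		break;
	default:
		break;
	}
}

int main(void)
{
	printf("backend was Connected:\n");
	reset_frontend(Connected);
	printf("backend was Closed:\n");
	reset_frontend(Closed);
	return 0;
}
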
+ 15 - 2
drivers/xen/xenbus/xenbus_xs.c

@@ -45,6 +45,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <xen/xenbus.h>
+#include <xen/xen.h>
 #include "xenbus_comms.h"
 
 struct xs_stored_msg {
@@ -620,6 +621,15 @@ static struct xenbus_watch *find_watch(const char *token)
 	return NULL;
 }
 
+static void xs_reset_watches(void)
+{
+	int err;
+
+	err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
+	if (err && err != -EEXIST)
+		printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
+}
+
 /* Register callback to watch this node. */
 int register_xenbus_watch(struct xenbus_watch *watch)
 {
@@ -638,8 +648,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
 
 	err = xs_watch(watch->node, token);
 
-	/* Ignore errors due to multiple registration. */
-	if ((err != 0) && (err != -EEXIST)) {
+	if (err) {
 		spin_lock(&watches_lock);
 		list_del(&watch->list);
 		spin_unlock(&watches_lock);
@@ -897,5 +906,9 @@ int xs_init(void)
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 
+	/* shutdown watches for kexec boot */
+	if (xen_hvm_domain())
+		xs_reset_watches();
+
 	return 0;
 }

+ 5 - 1
include/xen/interface/io/xs_wire.h

@@ -26,7 +26,11 @@ enum xsd_sockmsg_type
     XS_SET_PERMS,
     XS_WATCH_EVENT,
     XS_ERROR,
-    XS_IS_DOMAIN_INTRODUCED
+    XS_IS_DOMAIN_INTRODUCED,
+    XS_RESUME,
+    XS_SET_TARGET,
+    XS_RESTRICT,
+    XS_RESET_WATCHES
 };
 
 #define XS_WRITE_NONE "NONE"

+ 33 - 1
include/xen/interface/physdev.h

@@ -109,6 +109,7 @@ struct physdev_irq {
 #define MAP_PIRQ_TYPE_MSI		0x0
 #define MAP_PIRQ_TYPE_GSI		0x1
 #define MAP_PIRQ_TYPE_UNKNOWN		0x2
+#define MAP_PIRQ_TYPE_MSI_SEG		0x3
 
 #define PHYSDEVOP_map_pirq		13
 struct physdev_map_pirq {
@@ -119,7 +120,7 @@ struct physdev_map_pirq {
     int index;
     /* IN or OUT */
     int pirq;
-    /* IN */
+    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
     int bus;
     /* IN */
     int devfn;
@@ -198,6 +199,37 @@ struct physdev_get_free_pirq {
     uint32_t pirq;
 };
 
+#define XEN_PCI_DEV_EXTFN              0x1
+#define XEN_PCI_DEV_VIRTFN             0x2
+#define XEN_PCI_DEV_PXM                0x4
+
+#define PHYSDEVOP_pci_device_add        25
+struct physdev_pci_device_add {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+    uint32_t flags;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    uint32_t optarr[];
+#elif defined(__GNUC__)
+    uint32_t optarr[0];
+#endif
+};
+
+#define PHYSDEVOP_pci_device_remove     26
+#define PHYSDEVOP_restore_msi_ext       27
+struct physdev_pci_device {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+};
+
 /*
  * Notify that some PIRQ-bound event channels have been unmasked.
  * ** This command is obsolete since interface version 0x00030202 and is **