Prechádzať zdrojové kódy

Merge commit 'konrad/stable/xen-pcifront-0.8.2' into 2.6.36-rc8-initial-domain-v6

Stefano Stabellini 14 rokov pred
rodič
commit
67ba37293e
100 zmenil súbory, kde vykonal 2813 pridanie a 1359 odobranie
  1. 52 32
      Documentation/DocBook/genericirq.tmpl
  2. 23 3
      MAINTAINERS
  3. 2 0
      arch/arm/include/asm/hw_irq.h
  4. 1 9
      arch/arm/kernel/irq.c
  5. 3 3
      arch/arm/mach-bcmring/irq.c
  6. 4 4
      arch/arm/mach-iop13xx/msi.c
  7. 4 4
      arch/ia64/kernel/msi_ia64.c
  8. 2 2
      arch/ia64/sn/kernel/msi_sn.c
  9. 1 1
      arch/m32r/kernel/irq.c
  10. 1 1
      arch/m32r/platforms/m32104ut/setup.c
  11. 4 4
      arch/m32r/platforms/m32700ut/setup.c
  12. 1 1
      arch/m32r/platforms/mappi/setup.c
  13. 1 1
      arch/m32r/platforms/mappi2/setup.c
  14. 1 1
      arch/m32r/platforms/mappi3/setup.c
  15. 1 1
      arch/m32r/platforms/oaks32r/setup.c
  16. 3 3
      arch/m32r/platforms/opsput/setup.c
  17. 2 2
      arch/m32r/platforms/usrv/setup.c
  18. 3 3
      arch/powerpc/platforms/cell/axon_msi.c
  19. 1 1
      arch/powerpc/platforms/pseries/xics.c
  20. 2 2
      arch/powerpc/sysdev/fsl_msi.c
  21. 11 11
      arch/powerpc/sysdev/mpic_pasemi_msi.c
  22. 9 9
      arch/powerpc/sysdev/mpic_u3msi.c
  23. 1 1
      arch/sh/kernel/irq.c
  24. 4 4
      arch/sparc/kernel/pci_msi.c
  25. 2 2
      arch/tile/kernel/irq.c
  26. 3 3
      arch/um/kernel/irq.c
  27. 9 31
      arch/x86/Kconfig
  28. 0 1
      arch/x86/include/asm/apb_timer.h
  29. 0 1
      arch/x86/include/asm/cpu.h
  30. 6 4
      arch/x86/include/asm/hpet.h
  31. 13 4
      arch/x86/include/asm/hw_irq.h
  32. 2 0
      arch/x86/include/asm/i8259.h
  33. 13 0
      arch/x86/include/asm/io.h
  34. 1 6
      arch/x86/include/asm/io_apic.h
  35. 35 0
      arch/x86/include/asm/irq_remapping.h
  36. 31 2
      arch/x86/include/asm/pci.h
  37. 1 0
      arch/x86/include/asm/pci_x86.h
  38. 9 0
      arch/x86/include/asm/x86_init.h
  39. 53 0
      arch/x86/include/asm/xen/pci.h
  40. 25 31
      arch/x86/kernel/apb_timer.c
  41. 0 3
      arch/x86/kernel/apic/apic.c
  42. 189 378
      arch/x86/kernel/apic/io_apic.c
  43. 1 1
      arch/x86/kernel/apic/nmi.c
  44. 3 0
      arch/x86/kernel/apic/probe_64.c
  45. 1 1
      arch/x86/kernel/cpu/amd.c
  46. 1 1
      arch/x86/kernel/cpu/common.c
  47. 1 1
      arch/x86/kernel/cpu/intel.c
  48. 0 2
      arch/x86/kernel/early-quirks.c
  49. 6 10
      arch/x86/kernel/hpet.c
  50. 32 31
      arch/x86/kernel/i8259.c
  51. 13 11
      arch/x86/kernel/irq.c
  52. 4 13
      arch/x86/kernel/irqinit.c
  53. 2 2
      arch/x86/kernel/machine_kexec_64.c
  54. 1 1
      arch/x86/kernel/reboot.c
  55. 0 1
      arch/x86/kernel/setup.c
  56. 1 1
      arch/x86/kernel/setup_percpu.c
  57. 4 4
      arch/x86/kernel/smpboot.c
  58. 19 36
      arch/x86/kernel/uv_irq.c
  59. 44 96
      arch/x86/kernel/visws_quirks.c
  60. 7 0
      arch/x86/kernel/x86_init.c
  61. 1 2
      arch/x86/kvm/lapic.c
  62. 9 9
      arch/x86/lguest/boot.c
  63. 2 2
      arch/x86/mm/init_32.c
  64. 1 1
      arch/x86/mm/init_64.c
  65. 3 3
      arch/x86/mm/k8topology_64.c
  66. 1 0
      arch/x86/pci/Makefile
  67. 10 7
      arch/x86/pci/common.c
  68. 2 0
      arch/x86/pci/i386.c
  69. 142 0
      arch/x86/pci/xen.c
  70. 3 0
      arch/x86/xen/enlighten.c
  71. 4 0
      arch/x86/xen/pci-swiotlb-xen.c
  72. 0 2
      arch/x86/xen/setup.c
  73. 1 1
      arch/xtensa/kernel/irq.c
  74. 2 0
      drivers/block/xen-blkfront.c
  75. 2 0
      drivers/input/xen-kbdfront.c
  76. 3 3
      drivers/isdn/act2000/act2000.h
  77. 14 4
      drivers/isdn/hisax/config.c
  78. 1 0
      drivers/isdn/hisax/hisax.h
  79. 2 2
      drivers/mfd/twl4030-irq.c
  80. 2 0
      drivers/net/xen-netfront.c
  81. 21 0
      drivers/pci/Kconfig
  82. 2 0
      drivers/pci/Makefile
  83. 1 0
      drivers/pci/bus.c
  84. 4 4
      drivers/pci/dmar.c
  85. 8 14
      drivers/pci/htirq.c
  86. 27 185
      drivers/pci/intr_remapping.c
  87. 28 24
      drivers/pci/msi.c
  88. 1148 0
      drivers/pci/xen-pcifront.c
  89. 2 0
      drivers/video/xen-fbfront.c
  90. 2 1
      drivers/xen/Kconfig
  91. 1 0
      drivers/xen/Makefile
  92. 13 0
      drivers/xen/biomerge.c
  93. 338 27
      drivers/xen/events.c
  94. 2 0
      drivers/xen/xenbus/xenbus_client.c
  95. 4 6
      include/linux/dmar.h
  96. 3 2
      include/linux/htirq.h
  97. 0 3
      include/linux/interrupt.h
  98. 171 276
      include/linux/irq.h
  99. 159 0
      include/linux/irqdesc.h
  100. 5 0
      include/linux/irqnr.h

+ 52 - 32
Documentation/DocBook/genericirq.tmpl

@@ -28,7 +28,7 @@
   </authorgroup>
 
   <copyright>
-   <year>2005-2006</year>
+   <year>2005-2010</year>
    <holder>Thomas Gleixner</holder>
   </copyright>
   <copyright>
@@ -100,6 +100,10 @@
 	  <listitem><para>Edge type</para></listitem>
 	  <listitem><para>Simple type</para></listitem>
 	</itemizedlist>
+	During the implementation we identified another type:
+	<itemizedlist>
+	  <listitem><para>Fast EOI type</para></listitem>
+	</itemizedlist>
 	In the SMP world of the __do_IRQ() super-handler another type
 	was identified:
 	<itemizedlist>
@@ -153,6 +157,7 @@
 	is still available. This leads to a kind of duality for the time
 	being. Over time the new model should be used in more and more
 	architectures, as it enables smaller and cleaner IRQ subsystems.
+	It's deprecated for three years now and about to be removed.
 	</para>
   </chapter>
   <chapter id="bugs">
@@ -217,6 +222,7 @@
 	  <itemizedlist>
 	  <listitem><para>handle_level_irq</para></listitem>
 	  <listitem><para>handle_edge_irq</para></listitem>
+	  <listitem><para>handle_fasteoi_irq</para></listitem>
 	  <listitem><para>handle_simple_irq</para></listitem>
 	  <listitem><para>handle_percpu_irq</para></listitem>
 	  </itemizedlist>
@@ -233,33 +239,33 @@
 		are used by the default flow implementations.
 		The following helper functions are implemented (simplified excerpt):
 		<programlisting>
-default_enable(irq)
+default_enable(struct irq_data *data)
 {
-	desc->chip->unmask(irq);
+	desc->chip->irq_unmask(data);
 }
 
-default_disable(irq)
+default_disable(struct irq_data *data)
 {
-	if (!delay_disable(irq))
-		desc->chip->mask(irq);
+	if (!delay_disable(data))
+		desc->chip->irq_mask(data);
 }
 
-default_ack(irq)
+default_ack(struct irq_data *data)
 {
-	chip->ack(irq);
+	chip->irq_ack(data);
 }
 
-default_mask_ack(irq)
+default_mask_ack(struct irq_data *data)
 {
-	if (chip->mask_ack) {
-		chip->mask_ack(irq);
+	if (chip->irq_mask_ack) {
+		chip->irq_mask_ack(data);
 	} else {
-		chip->mask(irq);
-		chip->ack(irq);
+		chip->irq_mask(data);
+		chip->irq_ack(data);
 	}
 }
 
-noop(irq)
+noop(struct irq_data *data))
 {
 }
 
@@ -278,12 +284,27 @@ noop(irq)
 		<para>
 		The following control flow is implemented (simplified excerpt):
 		<programlisting>
-desc->chip->start();
+desc->chip->irq_mask();
 handle_IRQ_event(desc->action);
-desc->chip->end();
+desc->chip->irq_unmask();
 		</programlisting>
 		</para>
-   	    </sect3>
+	    </sect3>
+	    <sect3 id="Default_FASTEOI_IRQ_flow_handler">
+		<title>Default Fast EOI IRQ flow handler</title>
+		<para>
+		handle_fasteoi_irq provides a generic implementation
+		for interrupts, which only need an EOI at the end of
+		the handler
+		</para>
+		<para>
+		The following control flow is implemented (simplified excerpt):
+		<programlisting>
+handle_IRQ_event(desc->action);
+desc->chip->irq_eoi();
+		</programlisting>
+		</para>
+	    </sect3>
 	    <sect3 id="Default_Edge_IRQ_flow_handler">
 	 	<title>Default Edge IRQ flow handler</title>
 		<para>
@@ -294,20 +315,19 @@ desc->chip->end();
 		The following control flow is implemented (simplified excerpt):
 		<programlisting>
 if (desc->status &amp; running) {
-	desc->chip->hold();
+	desc->chip->irq_mask();
 	desc->status |= pending | masked;
 	return;
 }
-desc->chip->start();
+desc->chip->irq_ack();
 desc->status |= running;
 do {
 	if (desc->status &amp; masked)
-		desc->chip->enable();
+		desc->chip->irq_unmask();
 	desc->status &amp;= ~pending;
 	handle_IRQ_event(desc->action);
 } while (status &amp; pending);
 desc->status &amp;= ~running;
-desc->chip->end();
 		</programlisting>
 		</para>
    	    </sect3>
@@ -342,9 +362,9 @@ handle_IRQ_event(desc->action);
 		<para>
 		The following control flow is implemented (simplified excerpt):
 		<programlisting>
-desc->chip->start();
 handle_IRQ_event(desc->action);
-desc->chip->end();
+if (desc->chip->irq_eoi)
+        desc->chip->irq_eoi();
 		</programlisting>
 		</para>
    	    </sect3>
@@ -375,8 +395,7 @@ desc->chip->end();
 	mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when
 	you want to use the delayed interrupt disable feature and your
 	hardware is not capable of retriggering	an interrupt.)
-	The delayed interrupt disable can be runtime enabled, per interrupt,
-	by setting the IRQ_DELAYED_DISABLE flag in the irq_desc status field.
+	The delayed interrupt disable is not configurable.
 	</para>
 	</sect2>
     </sect1>
@@ -387,13 +406,13 @@ desc->chip->end();
 	contains all the direct chip relevant functions, which
 	can be utilized by the irq flow implementations.
 	  <itemizedlist>
-	  <listitem><para>ack()</para></listitem>
-	  <listitem><para>mask_ack() - Optional, recommended for performance</para></listitem>
-	  <listitem><para>mask()</para></listitem>
-	  <listitem><para>unmask()</para></listitem>
-	  <listitem><para>retrigger() - Optional</para></listitem>
-	  <listitem><para>set_type() - Optional</para></listitem>
-	  <listitem><para>set_wake() - Optional</para></listitem>
+	  <listitem><para>irq_ack()</para></listitem>
+	  <listitem><para>irq_mask_ack() - Optional, recommended for performance</para></listitem>
+	  <listitem><para>irq_mask()</para></listitem>
+	  <listitem><para>irq_unmask()</para></listitem>
+	  <listitem><para>irq_retrigger() - Optional</para></listitem>
+	  <listitem><para>irq_set_type() - Optional</para></listitem>
+	  <listitem><para>irq_set_wake() - Optional</para></listitem>
 	  </itemizedlist>
 	These primitives are strictly intended to mean what they say: ack means
 	ACK, masking means masking of an IRQ line, etc. It is up to the flow
@@ -458,6 +477,7 @@ desc->chip->end();
      <para>
      This chapter contains the autogenerated documentation of the internal functions.
      </para>
+!Ikernel/irq/irqdesc.c
 !Ikernel/irq/handle.c
 !Ikernel/irq/chip.c
   </chapter>

+ 23 - 3
MAINTAINERS

@@ -3239,6 +3239,12 @@ F:	drivers/net/irda/
 F:	include/net/irda/
 F:	net/irda/
 
+IRQ SUBSYSTEM
+M:	Thomas Gleixner <tglx@linutronix.de>
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git irq/core
+F:	kernel/irq/
+
 ISAPNP
 M:	Jaroslav Kysela <perex@perex.cz>
 S:	Maintained
@@ -6522,11 +6528,25 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86.
 S:	Maintained
 F:	drivers/platform/x86
 
+XEN PCI SUBSYSTEM
+M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+L:	xen-devel@lists.xensource.com
+S:	Supported
+F:	arch/x86/pci/*xen*
+F:	drivers/pci/*xen*
+
+XEN SWIOTLB SUBSYSTEM
+M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+L:	xen-devel@lists.xensource.com
+S:	Supported
+F:	arch/x86/xen/*swiotlb*
+F:	drivers/xen/*swiotlb*
+
 XEN HYPERVISOR INTERFACE
-M:	Jeremy Fitzhardinge <jeremy@xensource.com>
-M:	Chris Wright <chrisw@sous-sol.org>
+M:	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
+M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+L:	xen-devel@lists.xen.org
 L:	virtualization@lists.osdl.org
-L:	xen-devel@lists.xensource.com
 S:	Supported
 F:	arch/x86/xen/
 F:	drivers/*/xen-*front.c

+ 2 - 0
arch/arm/include/asm/hw_irq.h

@@ -24,4 +24,6 @@ void set_irq_flags(unsigned int irq, unsigned int flags);
 #define IRQF_PROBE	(1 << 1)
 #define IRQF_NOAUTOEN	(1 << 2)
 
+#define ARCH_IRQ_INIT_FLAGS	(IRQ_NOREQUEST | IRQ_NOPROBE)
+
 #endif

+ 1 - 9
arch/arm/kernel/irq.c

@@ -154,14 +154,6 @@ void set_irq_flags(unsigned int irq, unsigned int iflags)
 
 void __init init_IRQ(void)
 {
-	struct irq_desc *desc;
-	int irq;
-
-	for (irq = 0; irq < nr_irqs; irq++) {
-		desc = irq_to_desc_alloc_node(irq, 0);
-		desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
-	}
-
 	init_arch_irq();
 }
 
@@ -169,7 +161,7 @@ void __init init_IRQ(void)
 int __init arch_probe_nr_irqs(void)
 {
 	nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS;
-	return 0;
+	return nr_irqs;
 }
 #endif
 

+ 3 - 3
arch/arm/mach-bcmring/irq.c

@@ -67,21 +67,21 @@ static void bcmring_unmask_irq2(unsigned int irq)
 }
 
 static struct irq_chip bcmring_irq0_chip = {
-	.typename = "ARM-INTC0",
+	.name = "ARM-INTC0",
 	.ack = bcmring_mask_irq0,
 	.mask = bcmring_mask_irq0,	/* mask a specific interrupt, blocking its delivery. */
 	.unmask = bcmring_unmask_irq0,	/* unmaks an interrupt */
 };
 
 static struct irq_chip bcmring_irq1_chip = {
-	.typename = "ARM-INTC1",
+	.name = "ARM-INTC1",
 	.ack = bcmring_mask_irq1,
 	.mask = bcmring_mask_irq1,
 	.unmask = bcmring_unmask_irq1,
 };
 
 static struct irq_chip bcmring_irq2_chip = {
-	.typename = "ARM-SINTC",
+	.name = "ARM-SINTC",
 	.ack = bcmring_mask_irq2,
 	.mask = bcmring_mask_irq2,
 	.unmask = bcmring_unmask_irq2,

+ 4 - 4
arch/arm/mach-iop13xx/msi.c

@@ -164,10 +164,10 @@ static void iop13xx_msi_nop(unsigned int irq)
 static struct irq_chip iop13xx_msi_chip = {
 	.name = "PCI-MSI",
 	.ack = iop13xx_msi_nop,
-	.enable = unmask_msi_irq,
-	.disable = mask_msi_irq,
-	.mask = mask_msi_irq,
-	.unmask = unmask_msi_irq,
+	.irq_enable = unmask_msi_irq,
+	.irq_disable = mask_msi_irq,
+	.irq_mask = mask_msi_irq,
+	.irq_unmask = unmask_msi_irq,
 };
 
 int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)

+ 4 - 4
arch/ia64/kernel/msi_ia64.c

@@ -104,8 +104,8 @@ static int ia64_msi_retrigger_irq(unsigned int irq)
  */
 static struct irq_chip ia64_msi_chip = {
 	.name		= "PCI-MSI",
-	.mask		= mask_msi_irq,
-	.unmask		= unmask_msi_irq,
+	.irq_mask	= mask_msi_irq,
+	.irq_unmask	= unmask_msi_irq,
 	.ack		= ia64_ack_msi_irq,
 #ifdef CONFIG_SMP
 	.set_affinity	= ia64_set_msi_irq_affinity,
@@ -160,8 +160,8 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 static struct irq_chip dmar_msi_type = {
 	.name = "DMAR_MSI",
-	.unmask = dmar_msi_unmask,
-	.mask = dmar_msi_mask,
+	.irq_unmask = dmar_msi_unmask,
+	.irq_mask = dmar_msi_mask,
 	.ack = ia64_ack_msi_irq,
 #ifdef CONFIG_SMP
 	.set_affinity = dmar_msi_set_affinity,

+ 2 - 2
arch/ia64/sn/kernel/msi_sn.c

@@ -228,8 +228,8 @@ static int sn_msi_retrigger_irq(unsigned int irq)
 
 static struct irq_chip sn_msi_chip = {
 	.name		= "PCI-MSI",
-	.mask		= mask_msi_irq,
-	.unmask		= unmask_msi_irq,
+	.irq_mask	= mask_msi_irq,
+	.irq_unmask	= unmask_msi_irq,
 	.ack		= sn_ack_msi_irq,
 #ifdef CONFIG_SMP
 	.set_affinity	= sn_set_msi_irq_affinity,

+ 1 - 1
arch/m32r/kernel/irq.c

@@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
-		seq_printf(p, " %14s", irq_desc[i].chip->typename);
+		seq_printf(p, " %14s", irq_desc[i].chip->name);
 		seq_printf(p, "  %s", action->name);
 
 		for (action=action->next; action; action = action->next)

+ 1 - 1
arch/m32r/platforms/m32104ut/setup.c

@@ -65,7 +65,7 @@ static void shutdown_m32104ut_irq(unsigned int irq)
 
 static struct irq_chip m32104ut_irq_type =
 {
-	.typename = "M32104UT-IRQ",
+	.name = "M32104UT-IRQ",
 	.startup = startup_m32104ut_irq,
 	.shutdown = shutdown_m32104ut_irq,
 	.enable = enable_m32104ut_irq,

+ 4 - 4
arch/m32r/platforms/m32700ut/setup.c

@@ -71,7 +71,7 @@ static void shutdown_m32700ut_irq(unsigned int irq)
 
 static struct irq_chip m32700ut_irq_type =
 {
-	.typename = "M32700UT-IRQ",
+	.name = "M32700UT-IRQ",
 	.startup = startup_m32700ut_irq,
 	.shutdown = shutdown_m32700ut_irq,
 	.enable = enable_m32700ut_irq,
@@ -148,7 +148,7 @@ static void shutdown_m32700ut_pld_irq(unsigned int irq)
 
 static struct irq_chip m32700ut_pld_irq_type =
 {
-	.typename = "M32700UT-PLD-IRQ",
+	.name = "M32700UT-PLD-IRQ",
 	.startup = startup_m32700ut_pld_irq,
 	.shutdown = shutdown_m32700ut_pld_irq,
 	.enable = enable_m32700ut_pld_irq,
@@ -217,7 +217,7 @@ static void shutdown_m32700ut_lanpld_irq(unsigned int irq)
 
 static struct irq_chip m32700ut_lanpld_irq_type =
 {
-	.typename = "M32700UT-PLD-LAN-IRQ",
+	.name = "M32700UT-PLD-LAN-IRQ",
 	.startup = startup_m32700ut_lanpld_irq,
 	.shutdown = shutdown_m32700ut_lanpld_irq,
 	.enable = enable_m32700ut_lanpld_irq,
@@ -286,7 +286,7 @@ static void shutdown_m32700ut_lcdpld_irq(unsigned int irq)
 
 static struct irq_chip m32700ut_lcdpld_irq_type =
 {
-	.typename = "M32700UT-PLD-LCD-IRQ",
+	.name = "M32700UT-PLD-LCD-IRQ",
 	.startup = startup_m32700ut_lcdpld_irq,
 	.shutdown = shutdown_m32700ut_lcdpld_irq,
 	.enable = enable_m32700ut_lcdpld_irq,

+ 1 - 1
arch/m32r/platforms/mappi/setup.c

@@ -65,7 +65,7 @@ static void shutdown_mappi_irq(unsigned int irq)
 
 static struct irq_chip mappi_irq_type =
 {
-	.typename = "MAPPI-IRQ",
+	.name = "MAPPI-IRQ",
 	.startup = startup_mappi_irq,
 	.shutdown = shutdown_mappi_irq,
 	.enable = enable_mappi_irq,

+ 1 - 1
arch/m32r/platforms/mappi2/setup.c

@@ -72,7 +72,7 @@ static void shutdown_mappi2_irq(unsigned int irq)
 
 static struct irq_chip mappi2_irq_type =
 {
-	.typename = "MAPPI2-IRQ",
+	.name = "MAPPI2-IRQ",
 	.startup = startup_mappi2_irq,
 	.shutdown = shutdown_mappi2_irq,
 	.enable = enable_mappi2_irq,

+ 1 - 1
arch/m32r/platforms/mappi3/setup.c

@@ -72,7 +72,7 @@ static void shutdown_mappi3_irq(unsigned int irq)
 
 static struct irq_chip mappi3_irq_type =
 {
-	.typename = "MAPPI3-IRQ",
+	.name = "MAPPI3-IRQ",
 	.startup = startup_mappi3_irq,
 	.shutdown = shutdown_mappi3_irq,
 	.enable = enable_mappi3_irq,

+ 1 - 1
arch/m32r/platforms/oaks32r/setup.c

@@ -63,7 +63,7 @@ static void shutdown_oaks32r_irq(unsigned int irq)
 
 static struct irq_chip oaks32r_irq_type =
 {
-	.typename = "OAKS32R-IRQ",
+	.name = "OAKS32R-IRQ",
 	.startup = startup_oaks32r_irq,
 	.shutdown = shutdown_oaks32r_irq,
 	.enable = enable_oaks32r_irq,

+ 3 - 3
arch/m32r/platforms/opsput/setup.c

@@ -72,7 +72,7 @@ static void shutdown_opsput_irq(unsigned int irq)
 
 static struct irq_chip opsput_irq_type =
 {
-	.typename = "OPSPUT-IRQ",
+	.name = "OPSPUT-IRQ",
 	.startup = startup_opsput_irq,
 	.shutdown = shutdown_opsput_irq,
 	.enable = enable_opsput_irq,
@@ -149,7 +149,7 @@ static void shutdown_opsput_pld_irq(unsigned int irq)
 
 static struct irq_chip opsput_pld_irq_type =
 {
-	.typename = "OPSPUT-PLD-IRQ",
+	.name = "OPSPUT-PLD-IRQ",
 	.startup = startup_opsput_pld_irq,
 	.shutdown = shutdown_opsput_pld_irq,
 	.enable = enable_opsput_pld_irq,
@@ -218,7 +218,7 @@ static void shutdown_opsput_lanpld_irq(unsigned int irq)
 
 static struct irq_chip opsput_lanpld_irq_type =
 {
-	.typename = "OPSPUT-PLD-LAN-IRQ",
+	.name = "OPSPUT-PLD-LAN-IRQ",
 	.startup = startup_opsput_lanpld_irq,
 	.shutdown = shutdown_opsput_lanpld_irq,
 	.enable = enable_opsput_lanpld_irq,

+ 2 - 2
arch/m32r/platforms/usrv/setup.c

@@ -63,7 +63,7 @@ static void shutdown_mappi_irq(unsigned int irq)
 
 static struct irq_chip mappi_irq_type =
 {
-	.typename = "M32700-IRQ",
+	.name = "M32700-IRQ",
 	.startup = startup_mappi_irq,
 	.shutdown = shutdown_mappi_irq,
 	.enable = enable_mappi_irq,
@@ -136,7 +136,7 @@ static void shutdown_m32700ut_pld_irq(unsigned int irq)
 
 static struct irq_chip m32700ut_pld_irq_type =
 {
-	.typename = "USRV-PLD-IRQ",
+	.name = "USRV-PLD-IRQ",
 	.startup = startup_m32700ut_pld_irq,
 	.shutdown = shutdown_m32700ut_pld_irq,
 	.enable = enable_m32700ut_pld_irq,

+ 3 - 3
arch/powerpc/platforms/cell/axon_msi.c

@@ -310,9 +310,9 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
 }
 
 static struct irq_chip msic_irq_chip = {
-	.mask		= mask_msi_irq,
-	.unmask		= unmask_msi_irq,
-	.shutdown	= unmask_msi_irq,
+	.irq_mask	= mask_msi_irq,
+	.irq_unmask	= unmask_msi_irq,
+	.irq_shutdown	= mask_msi_irq,
 	.name		= "AXON-MSI",
 };
 

+ 1 - 1
arch/powerpc/platforms/pseries/xics.c

@@ -243,7 +243,7 @@ static unsigned int xics_startup(unsigned int virq)
 	 * at that level, so we do it here by hand.
 	 */
 	if (irq_to_desc(virq)->msi_desc)
-		unmask_msi_irq(virq);
+		unmask_msi_irq(irq_get_irq_data(virq));
 
 	/* unmask it */
 	xics_unmask_irq(virq);

+ 2 - 2
arch/powerpc/sysdev/fsl_msi.c

@@ -51,8 +51,8 @@ static void fsl_msi_end_irq(unsigned int virq)
 }
 
 static struct irq_chip fsl_msi_chip = {
-	.mask		= mask_msi_irq,
-	.unmask		= unmask_msi_irq,
+	.irq_mask	= mask_msi_irq,
+	.irq_unmask	= unmask_msi_irq,
 	.ack		= fsl_msi_end_irq,
 	.name		= "FSL-MSI",
 };

+ 11 - 11
arch/powerpc/sysdev/mpic_pasemi_msi.c

@@ -39,24 +39,24 @@
 static struct mpic *msi_mpic;
 
 
-static void mpic_pasemi_msi_mask_irq(unsigned int irq)
+static void mpic_pasemi_msi_mask_irq(struct irq_data *data)
 {
-	pr_debug("mpic_pasemi_msi_mask_irq %d\n", irq);
-	mask_msi_irq(irq);
-	mpic_mask_irq(irq);
+	pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq);
+	mask_msi_irq(data);
+	mpic_mask_irq(data->irq);
 }
 
-static void mpic_pasemi_msi_unmask_irq(unsigned int irq)
+static void mpic_pasemi_msi_unmask_irq(struct irq_data *data)
 {
-	pr_debug("mpic_pasemi_msi_unmask_irq %d\n", irq);
-	mpic_unmask_irq(irq);
-	unmask_msi_irq(irq);
+	pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq);
+	mpic_unmask_irq(data->irq);
+	unmask_msi_irq(data);
 }
 
 static struct irq_chip mpic_pasemi_msi_chip = {
-	.shutdown	= mpic_pasemi_msi_mask_irq,
-	.mask		= mpic_pasemi_msi_mask_irq,
-	.unmask		= mpic_pasemi_msi_unmask_irq,
+	.irq_shutdown	= mpic_pasemi_msi_mask_irq,
+	.irq_mask	= mpic_pasemi_msi_mask_irq,
+	.irq_unmask	= mpic_pasemi_msi_unmask_irq,
 	.eoi		= mpic_end_irq,
 	.set_type	= mpic_set_irq_type,
 	.set_affinity	= mpic_set_affinity,

+ 9 - 9
arch/powerpc/sysdev/mpic_u3msi.c

@@ -23,22 +23,22 @@
 /* A bit ugly, can we get this from the pci_dev somehow? */
 static struct mpic *msi_mpic;
 
-static void mpic_u3msi_mask_irq(unsigned int irq)
+static void mpic_u3msi_mask_irq(struct irq_data *data)
 {
-	mask_msi_irq(irq);
-	mpic_mask_irq(irq);
+	mask_msi_irq(data);
+	mpic_mask_irq(data->irq);
 }
 
-static void mpic_u3msi_unmask_irq(unsigned int irq)
+static void mpic_u3msi_unmask_irq(struct irq_data *data)
 {
-	mpic_unmask_irq(irq);
-	unmask_msi_irq(irq);
+	mpic_unmask_irq(data->irq);
+	unmask_msi_irq(data);
 }
 
 static struct irq_chip mpic_u3msi_chip = {
-	.shutdown	= mpic_u3msi_mask_irq,
-	.mask		= mpic_u3msi_mask_irq,
-	.unmask		= mpic_u3msi_unmask_irq,
+	.irq_shutdown	= mpic_u3msi_mask_irq,
+	.irq_mask	= mpic_u3msi_mask_irq,
+	.irq_unmask	= mpic_u3msi_unmask_irq,
 	.eoi		= mpic_end_irq,
 	.set_type	= mpic_set_irq_type,
 	.set_affinity	= mpic_set_affinity,

+ 1 - 1
arch/sh/kernel/irq.c

@@ -290,7 +290,7 @@ void __init init_IRQ(void)
 int __init arch_probe_nr_irqs(void)
 {
 	nr_irqs = sh_mv.mv_nr_irqs;
-	return 0;
+	return NR_IRQS_LEGACY;
 }
 #endif
 

+ 4 - 4
arch/sparc/kernel/pci_msi.c

@@ -114,10 +114,10 @@ static void free_msi(struct pci_pbm_info *pbm, int msi_num)
 
 static struct irq_chip msi_irq = {
 	.name		= "PCI-MSI",
-	.mask		= mask_msi_irq,
-	.unmask		= unmask_msi_irq,
-	.enable		= unmask_msi_irq,
-	.disable	= mask_msi_irq,
+	.irq_mask	= mask_msi_irq,
+	.irq_unmask	= unmask_msi_irq,
+	.irq_enable	= unmask_msi_irq,
+	.irq_disable	= mask_msi_irq,
 	/* XXX affinity XXX */
 };
 

+ 2 - 2
arch/tile/kernel/irq.c

@@ -208,7 +208,7 @@ static void tile_irq_chip_eoi(unsigned int irq)
 }
 
 static struct irq_chip tile_irq_chip = {
-	.typename = "tile_irq_chip",
+	.name = "tile_irq_chip",
 	.ack = tile_irq_chip_ack,
 	.eoi = tile_irq_chip_eoi,
 	.mask = tile_irq_chip_mask,
@@ -288,7 +288,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
-		seq_printf(p, " %14s", irq_desc[i].chip->typename);
+		seq_printf(p, " %14s", irq_desc[i].chip->name);
 		seq_printf(p, "  %s", action->name);
 
 		for (action = action->next; action; action = action->next)

+ 3 - 3
arch/um/kernel/irq.c

@@ -46,7 +46,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
-		seq_printf(p, " %14s", irq_desc[i].chip->typename);
+		seq_printf(p, " %14s", irq_desc[i].chip->name);
 		seq_printf(p, "  %s", action->name);
 
 		for (action=action->next; action; action = action->next)
@@ -369,7 +369,7 @@ static void dummy(unsigned int irq)
 
 /* This is used for everything else than the timer. */
 static struct irq_chip normal_irq_type = {
-	.typename = "SIGIO",
+	.name = "SIGIO",
 	.release = free_irq_by_irq_and_dev,
 	.disable = dummy,
 	.enable = dummy,
@@ -378,7 +378,7 @@ static struct irq_chip normal_irq_type = {
 };
 
 static struct irq_chip SIGVTALRM_irq_type = {
-	.typename = "SIGVTALRM",
+	.name = "SIGVTALRM",
 	.release = free_irq_by_irq_and_dev,
 	.shutdown = dummy, /* never called */
 	.disable = dummy,

+ 9 - 31
arch/x86/Kconfig

@@ -59,6 +59,10 @@ config X86
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
+	select HAVE_GENERIC_HARDIRQS
+	select HAVE_SPARSE_IRQ
+	select GENERIC_IRQ_PROBE
+	select GENERIC_PENDING_IRQ if SMP
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -200,20 +204,6 @@ config HAVE_INTEL_TXT
 	def_bool y
 	depends on EXPERIMENTAL && DMAR && ACPI
 
-# Use the generic interrupt handling code in kernel/irq/:
-config GENERIC_HARDIRQS
-	def_bool y
-
-config GENERIC_HARDIRQS_NO__DO_IRQ
-       def_bool y
-
-config GENERIC_IRQ_PROBE
-	def_bool y
-
-config GENERIC_PENDING_IRQ
-	def_bool y
-	depends on GENERIC_HARDIRQS && SMP
-
 config USE_GENERIC_SMP_HELPERS
 	def_bool y
 	depends on SMP
@@ -296,23 +286,6 @@ config X86_X2APIC
 
 	  If you don't know what to do here, say N.
 
-config SPARSE_IRQ
-	bool "Support sparse irq numbering"
-	depends on PCI_MSI || HT_IRQ
-	---help---
-	  This enables support for sparse irqs. This is useful for distro
-	  kernels that want to define a high CONFIG_NR_CPUS value but still
-	  want to have low kernel memory footprint on smaller machines.
-
-	  ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
-	    out the irq_desc[] array in a more NUMA-friendly way. )
-
-	  If you don't know what to do here, say N.
-
-config NUMA_IRQ_DESC
-	def_bool y
-	depends on SPARSE_IRQ && NUMA
-
 config X86_MPPARSE
 	bool "Enable MPS table" if ACPI
 	default y
@@ -1925,6 +1898,11 @@ config PCI_OLPC
 	def_bool y
 	depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
 
+config PCI_XEN
+	def_bool y
+	depends on PCI && XEN
+	select SWIOTLB_XEN
+
 config PCI_DOMAINS
 	def_bool y
 	depends on PCI

+ 0 - 1
arch/x86/include/asm/apb_timer.h

@@ -54,7 +54,6 @@ extern struct clock_event_device *global_clock_event;
 extern unsigned long apbt_quick_calibrate(void);
 extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
 extern void apbt_setup_secondary_clock(void);
-extern unsigned int boot_cpu_id;
 
 extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
 extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);

+ 0 - 1
arch/x86/include/asm/cpu.h

@@ -32,6 +32,5 @@ extern void arch_unregister_cpu(int);
 
 DECLARE_PER_CPU(int, cpu_state);
 
-extern unsigned int boot_cpu_id;
 
 #endif /* _ASM_X86_CPU_H */

+ 6 - 4
arch/x86/include/asm/hpet.h

@@ -74,10 +74,12 @@ extern void hpet_disable(void);
 extern unsigned int hpet_readl(unsigned int a);
 extern void force_hpet_resume(void);
 
-extern void hpet_msi_unmask(unsigned int irq);
-extern void hpet_msi_mask(unsigned int irq);
-extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg);
-extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg);
+struct irq_data;
+extern void hpet_msi_unmask(struct irq_data *data);
+extern void hpet_msi_mask(struct irq_data *data);
+struct hpet_dev;
+extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
+extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
 
 #ifdef CONFIG_PCI_MSI
 extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);

+ 13 - 4
arch/x86/include/asm/hw_irq.h

@@ -78,6 +78,13 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
 	irq_attr->polarity	= polarity;
 }
 
+struct irq_2_iommu {
+	struct intel_iommu *iommu;
+	u16 irte_index;
+	u16 sub_handle;
+	u8  irte_mask;
+};
+
 /*
  * This is performance-critical, we want to do it O(1)
  *
@@ -89,15 +96,17 @@ struct irq_cfg {
 	cpumask_var_t		old_domain;
 	u8			vector;
 	u8			move_in_progress : 1;
+#ifdef CONFIG_INTR_REMAP
+	struct irq_2_iommu	irq_2_iommu;
+#endif
 };
 
-extern struct irq_cfg *irq_cfg(unsigned int);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
 extern void send_cleanup_vector(struct irq_cfg *);
 
-struct irq_desc;
-extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *,
-				      unsigned int *dest_id);
+struct irq_data;
+int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
+			  unsigned int *dest_id);
 extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);
 

+ 2 - 0
arch/x86/include/asm/i8259.h

@@ -55,6 +55,8 @@ extern struct irq_chip i8259A_chip;
 struct legacy_pic {
 	int nr_legacy_irqs;
 	struct irq_chip *chip;
+	void (*mask)(unsigned int irq);
+	void (*unmask)(unsigned int irq);
 	void (*mask_all)(void);
 	void (*restore_mask)(void);
 	void (*init)(int auto_eoi);

+ 13 - 0
arch/x86/include/asm/io.h

@@ -41,6 +41,8 @@
 #include <asm-generic/int-ll64.h>
 #include <asm/page.h>
 
+#include <xen/xen.h>
+
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
 { type ret; asm volatile("mov" size " %1,%0":reg (ret) \
@@ -349,6 +351,17 @@ extern void __iomem *early_memremap(resource_size_t phys_addr,
 extern void early_iounmap(void __iomem *addr, unsigned long size);
 extern void fixup_early_ioremap(void);
 
+#ifdef CONFIG_XEN
+struct bio_vec;
+
+extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
+				      const struct bio_vec *vec2);
+
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
+	(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&				\
+	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
+#endif	/* CONFIG_XEN */
+
 #define IO_SPACE_LIMIT 0xffff
 
 #endif /* _ASM_X86_IO_H */

+ 1 - 6
arch/x86/include/asm/io_apic.h

@@ -169,13 +169,8 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 
 extern void probe_nr_irqs_gsi(void);
+extern int get_nr_irqs_gsi(void);
 
-extern int setup_ioapic_entry(int apic, int irq,
-			      struct IO_APIC_route_entry *entry,
-			      unsigned int destination, int trigger,
-			      int polarity, int vector, int pin);
-extern void ioapic_write_entry(int apic, int pin,
-			       struct IO_APIC_route_entry e);
 extern void setup_ioapic_ids_from_mpc(void);
 
 struct mp_ioapic_gsi{

+ 35 - 0
arch/x86/include/asm/irq_remapping.h

@@ -3,4 +3,39 @@
 
 #define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
 
+#ifdef CONFIG_INTR_REMAP
+static inline void prepare_irte(struct irte *irte, int vector,
+			        unsigned int dest)
+{
+	memset(irte, 0, sizeof(*irte));
+
+	irte->present = 1;
+	irte->dst_mode = apic->irq_dest_mode;
+	/*
+	 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
+	 * actual level or edge trigger will be setup in the IO-APIC
+	 * RTE. This will help simplify level triggered irq migration.
+	 * For more details, see the comments (in io_apic.c) explainig IO-APIC
+	 * irq migration in the presence of interrupt-remapping.
+	*/
+	irte->trigger_mode = 0;
+	irte->dlvry_mode = apic->irq_delivery_mode;
+	irte->vector = vector;
+	irte->dest_id = IRTE_DEST(dest);
+	irte->redir_hint = 1;
+}
+static inline bool irq_remapped(struct irq_cfg *cfg)
+{
+	return cfg->irq_2_iommu.iommu != NULL;
+}
+#else
+static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
+{
+}
+static inline bool irq_remapped(struct irq_cfg *cfg)
+{
+	return false;
+}
+#endif
+
 #endif	/* _ASM_X86_IRQ_REMAPPING_H */

+ 31 - 2
arch/x86/include/asm/pci.h

@@ -7,6 +7,7 @@
 #include <linux/string.h>
 #include <asm/scatterlist.h>
 #include <asm/io.h>
+#include <asm/x86_init.h>
 
 #ifdef __KERNEL__
 
@@ -94,8 +95,36 @@ static inline void early_quirks(void) { }
 
 extern void pci_iommu_alloc(void);
 
-/* MSI arch hook */
-#define arch_setup_msi_irqs arch_setup_msi_irqs
+#ifdef CONFIG_PCI_MSI
+/* MSI arch specific hooks */
+static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	return x86_msi.setup_msi_irqs(dev, nvec, type);
+}
+
+static inline void x86_teardown_msi_irqs(struct pci_dev *dev)
+{
+	x86_msi.teardown_msi_irqs(dev);
+}
+
+static inline void x86_teardown_msi_irq(unsigned int irq)
+{
+	x86_msi.teardown_msi_irq(irq);
+}
+#define arch_setup_msi_irqs x86_setup_msi_irqs
+#define arch_teardown_msi_irqs x86_teardown_msi_irqs
+#define arch_teardown_msi_irq x86_teardown_msi_irq
+/* implemented in arch/x86/kernel/apic/io_apic. */
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+void native_teardown_msi_irq(unsigned int irq);
+/* default to the implementation in drivers/lib/msi.c */
+#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+void default_teardown_msi_irqs(struct pci_dev *dev);
+#else
+#define native_setup_msi_irqs		NULL
+#define native_teardown_msi_irq		NULL
+#define default_teardown_msi_irqs	NULL
+#endif
 
 #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
 

+ 1 - 0
arch/x86/include/asm/pci_x86.h

@@ -47,6 +47,7 @@ enum pci_bf_sort_state {
 extern unsigned int pcibios_max_latency;
 
 void pcibios_resource_survey(void);
+void pcibios_set_cache_line_size(void);
 
 /* pci-pc.c */
 

+ 9 - 0
arch/x86/include/asm/x86_init.h

@@ -154,9 +154,18 @@ struct x86_platform_ops {
 	int (*i8042_detect)(void);
 };
 
+struct pci_dev;
+
+struct x86_msi_ops {
+	int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
+	void (*teardown_msi_irq)(unsigned int irq);
+	void (*teardown_msi_irqs)(struct pci_dev *dev);
+};
+
 extern struct x86_init_ops x86_init;
 extern struct x86_cpuinit_ops x86_cpuinit;
 extern struct x86_platform_ops x86_platform;
+extern struct x86_msi_ops x86_msi;
 
 extern void x86_init_noop(void);
 extern void x86_init_uint_noop(unsigned int unused);

+ 53 - 0
arch/x86/include/asm/xen/pci.h

@@ -0,0 +1,53 @@
+#ifndef _ASM_X86_XEN_PCI_H
+#define _ASM_X86_XEN_PCI_H
+
+#if defined(CONFIG_PCI_XEN)
+extern int __init pci_xen_init(void);
+#define pci_xen 1
+#else
+#define pci_xen 0
+#define pci_xen_init (0)
+#endif
+
+#if defined(CONFIG_PCI_MSI)
+#if defined(CONFIG_PCI_XEN)
+/* The drivers/pci/xen-pcifront.c sets this structure to
+ * its own functions.
+ */
+struct xen_pci_frontend_ops {
+	int (*enable_msi)(struct pci_dev *dev, int **vectors);
+	void (*disable_msi)(struct pci_dev *dev);
+	int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
+	void (*disable_msix)(struct pci_dev *dev);
+};
+
+extern struct xen_pci_frontend_ops *xen_pci_frontend;
+
+static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
+					      int **vectors)
+{
+	if (xen_pci_frontend && xen_pci_frontend->enable_msi)
+		return xen_pci_frontend->enable_msi(dev, vectors);
+	return -ENODEV;
+}
+static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
+{
+	if (xen_pci_frontend && xen_pci_frontend->disable_msi)
+			xen_pci_frontend->disable_msi(dev);
+}
+static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
+					       int **vectors, int nvec)
+{
+	if (xen_pci_frontend && xen_pci_frontend->enable_msix)
+		return xen_pci_frontend->enable_msix(dev, vectors, nvec);
+	return -ENODEV;
+}
+static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev)
+{
+	if (xen_pci_frontend && xen_pci_frontend->disable_msix)
+			xen_pci_frontend->disable_msix(dev);
+}
+#endif /* CONFIG_PCI_XEN */
+#endif /* CONFIG_PCI_MSI */
+
+#endif	/* _ASM_X86_XEN_PCI_H */

+ 25 - 31
arch/x86/kernel/apb_timer.c

@@ -231,34 +231,6 @@ static void apbt_restart_clocksource(struct clocksource *cs)
 	apbt_start_counter(phy_cs_timer_id);
 }
 
-/* Setup IRQ routing via IOAPIC */
-#ifdef CONFIG_SMP
-static void apbt_setup_irq(struct apbt_dev *adev)
-{
-	struct irq_chip *chip;
-	struct irq_desc *desc;
-
-	/* timer0 irq has been setup early */
-	if (adev->irq == 0)
-		return;
-	desc = irq_to_desc(adev->irq);
-	chip = get_irq_chip(adev->irq);
-	disable_irq(adev->irq);
-	desc->status |= IRQ_MOVE_PCNTXT;
-	irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
-	/* APB timer irqs are set up as mp_irqs, timer is edge triggerred */
-	set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge");
-	enable_irq(adev->irq);
-	if (system_state == SYSTEM_BOOTING)
-		if (request_irq(adev->irq, apbt_interrupt_handler,
-				IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
-				adev->name, adev)) {
-			printk(KERN_ERR "Failed request IRQ for APBT%d\n",
-			       adev->num);
-		}
-}
-#endif
-
 static void apbt_enable_int(int n)
 {
 	unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
@@ -334,6 +306,27 @@ static int __init apbt_clockevent_register(void)
 }
 
 #ifdef CONFIG_SMP
+
+static void apbt_setup_irq(struct apbt_dev *adev)
+{
+	/* timer0 irq has been setup early */
+	if (adev->irq == 0)
+		return;
+
+	if (system_state == SYSTEM_BOOTING) {
+		irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+		/* APB timer irqs are set up as mp_irqs, timer is edge type */
+		__set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
+		if (request_irq(adev->irq, apbt_interrupt_handler,
+				IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
+				adev->name, adev)) {
+			printk(KERN_ERR "Failed request IRQ for APBT%d\n",
+			       adev->num);
+		}
+	} else
+		enable_irq(adev->irq);
+}
+
 /* Should be called with per cpu */
 void apbt_setup_secondary_clock(void)
 {
@@ -343,7 +336,7 @@ void apbt_setup_secondary_clock(void)
 
 	/* Don't register boot CPU clockevent */
 	cpu = smp_processor_id();
-	if (cpu == boot_cpu_id)
+	if (!cpu)
 		return;
 	/*
 	 * We need to calculate the scaled math multiplication factor for
@@ -389,10 +382,11 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
 
 	switch (action & 0xf) {
 	case CPU_DEAD:
+		disable_irq(adev->irq);
 		apbt_disable_int(cpu);
-		if (system_state == SYSTEM_RUNNING)
+		if (system_state == SYSTEM_RUNNING) {
 			pr_debug("skipping APBT CPU %lu offline\n", cpu);
-		else if (adev) {
+		} else if (adev) {
 			pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
 			free_irq(adev->irq, adev);
 		}

+ 0 - 3
arch/x86/kernel/apic/apic.c

@@ -1665,10 +1665,7 @@ int __init APIC_init_uniprocessor(void)
 	}
 #endif
 
-#ifndef CONFIG_SMP
-	enable_IR_x2apic();
 	default_setup_apic_routing();
-#endif
 
 	verify_local_APIC();
 	connect_bsp_APIC();

Rozdielové dáta súboru neboli zobrazené, pretože súbor je príliš veľký
+ 189 - 378
arch/x86/kernel/apic/io_apic.c


+ 1 - 1
arch/x86/kernel/apic/nmi.c

@@ -178,7 +178,7 @@ int __init check_nmi_watchdog(void)
 error:
 	if (nmi_watchdog == NMI_IO_APIC) {
 		if (!timer_through_8259)
-			legacy_pic->chip->mask(0);
+			legacy_pic->mask(0);
 		on_each_cpu(__acpi_nmi_disable, NULL, 1);
 	}
 

+ 3 - 0
arch/x86/kernel/apic/probe_64.c

@@ -54,6 +54,9 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
  */
 void __init default_setup_apic_routing(void)
 {
+
+	enable_IR_x2apic();
+
 #ifdef CONFIG_X86_X2APIC
 	if (x2apic_mode
 #ifdef CONFIG_X86_UV

+ 1 - 1
arch/x86/kernel/cpu/amd.c

@@ -148,7 +148,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	/* calling is from identify_secondary_cpu() ? */
-	if (c->cpu_index == boot_cpu_id)
+	if (!c->cpu_index)
 		return;
 
 	/*

+ 1 - 1
arch/x86/kernel/cpu/common.c

@@ -665,7 +665,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		this_cpu->c_early_init(c);
 
 #ifdef CONFIG_SMP
-	c->cpu_index = boot_cpu_id;
+	c->cpu_index = 0;
 #endif
 	filter_cpuid_features(c, false);
 }

+ 1 - 1
arch/x86/kernel/cpu/intel.c

@@ -170,7 +170,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	/* calling is from identify_secondary_cpu() ? */
-	if (c->cpu_index == boot_cpu_id)
+	if (!c->cpu_index)
 		return;
 
 	/*

+ 0 - 2
arch/x86/kernel/early-quirks.c

@@ -96,7 +96,6 @@ static void __init nvidia_bugs(int num, int slot, int func)
 
 }
 
-#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
 #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
 static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
 {
@@ -115,7 +114,6 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
 	d &= 0xff;
 	return d;
 }
-#endif
 
 static void __init ati_bugs(int num, int slot, int func)
 {

+ 6 - 10
arch/x86/kernel/hpet.c

@@ -440,9 +440,9 @@ static int hpet_legacy_next_event(unsigned long delta,
 static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
 static struct hpet_dev	*hpet_devs;
 
-void hpet_msi_unmask(unsigned int irq)
+void hpet_msi_unmask(struct irq_data *data)
 {
-	struct hpet_dev *hdev = get_irq_data(irq);
+	struct hpet_dev *hdev = data->handler_data;
 	unsigned int cfg;
 
 	/* unmask it */
@@ -451,10 +451,10 @@ void hpet_msi_unmask(unsigned int irq)
 	hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
 }
 
-void hpet_msi_mask(unsigned int irq)
+void hpet_msi_mask(struct irq_data *data)
 {
+	struct hpet_dev *hdev = data->handler_data;
 	unsigned int cfg;
-	struct hpet_dev *hdev = get_irq_data(irq);
 
 	/* mask it */
 	cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
@@ -462,18 +462,14 @@ void hpet_msi_mask(unsigned int irq)
 	hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
 }
 
-void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
+void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
 {
-	struct hpet_dev *hdev = get_irq_data(irq);
-
 	hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
 	hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
 }
 
-void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
+void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
 {
-	struct hpet_dev *hdev = get_irq_data(irq);
-
 	msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
 	msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
 	msg->address_hi = 0;

+ 32 - 31
arch/x86/kernel/i8259.c

@@ -29,24 +29,10 @@
  * plus some generic x86 specific things if generic specifics makes
  * any sense at all.
  */
+static void init_8259A(int auto_eoi);
 
 static int i8259A_auto_eoi;
 DEFINE_RAW_SPINLOCK(i8259A_lock);
-static void mask_and_ack_8259A(unsigned int);
-static void mask_8259A(void);
-static void unmask_8259A(void);
-static void disable_8259A_irq(unsigned int irq);
-static void enable_8259A_irq(unsigned int irq);
-static void init_8259A(int auto_eoi);
-static int i8259A_irq_pending(unsigned int irq);
-
-struct irq_chip i8259A_chip = {
-	.name		= "XT-PIC",
-	.mask		= disable_8259A_irq,
-	.disable	= disable_8259A_irq,
-	.unmask		= enable_8259A_irq,
-	.mask_ack	= mask_and_ack_8259A,
-};
 
 /*
  * 8259A PIC functions to handle ISA devices:
@@ -68,7 +54,7 @@ unsigned int cached_irq_mask = 0xffff;
  */
 unsigned long io_apic_irqs;
 
-static void disable_8259A_irq(unsigned int irq)
+static void mask_8259A_irq(unsigned int irq)
 {
 	unsigned int mask = 1 << irq;
 	unsigned long flags;
@@ -82,7 +68,12 @@ static void disable_8259A_irq(unsigned int irq)
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
-static void enable_8259A_irq(unsigned int irq)
+static void disable_8259A_irq(struct irq_data *data)
+{
+	mask_8259A_irq(data->irq);
+}
+
+static void unmask_8259A_irq(unsigned int irq)
 {
 	unsigned int mask = ~(1 << irq);
 	unsigned long flags;
@@ -96,6 +87,11 @@ static void enable_8259A_irq(unsigned int irq)
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
+static void enable_8259A_irq(struct irq_data *data)
+{
+	unmask_8259A_irq(data->irq);
+}
+
 static int i8259A_irq_pending(unsigned int irq)
 {
 	unsigned int mask = 1<<irq;
@@ -117,7 +113,7 @@ static void make_8259A_irq(unsigned int irq)
 	disable_irq_nosync(irq);
 	io_apic_irqs &= ~(1<<irq);
 	set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
-				      "XT");
+				      i8259A_chip.name);
 	enable_irq(irq);
 }
 
@@ -150,8 +146,9 @@ static inline int i8259A_irq_real(unsigned int irq)
  * first, _then_ send the EOI, and the order of EOI
  * to the two 8259s is important!
  */
-static void mask_and_ack_8259A(unsigned int irq)
+static void mask_and_ack_8259A(struct irq_data *data)
 {
+	unsigned int irq = data->irq;
 	unsigned int irqmask = 1 << irq;
 	unsigned long flags;
 
@@ -223,6 +220,14 @@ spurious_8259A_irq:
 	}
 }
 
+struct irq_chip i8259A_chip = {
+	.name		= "XT-PIC",
+	.irq_mask	= disable_8259A_irq,
+	.irq_disable	= disable_8259A_irq,
+	.irq_unmask	= enable_8259A_irq,
+	.irq_mask_ack	= mask_and_ack_8259A,
+};
+
 static char irq_trigger[2];
 /**
  * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
@@ -342,9 +347,9 @@ static void init_8259A(int auto_eoi)
 		 * In AEOI mode we just have to mask the interrupt
 		 * when acking.
 		 */
-		i8259A_chip.mask_ack = disable_8259A_irq;
+		i8259A_chip.irq_mask_ack = disable_8259A_irq;
 	else
-		i8259A_chip.mask_ack = mask_and_ack_8259A;
+		i8259A_chip.irq_mask_ack = mask_and_ack_8259A;
 
 	udelay(100);		/* wait for 8259A to initialize */
 
@@ -363,14 +368,6 @@ static void init_8259A(int auto_eoi)
 static void legacy_pic_noop(void) { };
 static void legacy_pic_uint_noop(unsigned int unused) { };
 static void legacy_pic_int_noop(int unused) { };
-
-static struct irq_chip dummy_pic_chip  = {
-	.name = "dummy pic",
-	.mask = legacy_pic_uint_noop,
-	.unmask = legacy_pic_uint_noop,
-	.disable = legacy_pic_uint_noop,
-	.mask_ack = legacy_pic_uint_noop,
-};
 static int legacy_pic_irq_pending_noop(unsigned int irq)
 {
 	return 0;
@@ -378,7 +375,9 @@ static int legacy_pic_irq_pending_noop(unsigned int irq)
 
 struct legacy_pic null_legacy_pic = {
 	.nr_legacy_irqs = 0,
-	.chip = &dummy_pic_chip,
+	.chip = &dummy_irq_chip,
+	.mask = legacy_pic_uint_noop,
+	.unmask = legacy_pic_uint_noop,
 	.mask_all = legacy_pic_noop,
 	.restore_mask = legacy_pic_noop,
 	.init = legacy_pic_int_noop,
@@ -389,7 +388,9 @@ struct legacy_pic null_legacy_pic = {
 struct legacy_pic default_legacy_pic = {
 	.nr_legacy_irqs = NR_IRQS_LEGACY,
 	.chip  = &i8259A_chip,
-	.mask_all  = mask_8259A,
+	.mask = mask_8259A_irq,
+	.unmask = unmask_8259A_irq,
+	.mask_all = mask_8259A,
 	.restore_mask = unmask_8259A,
 	.init = init_8259A,
 	.irq_pending = i8259A_irq_pending,

+ 13 - 11
arch/x86/kernel/irq.c

@@ -159,7 +159,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	seq_printf(p, "%*d: ", prec, i);
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, " %8s", desc->irq_data.chip->name);
 	seq_printf(p, "-%-8s", desc->name);
 
 	if (action) {
@@ -282,6 +282,7 @@ void fixup_irqs(void)
 	unsigned int irq, vector;
 	static int warned;
 	struct irq_desc *desc;
+	struct irq_data *data;
 
 	for_each_irq_desc(irq, desc) {
 		int break_affinity = 0;
@@ -296,7 +297,8 @@ void fixup_irqs(void)
 		/* interrupt's are disabled at this point */
 		raw_spin_lock(&desc->lock);
 
-		affinity = desc->affinity;
+		data = &desc->irq_data;
+		affinity = data->affinity;
 		if (!irq_has_action(irq) ||
 		    cpumask_equal(affinity, cpu_online_mask)) {
 			raw_spin_unlock(&desc->lock);
@@ -315,16 +317,16 @@ void fixup_irqs(void)
 			affinity = cpu_all_mask;
 		}
 
-		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
-			desc->chip->mask(irq);
+		if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
+			data->chip->irq_mask(data);
 
-		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, affinity);
+		if (data->chip->irq_set_affinity)
+			data->chip->irq_set_affinity(data, affinity, true);
 		else if (!(warned++))
 			set_affinity = 0;
 
-		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
-			desc->chip->unmask(irq);
+		if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
+			data->chip->irq_unmask(data);
 
 		raw_spin_unlock(&desc->lock);
 
@@ -355,10 +357,10 @@ void fixup_irqs(void)
 		if (irr  & (1 << (vector % 32))) {
 			irq = __get_cpu_var(vector_irq)[vector];
 
-			desc = irq_to_desc(irq);
+			data = irq_get_irq_data(irq);
 			raw_spin_lock(&desc->lock);
-			if (desc->chip->retrigger)
-				desc->chip->retrigger(irq);
+			if (data->chip->irq_retrigger)
+				data->chip->irq_retrigger(data);
 			raw_spin_unlock(&desc->lock);
 		}
 	}

+ 4 - 13
arch/x86/kernel/irqinit.c

@@ -100,6 +100,8 @@ int vector_used_by_percpu_irq(unsigned int vector)
 
 void __init init_ISA_irqs(void)
 {
+	struct irq_chip *chip = legacy_pic->chip;
+	const char *name = chip->name;
 	int i;
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
@@ -107,19 +109,8 @@ void __init init_ISA_irqs(void)
 #endif
 	legacy_pic->init(0);
 
-	/*
-	 * 16 old-style INTA-cycle interrupts:
-	 */
-	for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = NULL;
-		desc->depth = 1;
-
-		set_irq_chip_and_handler_name(i, &i8259A_chip,
-					      handle_level_irq, "XT");
-	}
+	for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
+		set_irq_chip_and_handler_name(i, chip, handle_level_irq, name);
 }
 
 void __init init_IRQ(void)

+ 2 - 2
arch/x86/kernel/machine_kexec_64.c

@@ -36,7 +36,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
 		if (!page)
 			goto out;
 		pud = (pud_t *)page_address(page);
-		memset(pud, 0, PAGE_SIZE);
+		clear_page(pud);
 		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
 	}
 	pud = pud_offset(pgd, addr);
@@ -45,7 +45,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
 		if (!page)
 			goto out;
 		pmd = (pmd_t *)page_address(page);
-		memset(pmd, 0, PAGE_SIZE);
+		clear_page(pmd);
 		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
 	}
 	pmd = pmd_offset(pud, addr);

+ 1 - 1
arch/x86/kernel/reboot.c

@@ -84,7 +84,7 @@ static int __init reboot_setup(char *str)
 			}
 				/* we will leave sorting out the final value
 				   when we are ready to reboot, since we might not
-				   have set up boot_cpu_id or smp_num_cpu */
+				   have detected BSP APIC ID or smp_num_cpu */
 			break;
 #endif /* CONFIG_SMP */
 

+ 0 - 1
arch/x86/kernel/setup.c

@@ -125,7 +125,6 @@ unsigned long max_pfn_mapped;
 RESERVE_BRK(dmi_alloc, 65536);
 #endif
 
-unsigned int boot_cpu_id __read_mostly;
 
 static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
 unsigned long _brk_end = (unsigned long)__brk_base;

+ 1 - 1
arch/x86/kernel/setup_percpu.c

@@ -253,7 +253,7 @@ void __init setup_per_cpu_areas(void)
 		 * Up to this point, the boot CPU has been using .init.data
 		 * area.  Reload any changed state for the boot CPU.
 		 */
-		if (cpu == boot_cpu_id)
+		if (!cpu)
 			switch_to_new_gdt(cpu);
 	}
 

+ 4 - 4
arch/x86/kernel/smpboot.c

@@ -324,9 +324,9 @@ notrace static void __cpuinit start_secondary(void *unused)
 	check_tsc_sync_target();
 
 	if (nmi_watchdog == NMI_IO_APIC) {
-		legacy_pic->chip->mask(0);
+		legacy_pic->mask(0);
 		enable_NMI_through_LVT0();
-		legacy_pic->chip->unmask(0);
+		legacy_pic->unmask(0);
 	}
 
 	/* This must be done before setting cpu_online_mask */
@@ -1109,8 +1109,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	}
 	set_cpu_sibling_map(0);
 
-	enable_IR_x2apic();
-	default_setup_apic_routing();
 
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
@@ -1118,6 +1116,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 		goto out;
 	}
 
+	default_setup_apic_routing();
+
 	preempt_disable();
 	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",

+ 19 - 36
arch/x86/kernel/uv_irq.c

@@ -28,34 +28,21 @@ struct uv_irq_2_mmr_pnode{
 static spinlock_t		uv_irq_lock;
 static struct rb_root		uv_irq_root;
 
-static int uv_set_irq_affinity(unsigned int, const struct cpumask *);
+static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
 
-static void uv_noop(unsigned int irq)
-{
-}
-
-static unsigned int uv_noop_ret(unsigned int irq)
-{
-	return 0;
-}
+static void uv_noop(struct irq_data *data) { }
 
-static void uv_ack_apic(unsigned int irq)
+static void uv_ack_apic(struct irq_data *data)
 {
 	ack_APIC_irq();
 }
 
 static struct irq_chip uv_irq_chip = {
-	.name		= "UV-CORE",
-	.startup	= uv_noop_ret,
-	.shutdown	= uv_noop,
-	.enable		= uv_noop,
-	.disable	= uv_noop,
-	.ack		= uv_noop,
-	.mask		= uv_noop,
-	.unmask		= uv_noop,
-	.eoi		= uv_ack_apic,
-	.end		= uv_noop,
-	.set_affinity	= uv_set_irq_affinity,
+	.name			= "UV-CORE",
+	.irq_mask		= uv_noop,
+	.irq_unmask		= uv_noop,
+	.irq_eoi		= uv_ack_apic,
+	.irq_set_affinity	= uv_set_irq_affinity,
 };
 
 /*
@@ -144,26 +131,22 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 		       unsigned long mmr_offset, int limit)
 {
 	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
-	int mmr_pnode;
+	struct irq_cfg *cfg = get_irq_chip_data(irq);
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
-	int err;
+	int mmr_pnode, err;
 
 	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
 			sizeof(unsigned long));
 
-	cfg = irq_cfg(irq);
-
 	err = assign_irq_vector(irq, cfg, eligible_cpu);
 	if (err != 0)
 		return err;
 
 	if (limit == UV_AFFINITY_CPU)
-		desc->status |= IRQ_NO_BALANCING;
+		irq_set_status_flags(irq, IRQ_NO_BALANCING);
 	else
-		desc->status |= IRQ_MOVE_PCNTXT;
+		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 
 	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
 				      irq_name);
@@ -206,17 +189,17 @@ static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
 }
 
-static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
+		    bool force)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg = desc->chip_data;
+	struct irq_cfg *cfg = data->chip_data;
 	unsigned int dest;
-	unsigned long mmr_value;
+	unsigned long mmr_value, mmr_offset;
 	struct uv_IO_APIC_route_entry *entry;
-	unsigned long mmr_offset;
 	int mmr_pnode;
 
-	if (set_desc_affinity(desc, mask, &dest))
+	if (__ioapic_set_affinity(data, mask, &dest))
 		return -1;
 
 	mmr_value = 0;
@@ -231,7 +214,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	entry->dest		= dest;
 
 	/* Get previously stored MMR and pnode of hub sourcing interrupts */
-	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+	if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
 		return -1;
 
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);

+ 44 - 96
arch/x86/kernel/visws_quirks.c

@@ -66,10 +66,7 @@ static void __init visws_time_init(void)
 }
 
 /* Replaces the default init_ISA_irqs in the generic setup */
-static void __init visws_pre_intr_init(void)
-{
-	init_VISWS_APIC_irqs();
-}
+static void __init visws_pre_intr_init(void);
 
 /* Quirk for machine specific memory setup. */
 
@@ -429,67 +426,34 @@ static int is_co_apic(unsigned int irq)
 /*
  * This is the SGI Cobalt (IO-)APIC:
  */
-
-static void enable_cobalt_irq(unsigned int irq)
+static void enable_cobalt_irq(struct irq_data *data)
 {
-	co_apic_set(is_co_apic(irq), irq);
+	co_apic_set(is_co_apic(data->irq), data->irq);
 }
 
-static void disable_cobalt_irq(unsigned int irq)
+static void disable_cobalt_irq(struct irq_data *data)
 {
-	int entry = is_co_apic(irq);
+	int entry = is_co_apic(data->irq);
 
 	co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
 	co_apic_read(CO_APIC_LO(entry));
 }
 
-/*
- * "irq" really just serves to identify the device.  Here is where we
- * map this to the Cobalt APIC entry where it's physically wired.
- * This is called via request_irq -> setup_irq -> irq_desc->startup()
- */
-static unsigned int startup_cobalt_irq(unsigned int irq)
+static void ack_cobalt_irq(struct irq_data *data)
 {
 	unsigned long flags;
-	struct irq_desc *desc = irq_to_desc(irq);
 
 	spin_lock_irqsave(&cobalt_lock, flags);
-	if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
-		desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
-	enable_cobalt_irq(irq);
-	spin_unlock_irqrestore(&cobalt_lock, flags);
-	return 0;
-}
-
-static void ack_cobalt_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cobalt_lock, flags);
-	disable_cobalt_irq(irq);
+	disable_cobalt_irq(data);
 	apic_write(APIC_EOI, APIC_EIO_ACK);
 	spin_unlock_irqrestore(&cobalt_lock, flags);
 }
 
-static void end_cobalt_irq(unsigned int irq)
-{
-	unsigned long flags;
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	spin_lock_irqsave(&cobalt_lock, flags);
-	if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
-		enable_cobalt_irq(irq);
-	spin_unlock_irqrestore(&cobalt_lock, flags);
-}
-
 static struct irq_chip cobalt_irq_type = {
-	.name =		"Cobalt-APIC",
-	.startup =	startup_cobalt_irq,
-	.shutdown =	disable_cobalt_irq,
-	.enable =	enable_cobalt_irq,
-	.disable =	disable_cobalt_irq,
-	.ack =		ack_cobalt_irq,
-	.end =		end_cobalt_irq,
+	.name		= "Cobalt-APIC",
+	.irq_enable	= enable_cobalt_irq,
+	.irq_disable	= disable_cobalt_irq,
+	.irq_ack	= ack_cobalt_irq,
 };
 
 
@@ -503,35 +467,34 @@ static struct irq_chip cobalt_irq_type = {
  * interrupt controller type, and through a special virtual interrupt-
  * controller. Device drivers only see the virtual interrupt sources.
  */
-static unsigned int startup_piix4_master_irq(unsigned int irq)
+static unsigned int startup_piix4_master_irq(struct irq_data *data)
 {
 	legacy_pic->init(0);
-
-	return startup_cobalt_irq(irq);
+	enable_cobalt_irq(data);
 }
 
-static void end_piix4_master_irq(unsigned int irq)
+static void end_piix4_master_irq(struct irq_data *data)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&cobalt_lock, flags);
-	enable_cobalt_irq(irq);
+	enable_cobalt_irq(data);
 	spin_unlock_irqrestore(&cobalt_lock, flags);
 }
 
 static struct irq_chip piix4_master_irq_type = {
-	.name =		"PIIX4-master",
-	.startup =	startup_piix4_master_irq,
-	.ack =		ack_cobalt_irq,
-	.end =		end_piix4_master_irq,
+	.name		= "PIIX4-master",
+	.irq_startup	= startup_piix4_master_irq,
+	.irq_ack	= ack_cobalt_irq,
 };
 
+static void pii4_mask(struct irq_data *data) { }
 
 static struct irq_chip piix4_virtual_irq_type = {
-	.name =		"PIIX4-virtual",
+	.name		= "PIIX4-virtual",
+	.mask		= pii4_mask,
 };
 
-
 /*
  * PIIX4-8259 master/virtual functions to handle interrupt requests
  * from legacy devices: floppy, parallel, serial, rtc.
@@ -549,9 +512,8 @@ static struct irq_chip piix4_virtual_irq_type = {
  */
 static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 {
-	int realirq;
-	struct irq_desc *desc;
 	unsigned long flags;
+	int realirq;
 
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
 
@@ -592,18 +554,10 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
 
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 
-	desc = irq_to_desc(realirq);
-
 	/*
 	 * handle this 'virtual interrupt' as a Cobalt one now.
 	 */
-	kstat_incr_irqs_this_cpu(realirq, desc);
-
-	if (likely(desc->action != NULL))
-		handle_IRQ_event(realirq, desc->action);
-
-	if (!(desc->status & IRQ_DISABLED))
-		legacy_pic->chip->unmask(realirq);
+	generic_handle_irq(realirq);
 
 	return IRQ_HANDLED;
 
@@ -624,41 +578,35 @@ static struct irqaction cascade_action = {
 
 static inline void set_piix4_virtual_irq_type(void)
 {
-	piix4_virtual_irq_type.shutdown = i8259A_chip.mask;
 	piix4_virtual_irq_type.enable =	i8259A_chip.unmask;
 	piix4_virtual_irq_type.disable = i8259A_chip.mask;
+	piix4_virtual_irq_type.unmask =	i8259A_chip.unmask;
 }
 
-void init_VISWS_APIC_irqs(void)
+static void __init visws_pre_intr_init(void)
 {
 	int i;
 
-	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		desc->status = IRQ_DISABLED;
-		desc->action = 0;
-		desc->depth = 1;
+	set_piix4_virtual_irq_type();
 
-		if (i == 0) {
-			desc->chip = &cobalt_irq_type;
-		}
-		else if (i == CO_IRQ_IDE0) {
-			desc->chip = &cobalt_irq_type;
-		}
-		else if (i == CO_IRQ_IDE1) {
-			desc->chip = &cobalt_irq_type;
-		}
-		else if (i == CO_IRQ_8259) {
-			desc->chip = &piix4_master_irq_type;
-		}
-		else if (i < CO_IRQ_APIC0) {
-			set_piix4_virtual_irq_type();
-			desc->chip = &piix4_virtual_irq_type;
-		}
-		else if (IS_CO_APIC(i)) {
-			desc->chip = &cobalt_irq_type;
-		}
+	for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
+		struct irq_chip *chip = NULL;
+
+		if (i == 0)
+			chip = &cobalt_irq_type;
+		else if (i == CO_IRQ_IDE0)
+			chip = &cobalt_irq_type;
+		else if (i == CO_IRQ_IDE1)
+			>chip = &cobalt_irq_type;
+		else if (i == CO_IRQ_8259)
+			chip = &piix4_master_irq_type;
+		else if (i < CO_IRQ_APIC0)
+			chip = &piix4_virtual_irq_type;
+		else if (IS_CO_APIC(i))
+			chip = &cobalt_irq_type;
+
+		if (chip)
+			set_irq_chip(i, chip);
 	}
 
 	setup_irq(CO_IRQ_8259, &master_action);

+ 7 - 0
arch/x86/kernel/x86_init.c

@@ -6,10 +6,12 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
+#include <linux/pci.h>
 
 #include <asm/bios_ebda.h>
 #include <asm/paravirt.h>
 #include <asm/pci_x86.h>
+#include <asm/pci.h>
 #include <asm/mpspec.h>
 #include <asm/setup.h>
 #include <asm/apic.h>
@@ -99,3 +101,8 @@ struct x86_platform_ops x86_platform = {
 };
 
 EXPORT_SYMBOL_GPL(x86_platform);
+struct x86_msi_ops x86_msi = {
+	.setup_msi_irqs = native_setup_msi_irqs,
+	.teardown_msi_irq = native_teardown_msi_irq,
+	.teardown_msi_irqs = default_teardown_msi_irqs,
+};

+ 1 - 2
arch/x86/kvm/lapic.c

@@ -1056,14 +1056,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.apic = apic;
 
-	apic->regs_page = alloc_page(GFP_KERNEL);
+	apic->regs_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (apic->regs_page == NULL) {
 		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
 		       vcpu->vcpu_id);
 		goto nomem_free_apic;
 	}
 	apic->regs = page_address(apic->regs_page);
-	memset(apic->regs, 0, PAGE_SIZE);
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,

+ 9 - 9
arch/x86/lguest/boot.c

@@ -791,22 +791,22 @@ static void lguest_flush_tlb_kernel(void)
  * simple as setting a bit.  We don't actually "ack" interrupts as such, we
  * just mask and unmask them.  I wonder if we should be cleverer?
  */
-static void disable_lguest_irq(unsigned int irq)
+static void disable_lguest_irq(struct irq_data *data)
 {
-	set_bit(irq, lguest_data.blocked_interrupts);
+	set_bit(data->irq, lguest_data.blocked_interrupts);
 }
 
-static void enable_lguest_irq(unsigned int irq)
+static void enable_lguest_irq(struct irq_data *data)
 {
-	clear_bit(irq, lguest_data.blocked_interrupts);
+	clear_bit(data->irq, lguest_data.blocked_interrupts);
 }
 
 /* This structure describes the lguest IRQ controller. */
 static struct irq_chip lguest_irq_controller = {
 	.name		= "lguest",
-	.mask		= disable_lguest_irq,
-	.mask_ack	= disable_lguest_irq,
-	.unmask		= enable_lguest_irq,
+	.irq_mask	= disable_lguest_irq,
+	.irq_mask_ack	= disable_lguest_irq,
+	.irq_unmask	= enable_lguest_irq,
 };
 
 /*
@@ -838,12 +838,12 @@ static void __init lguest_init_IRQ(void)
  * rather than set them in lguest_init_IRQ we are called here every time an
  * lguest device needs an interrupt.
  *
- * FIXME: irq_to_desc_alloc_node() can fail due to lack of memory, we should
+ * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should
  * pass that up!
  */
 void lguest_setup_irq(unsigned int irq)
 {
-	irq_to_desc_alloc_node(irq, 0);
+	irq_alloc_desc_at(irq, 0);
 	set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
 				      handle_level_irq, "level");
 }

+ 2 - 2
arch/x86/mm/init_32.c

@@ -67,7 +67,7 @@ static __init void *alloc_low_page(void)
 		panic("alloc_low_page: ran out of memory");
 
 	adr = __va(pfn * PAGE_SIZE);
-	memset(adr, 0, PAGE_SIZE);
+	clear_page(adr);
 	return adr;
 }
 
@@ -558,7 +558,7 @@ char swsusp_pg_dir[PAGE_SIZE]
 
 static inline void save_pg_dir(void)
 {
-	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
+	copy_page(swsusp_pg_dir, swapper_pg_dir);
 }
 #else /* !CONFIG_ACPI_SLEEP */
 static inline void save_pg_dir(void)

+ 1 - 1
arch/x86/mm/init_64.c

@@ -293,7 +293,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
 		panic("alloc_low_page: ran out of memory");
 
 	adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
-	memset(adr, 0, PAGE_SIZE);
+	clear_page(adr);
 	*phys  = pfn * PAGE_SIZE;
 	return adr;
 }

+ 3 - 3
arch/x86/mm/k8topology_64.c

@@ -54,8 +54,8 @@ static __init int find_northbridge(void)
 static __init void early_get_boot_cpu_id(void)
 {
 	/*
-	 * need to get boot_cpu_id so can use that to create apicid_to_node
-	 * in k8_scan_nodes()
+	 * need to get the APIC ID of the BSP so can use that to
+	 * create apicid_to_node in k8_scan_nodes()
 	 */
 #ifdef CONFIG_X86_MPPARSE
 	/*
@@ -212,7 +212,7 @@ int __init k8_scan_nodes(void)
 	bits = boot_cpu_data.x86_coreid_bits;
 	cores = (1<<bits);
 	apicid_base = 0;
-	/* need to get boot_cpu_id early for system with apicid lifting */
+	/* get the APIC ID of the BSP early for systems with apicid lifting */
 	early_get_boot_cpu_id();
 	if (boot_cpu_physical_apicid > 0) {
 		pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);

+ 1 - 0
arch/x86/pci/Makefile

@@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS)		+= pcbios.o
 obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig_$(BITS).o direct.o mmconfig-shared.o
 obj-$(CONFIG_PCI_DIRECT)	+= direct.o
 obj-$(CONFIG_PCI_OLPC)		+= olpc.o
+obj-$(CONFIG_PCI_XEN)		+= xen.o
 
 obj-y				+= fixup.o
 obj-$(CONFIG_ACPI)		+= acpi.o

+ 10 - 7
arch/x86/pci/common.c

@@ -421,16 +421,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
 
 	return bus;
 }
-
-int __init pcibios_init(void)
+void __init pcibios_set_cache_line_size(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
-	if (!raw_pci_ops) {
-		printk(KERN_WARNING "PCI: System does not support PCI\n");
-		return 0;
-	}
-
 	/*
 	 * Set PCI cacheline size to that of the CPU if the CPU has reported it.
 	 * (For older CPUs that don't support cpuid, we se it to 32 bytes
@@ -445,7 +439,16 @@ int __init pcibios_init(void)
  		pci_dfl_cache_line_size = 32 >> 2;
 		printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n");
 	}
+}
+
+int __init pcibios_init(void)
+{
+	if (!raw_pci_ops) {
+		printk(KERN_WARNING "PCI: System does not support PCI\n");
+		return 0;
+	}
 
+	pcibios_set_cache_line_size();
 	pcibios_resource_survey();
 
 	if (pci_bf_sort >= pci_force_bf)

+ 2 - 0
arch/x86/pci/i386.c

@@ -311,6 +311,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 		 */
 		prot |= _PAGE_CACHE_UC_MINUS;
 
+	prot |= _PAGE_IOMAP;	/* creating a mapping for IO */
+
 	vma->vm_page_prot = __pgprot(prot);
 
 	if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,

+ 142 - 0
arch/x86/pci/xen.c

@@ -0,0 +1,142 @@
+/*
+ * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
+ *			   x86 PCI core to support the Xen PCI Frontend
+ *
+ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/acpi.h>
+
+#include <linux/io.h>
+#include <asm/pci_x86.h>
+
+#include <asm/xen/hypervisor.h>
+
+#include <xen/events.h>
+#include <asm/xen/pci.h>
+
+#if defined(CONFIG_PCI_MSI)
+#include <linux/msi.h>
+
+struct xen_pci_frontend_ops *xen_pci_frontend;
+EXPORT_SYMBOL_GPL(xen_pci_frontend);
+
+/*
+ * For MSI interrupts we have to use drivers/xen/event.s functions to
+ * allocate an irq_desc and setup the right */
+
+
+static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	int irq, ret, i;
+	struct msi_desc *msidesc;
+	int *v;
+
+	v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
+	if (!v)
+		return -ENOMEM;
+
+	if (!xen_initial_domain()) {
+		if (type == PCI_CAP_ID_MSIX)
+			ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
+		else
+			ret = xen_pci_frontend_enable_msi(dev, &v);
+		if (ret)
+			goto error;
+	}
+	i = 0;
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = xen_allocate_pirq(v[i], 0, /* not sharable */
+			(type == PCI_CAP_ID_MSIX) ?
+			"pcifront-msi-x" : "pcifront-msi");
+		if (irq < 0)
+			return -1;
+
+		ret = set_irq_msi(irq, msidesc);
+		if (ret)
+			goto error_while;
+		i++;
+	}
+	kfree(v);
+	return 0;
+
+error_while:
+	unbind_from_irqhandler(irq, NULL);
+error:
+	if (ret == -ENODEV)
+		dev_err(&dev->dev, "Xen PCI frontend has not registered" \
+			" MSI/MSI-X support!\n");
+
+	kfree(v);
+	return ret;
+}
+
+static void xen_teardown_msi_irqs(struct pci_dev *dev)
+{
+	/* Only do this when were are in non-privileged mode.*/
+	if (!xen_initial_domain()) {
+		struct msi_desc *msidesc;
+
+		msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+		if (msidesc->msi_attrib.is_msix)
+			xen_pci_frontend_disable_msix(dev);
+		else
+			xen_pci_frontend_disable_msi(dev);
+	}
+
+}
+
+static void xen_teardown_msi_irq(unsigned int irq)
+{
+	xen_destroy_irq(irq);
+}
+#endif
+
+static int xen_pcifront_enable_irq(struct pci_dev *dev)
+{
+	int rc;
+	int share = 1;
+
+	dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
+
+	if (dev->irq < 0)
+		return -EINVAL;
+
+	if (dev->irq < NR_IRQS_LEGACY)
+		share = 0;
+
+	rc = xen_allocate_pirq(dev->irq, share, "pcifront");
+	if (rc < 0) {
+		dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
+			 dev->irq, rc);
+		return rc;
+	}
+	return 0;
+}
+
+int __init pci_xen_init(void)
+{
+	if (!xen_pv_domain() || xen_initial_domain())
+		return -ENODEV;
+
+	printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
+
+	pcibios_set_cache_line_size();
+
+	pcibios_enable_irq = xen_pcifront_enable_irq;
+	pcibios_disable_irq = NULL;
+
+#ifdef CONFIG_ACPI
+	/* Keep ACPI out of the picture */
+	acpi_noirq = 1;
+#endif
+
+#ifdef CONFIG_PCI_MSI
+	x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
+	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+	x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
+#endif
+	return 0;
+}

+ 3 - 0
arch/x86/xen/enlighten.c

@@ -45,6 +45,7 @@
 #include <asm/paravirt.h>
 #include <asm/apic.h>
 #include <asm/page.h>
+#include <asm/xen/pci.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
@@ -1220,6 +1221,8 @@ asmlinkage void __init xen_start_kernel(void)
 		add_preferred_console("xenboot", 0, NULL);
 		add_preferred_console("tty", 0, NULL);
 		add_preferred_console("hvc", 0, NULL);
+		if (pci_xen)
+			x86_init.pci.arch_init = pci_xen_init;
 	} else {
 		/* Make sure ACS will be enabled */
 		pci_request_acs();

+ 4 - 0
arch/x86/xen/pci-swiotlb-xen.c

@@ -1,6 +1,7 @@
 /* Glue code to lib/swiotlb-xen.c */
 
 #include <linux/dma-mapping.h>
+#include <linux/pci.h>
 #include <xen/swiotlb-xen.h>
 
 #include <asm/xen/hypervisor.h>
@@ -54,5 +55,8 @@ void __init pci_xen_swiotlb_init(void)
 	if (xen_swiotlb) {
 		xen_swiotlb_init(1);
 		dma_ops = &xen_swiotlb_dma_ops;
+
+		/* Make sure ACS will be enabled */
+		pci_request_acs();
 	}
 }

+ 0 - 2
arch/x86/xen/setup.c

@@ -260,7 +260,5 @@ void __init xen_arch_setup(void)
 
 	pm_idle = xen_idle;
 
-	paravirt_disable_iospace();
-
 	fiddle_vdso();
 }

+ 1 - 1
arch/xtensa/kernel/irq.c

@@ -92,7 +92,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 #endif
-		seq_printf(p, " %14s", irq_desc[i].chip->typename);
+		seq_printf(p, " %14s", irq_desc[i].chip->name);
 		seq_printf(p, "  %s", action->name);
 
 		for (action=action->next; action; action = action->next)

+ 2 - 0
drivers/block/xen-blkfront.c

@@ -1125,6 +1125,8 @@ static void blkback_changed(struct xenbus_device *dev,
 	case XenbusStateInitialising:
 	case XenbusStateInitWait:
 	case XenbusStateInitialised:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
 	case XenbusStateUnknown:
 	case XenbusStateClosed:
 		break;

+ 2 - 0
drivers/input/xen-kbdfront.c

@@ -276,6 +276,8 @@ static void xenkbd_backend_changed(struct xenbus_device *dev,
 	switch (backend_state) {
 	case XenbusStateInitialising:
 	case XenbusStateInitialised:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
 	case XenbusStateUnknown:
 	case XenbusStateClosed:
 		break;

+ 3 - 3
drivers/isdn/act2000/act2000.h

@@ -141,9 +141,9 @@ typedef struct irq_data_isa {
 	__u8            rcvhdr[8];
 } irq_data_isa;
 
-typedef union irq_data {
+typedef union act2000_irq_data {
 	irq_data_isa isa;
-} irq_data;
+} act2000_irq_data;
 
 /*
  * Per card driver data
@@ -176,7 +176,7 @@ typedef struct act2000_card {
 	char   *status_buf_read;
 	char   *status_buf_write;
 	char   *status_buf_end;
-	irq_data idat;			/* Data used for IRQ handler        */
+	act2000_irq_data idat;		/* Data used for IRQ handler        */
 	isdn_if interface;		/* Interface to upper layer         */
 	char regname[35];		/* Name used for request_region     */
 } act2000_card;

+ 14 - 4
drivers/isdn/hisax/config.c

@@ -801,6 +801,16 @@ static void closecard(int cardnr)
 	ll_unload(csta);
 }
 
+static irqreturn_t card_irq(int intno, void *dev_id)
+{
+	struct IsdnCardState *cs = dev_id;
+	irqreturn_t ret = cs->irq_func(intno, cs);
+
+	if (ret == IRQ_HANDLED)
+		cs->irq_cnt++;
+	return ret;
+}
+
 static int init_card(struct IsdnCardState *cs)
 {
 	int 	irq_cnt, cnt = 3, ret;
@@ -809,10 +819,10 @@ static int init_card(struct IsdnCardState *cs)
 		ret = cs->cardmsg(cs, CARD_INIT, NULL);
 		return(ret);
 	}
-	irq_cnt = kstat_irqs(cs->irq);
+	irq_cnt = cs->irq_cnt = 0;
 	printk(KERN_INFO "%s: IRQ %d count %d\n", CardType[cs->typ],
 	       cs->irq, irq_cnt);
-	if (request_irq(cs->irq, cs->irq_func, cs->irq_flags, "HiSax", cs)) {
+	if (request_irq(cs->irq, card_irq, cs->irq_flags, "HiSax", cs)) {
 		printk(KERN_WARNING "HiSax: couldn't get interrupt %d\n",
 		       cs->irq);
 		return 1;
@@ -822,8 +832,8 @@ static int init_card(struct IsdnCardState *cs)
 		/* Timeout 10ms */
 		msleep(10);
 		printk(KERN_INFO "%s: IRQ %d count %d\n",
-		       CardType[cs->typ], cs->irq, kstat_irqs(cs->irq));
-		if (kstat_irqs(cs->irq) == irq_cnt) {
+		       CardType[cs->typ], cs->irq, cs->irq_cnt);
+		if (cs->irq_cnt == irq_cnt) {
 			printk(KERN_WARNING
 			       "%s: IRQ(%d) getting no interrupts during init %d\n",
 			       CardType[cs->typ], cs->irq, 4 - cnt);

+ 1 - 0
drivers/isdn/hisax/hisax.h

@@ -959,6 +959,7 @@ struct IsdnCardState {
 	u_long		event;
 	struct work_struct tqueue;
 	struct timer_list dbusytimer;
+	unsigned int	irq_cnt;
 #ifdef ERROR_STATISTIC
 	int		err_crc;
 	int		err_tx;

+ 2 - 2
drivers/mfd/twl4030-irq.c

@@ -78,7 +78,7 @@ struct sih {
 	u8	irq_lines;		/* number of supported irq lines */
 
 	/* SIR ignored -- set interrupt, for testing only */
-	struct irq_data {
+	struct sih_irq_data {
 		u8	isr_offset;
 		u8	imr_offset;
 	} mask[2];
@@ -810,7 +810,7 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
 	twl4030_irq_chip = dummy_irq_chip;
 	twl4030_irq_chip.name = "twl4030";
 
-	twl4030_sih_irq_chip.ack = dummy_irq_chip.ack;
+	twl4030_sih_irq_chip.irq_ack = dummy_irq_chip.irq_ack;
 
 	for (i = irq_base; i < irq_end; i++) {
 		set_irq_chip_and_handler(i, &twl4030_irq_chip,

+ 2 - 0
drivers/net/xen-netfront.c

@@ -1610,6 +1610,8 @@ static void backend_changed(struct xenbus_device *dev,
 	switch (backend_state) {
 	case XenbusStateInitialising:
 	case XenbusStateInitialised:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
 	case XenbusStateConnected:
 	case XenbusStateUnknown:
 	case XenbusStateClosed:

+ 21 - 0
drivers/pci/Kconfig

@@ -40,6 +40,27 @@ config PCI_STUB
 
 	  When in doubt, say N.
 
+config XEN_PCIDEV_FRONTEND
+        tristate "Xen PCI Frontend"
+        depends on PCI && X86 && XEN
+        select HOTPLUG
+        select PCI_XEN
+        default y
+        help
+          The PCI device frontend driver allows the kernel to import arbitrary
+          PCI devices from a PCI backend to support PCI driver domains.
+
+config XEN_PCIDEV_FE_DEBUG
+        bool "Xen PCI Frontend debugging"
+        depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG
+	help
+	  Say Y here if you want the Xen PCI frontend to produce a bunch of debug
+	  messages to the system log.  Select this if you are having a
+	  problem with Xen PCI frontend support and want to see more of what is
+	  going on.
+
+	  When in doubt, say N.
+
 config HT_IRQ
 	bool "Interrupts on hypertransport devices"
 	default y

+ 2 - 0
drivers/pci/Makefile

@@ -65,6 +65,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
 
 obj-$(CONFIG_PCI_STUB) += pci-stub.o
 
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
+
 ifeq ($(CONFIG_PCI_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
 endif

+ 1 - 0
drivers/pci/bus.c

@@ -299,6 +299,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
 	}
 	up_read(&pci_bus_sem);
 }
+EXPORT_SYMBOL_GPL(pci_walk_bus);
 
 EXPORT_SYMBOL(pci_bus_alloc_resource);
 EXPORT_SYMBOL_GPL(pci_bus_add_device);

+ 4 - 4
drivers/pci/dmar.c

@@ -1221,9 +1221,9 @@ const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
 	}
 }
 
-void dmar_msi_unmask(unsigned int irq)
+void dmar_msi_unmask(struct irq_data *data)
 {
-	struct intel_iommu *iommu = get_irq_data(irq);
+	struct intel_iommu *iommu = irq_data_get_irq_data(data);
 	unsigned long flag;
 
 	/* unmask it */
@@ -1234,10 +1234,10 @@ void dmar_msi_unmask(unsigned int irq)
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
-void dmar_msi_mask(unsigned int irq)
+void dmar_msi_mask(struct irq_data *data)
 {
 	unsigned long flag;
-	struct intel_iommu *iommu = get_irq_data(irq);
+	struct intel_iommu *iommu = irq_data_get_irq_data(data);
 
 	/* mask it */
 	spin_lock_irqsave(&iommu->register_lock, flag);

+ 8 - 14
drivers/pci/htirq.c

@@ -57,28 +57,22 @@ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg)
 	*msg = cfg->msg;
 }
 
-void mask_ht_irq(unsigned int irq)
+void mask_ht_irq(struct irq_data *data)
 {
-	struct ht_irq_cfg *cfg;
-	struct ht_irq_msg msg;
-
-	cfg = get_irq_data(irq);
+	struct ht_irq_cfg *cfg = irq_data_get_irq_data(data);
+	struct ht_irq_msg msg = cfg->msg;
 
-	msg = cfg->msg;
 	msg.address_lo |= 1;
-	write_ht_irq_msg(irq, &msg);
+	write_ht_irq_msg(data->irq, &msg);
 }
 
-void unmask_ht_irq(unsigned int irq)
+void unmask_ht_irq(struct irq_data *data)
 {
-	struct ht_irq_cfg *cfg;
-	struct ht_irq_msg msg;
-
-	cfg = get_irq_data(irq);
+	struct ht_irq_cfg *cfg = irq_data_get_irq_data(data);
+	struct ht_irq_msg msg = cfg->msg;
 
-	msg = cfg->msg;
 	msg.address_lo &= ~1;
-	write_ht_irq_msg(irq, &msg);
+	write_ht_irq_msg(data->irq, &msg);
 }
 
 /**

+ 27 - 185
drivers/pci/intr_remapping.c

@@ -46,109 +46,24 @@ static __init int setup_intremap(char *str)
 }
 early_param("intremap", setup_intremap);
 
-struct irq_2_iommu {
-	struct intel_iommu *iommu;
-	u16 irte_index;
-	u16 sub_handle;
-	u8  irte_mask;
-};
-
-#ifdef CONFIG_GENERIC_HARDIRQS
-static struct irq_2_iommu *get_one_free_irq_2_iommu(int node)
-{
-	struct irq_2_iommu *iommu;
-
-	iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node);
-
-	return iommu;
-}
-
-static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (WARN_ON_ONCE(!desc))
-		return NULL;
-
-	return desc->irq_2_iommu;
-}
-
-static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
-{
-	struct irq_desc *desc;
-	struct irq_2_iommu *irq_iommu;
-
-	desc = irq_to_desc(irq);
-	if (!desc) {
-		printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-		return NULL;
-	}
-
-	irq_iommu = desc->irq_2_iommu;
-
-	if (!irq_iommu)
-		desc->irq_2_iommu = get_one_free_irq_2_iommu(irq_node(irq));
-
-	return desc->irq_2_iommu;
-}
-
-#else /* !CONFIG_SPARSE_IRQ */
-
-static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
-
-static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
-{
-	if (irq < nr_irqs)
-		return &irq_2_iommuX[irq];
-
-	return NULL;
-}
-static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
-{
-	return irq_2_iommu(irq);
-}
-#endif
-
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
-static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq)
-{
-	struct irq_2_iommu *irq_iommu;
-
-	irq_iommu = irq_2_iommu(irq);
-
-	if (!irq_iommu)
-		return NULL;
-
-	if (!irq_iommu->iommu)
-		return NULL;
-
-	return irq_iommu;
-}
-
-int irq_remapped(int irq)
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	return valid_irq_2_iommu(irq) != NULL;
+	struct irq_cfg *cfg = get_irq_chip_data(irq);
+	return cfg ? &cfg->irq_2_iommu : NULL;
 }
 
 int get_irte(int irq, struct irte *entry)
 {
-	int index;
-	struct irq_2_iommu *irq_iommu;
+	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 	unsigned long flags;
+	int index;
 
-	if (!entry)
+	if (!entry || !irq_iommu)
 		return -1;
 
 	spin_lock_irqsave(&irq_2_ir_lock, flags);
-	irq_iommu = valid_irq_2_iommu(irq);
-	if (!irq_iommu) {
-		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-		return -1;
-	}
 
 	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	*entry = *(irq_iommu->iommu->ir_table->base + index);
@@ -160,20 +75,14 @@ int get_irte(int irq, struct irte *entry)
 int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
 	struct ir_table *table = iommu->ir_table;
-	struct irq_2_iommu *irq_iommu;
+	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 	u16 index, start_index;
 	unsigned int mask = 0;
 	unsigned long flags;
 	int i;
 
-	if (!count)
-		return -1;
-
-#ifndef CONFIG_SPARSE_IRQ
-	/* protect irq_2_iommu_alloc later */
-	if (irq >= nr_irqs)
+	if (!count || !irq_iommu)
 		return -1;
-#endif
 
 	/*
 	 * start the IRTE search from index 0.
@@ -214,13 +123,6 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	for (i = index; i < index + count; i++)
 		table->base[i].present = 1;
 
-	irq_iommu = irq_2_iommu_alloc(irq);
-	if (!irq_iommu) {
-		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-		printk(KERN_ERR "can't allocate irq_2_iommu\n");
-		return -1;
-	}
-
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index =  index;
 	irq_iommu->sub_handle = 0;
@@ -244,17 +146,14 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
 
 int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 {
-	int index;
-	struct irq_2_iommu *irq_iommu;
+	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 	unsigned long flags;
+	int index;
 
-	spin_lock_irqsave(&irq_2_ir_lock, flags);
-	irq_iommu = valid_irq_2_iommu(irq);
-	if (!irq_iommu) {
-		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+	if (!irq_iommu)
 		return -1;
-	}
 
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 	*sub_handle = irq_iommu->sub_handle;
 	index = irq_iommu->irte_index;
 	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
@@ -263,18 +162,13 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 
 int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
-	struct irq_2_iommu *irq_iommu;
+	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 	unsigned long flags;
 
-	spin_lock_irqsave(&irq_2_ir_lock, flags);
-
-	irq_iommu = irq_2_iommu_alloc(irq);
-
-	if (!irq_iommu) {
-		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-		printk(KERN_ERR "can't allocate irq_2_iommu\n");
+	if (!irq_iommu)
 		return -1;
-	}
+
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index = index;
@@ -286,43 +180,18 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 	return 0;
 }
 
-int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
-{
-	struct irq_2_iommu *irq_iommu;
-	unsigned long flags;
-
-	spin_lock_irqsave(&irq_2_ir_lock, flags);
-	irq_iommu = valid_irq_2_iommu(irq);
-	if (!irq_iommu) {
-		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-		return -1;
-	}
-
-	irq_iommu->iommu = NULL;
-	irq_iommu->irte_index = 0;
-	irq_iommu->sub_handle = 0;
-	irq_2_iommu(irq)->irte_mask = 0;
-
-	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-
-	return 0;
-}
-
 int modify_irte(int irq, struct irte *irte_modified)
 {
-	int rc;
-	int index;
-	struct irte *irte;
+	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 	struct intel_iommu *iommu;
-	struct irq_2_iommu *irq_iommu;
 	unsigned long flags;
+	struct irte *irte;
+	int rc, index;
 
-	spin_lock_irqsave(&irq_2_ir_lock, flags);
-	irq_iommu = valid_irq_2_iommu(irq);
-	if (!irq_iommu) {
-		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+	if (!irq_iommu)
 		return -1;
-	}
+
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 
 	iommu = irq_iommu->iommu;
 
@@ -339,31 +208,6 @@ int modify_irte(int irq, struct irte *irte_modified)
 	return rc;
 }
 
-int flush_irte(int irq)
-{
-	int rc;
-	int index;
-	struct intel_iommu *iommu;
-	struct irq_2_iommu *irq_iommu;
-	unsigned long flags;
-
-	spin_lock_irqsave(&irq_2_ir_lock, flags);
-	irq_iommu = valid_irq_2_iommu(irq);
-	if (!irq_iommu) {
-		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-		return -1;
-	}
-
-	iommu = irq_iommu->iommu;
-
-	index = irq_iommu->irte_index + irq_iommu->sub_handle;
-
-	rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
-	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-
-	return rc;
-}
-
 struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
 {
 	int i;
@@ -420,16 +264,14 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
 
 int free_irte(int irq)
 {
-	int rc = 0;
-	struct irq_2_iommu *irq_iommu;
+	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 	unsigned long flags;
+	int rc;
 
-	spin_lock_irqsave(&irq_2_ir_lock, flags);
-	irq_iommu = valid_irq_2_iommu(irq);
-	if (!irq_iommu) {
-		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+	if (!irq_iommu)
 		return -1;
-	}
+
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 
 	rc = clear_entries(irq_iommu);
 

+ 28 - 24
drivers/pci/msi.c

@@ -35,7 +35,12 @@ int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
 #endif
 
 #ifndef arch_setup_msi_irqs
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+# define arch_setup_msi_irqs default_setup_msi_irqs
+# define HAVE_DEFAULT_MSI_SETUP_IRQS
+#endif
+
+#ifdef HAVE_DEFAULT_MSI_SETUP_IRQS
+int default_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	struct msi_desc *entry;
 	int ret;
@@ -60,7 +65,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 #endif
 
 #ifndef arch_teardown_msi_irqs
-void arch_teardown_msi_irqs(struct pci_dev *dev)
+# define arch_teardown_msi_irqs default_teardown_msi_irqs
+# define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+#endif
+
+#ifdef HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+void default_teardown_msi_irqs(struct pci_dev *dev)
 {
 	struct msi_desc *entry;
 
@@ -170,33 +180,31 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag)
 	desc->masked = __msix_mask_irq(desc, flag);
 }
 
-static void msi_set_mask_bit(unsigned irq, u32 flag)
+static void msi_set_mask_bit(struct irq_data *data, u32 flag)
 {
-	struct msi_desc *desc = get_irq_msi(irq);
+	struct msi_desc *desc = irq_data_get_msi(data);
 
 	if (desc->msi_attrib.is_msix) {
 		msix_mask_irq(desc, flag);
 		readl(desc->mask_base);		/* Flush write to device */
 	} else {
-		unsigned offset = irq - desc->dev->irq;
+		unsigned offset = data->irq - desc->dev->irq;
 		msi_mask_irq(desc, 1 << offset, flag << offset);
 	}
 }
 
-void mask_msi_irq(unsigned int irq)
+void mask_msi_irq(struct irq_data *data)
 {
-	msi_set_mask_bit(irq, 1);
+	msi_set_mask_bit(data, 1);
 }
 
-void unmask_msi_irq(unsigned int irq)
+void unmask_msi_irq(struct irq_data *data)
 {
-	msi_set_mask_bit(irq, 0);
+	msi_set_mask_bit(data, 0);
 }
 
-void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_desc_msi(desc);
-
 	BUG_ON(entry->dev->current_state != PCI_D0);
 
 	if (entry->msi_attrib.is_msix) {
@@ -227,15 +235,13 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 
 void read_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct msi_desc *entry = get_irq_msi(irq);
 
-	read_msi_msg_desc(desc, msg);
+	__read_msi_msg(entry, msg);
 }
 
-void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_desc_msi(desc);
-
 	/* Assert that the cache is valid, assuming that
 	 * valid messages are not all-zeroes. */
 	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
@@ -246,15 +252,13 @@ void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 
 void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct msi_desc *entry = get_irq_msi(irq);
 
-	get_cached_msi_msg_desc(desc, msg);
+	__get_cached_msi_msg(entry, msg);
 }
 
-void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_desc_msi(desc);
-
 	if (entry->dev->current_state != PCI_D0) {
 		/* Don't touch the hardware now */
 	} else if (entry->msi_attrib.is_msix) {
@@ -292,9 +296,9 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 
 void write_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct msi_desc *entry = get_irq_msi(irq);
 
-	write_msi_msg_desc(desc, msg);
+	__write_msi_msg(entry, msg);
 }
 
 static void free_msi_irqs(struct pci_dev *dev)

+ 1148 - 0
drivers/pci/xen-pcifront.c

@@ -0,0 +1,1148 @@
+/*
+ * Xen PCI Frontend.
+ *
+ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/page.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <xen/xenbus.h>
+#include <xen/interface/io/pciif.h>
+#include <asm/xen/pci.h>
+#include <linux/interrupt.h>
+#include <asm/atomic.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <linux/time.h>
+
+#define INVALID_GRANT_REF (0)
+#define INVALID_EVTCHN    (-1)
+
+struct pci_bus_entry {
+	struct list_head list;
+	struct pci_bus *bus;
+};
+
+#define _PDEVB_op_active		(0)
+#define PDEVB_op_active			(1 << (_PDEVB_op_active))
+
+struct pcifront_device {
+	struct xenbus_device *xdev;
+	struct list_head root_buses;
+
+	int evtchn;
+	int gnt_ref;
+
+	int irq;
+
+	/* Lock this when doing any operations in sh_info */
+	spinlock_t sh_info_lock;
+	struct xen_pci_sharedinfo *sh_info;
+	struct work_struct op_work;
+	unsigned long flags;
+
+};
+
+struct pcifront_sd {
+	int domain;
+	struct pcifront_device *pdev;
+};
+
+static inline struct pcifront_device *
+pcifront_get_pdev(struct pcifront_sd *sd)
+{
+	return sd->pdev;
+}
+
+static inline void pcifront_init_sd(struct pcifront_sd *sd,
+				    unsigned int domain, unsigned int bus,
+				    struct pcifront_device *pdev)
+{
+	sd->domain = domain;
+	sd->pdev = pdev;
+}
+
+static DEFINE_SPINLOCK(pcifront_dev_lock);
+static struct pcifront_device *pcifront_dev;
+
+static int verbose_request;
+module_param(verbose_request, int, 0644);
+
+static int errno_to_pcibios_err(int errno)
+{
+	switch (errno) {
+	case XEN_PCI_ERR_success:
+		return PCIBIOS_SUCCESSFUL;
+
+	case XEN_PCI_ERR_dev_not_found:
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	case XEN_PCI_ERR_invalid_offset:
+	case XEN_PCI_ERR_op_failed:
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	case XEN_PCI_ERR_not_implemented:
+		return PCIBIOS_FUNC_NOT_SUPPORTED;
+
+	case XEN_PCI_ERR_access_denied:
+		return PCIBIOS_SET_FAILED;
+	}
+	return errno;
+}
+
+static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
+{
+	if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
+		&& !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) {
+		dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
+		schedule_work(&pdev->op_work);
+	}
+}
+
+static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
+{
+	int err = 0;
+	struct xen_pci_op *active_op = &pdev->sh_info->op;
+	unsigned long irq_flags;
+	evtchn_port_t port = pdev->evtchn;
+	unsigned irq = pdev->irq;
+	s64 ns, ns_timeout;
+	struct timeval tv;
+
+	spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
+
+	memcpy(active_op, op, sizeof(struct xen_pci_op));
+
+	/* Go */
+	wmb();
+	set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+	notify_remote_via_evtchn(port);
+
+	/*
+	 * We set a poll timeout of 3 seconds but give up on return after
+	 * 2 seconds. It is better to time out too late rather than too early
+	 * (in the latter case we end up continually re-executing poll() with a
+	 * timeout in the past). 1s difference gives plenty of slack for error.
+	 */
+	do_gettimeofday(&tv);
+	ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
+
+	xen_clear_irq_pending(irq);
+
+	while (test_bit(_XEN_PCIF_active,
+			(unsigned long *)&pdev->sh_info->flags)) {
+		xen_poll_irq_timeout(irq, jiffies + 3*HZ);
+		xen_clear_irq_pending(irq);
+		do_gettimeofday(&tv);
+		ns = timeval_to_ns(&tv);
+		if (ns > ns_timeout) {
+			dev_err(&pdev->xdev->dev,
+				"pciback not responding!!!\n");
+			clear_bit(_XEN_PCIF_active,
+				  (unsigned long *)&pdev->sh_info->flags);
+			err = XEN_PCI_ERR_dev_not_found;
+			goto out;
+		}
+	}
+
+	/*
+	* We might lose backend service request since we
+	* reuse same evtchn with pci_conf backend response. So re-schedule
+	* aer pcifront service.
+	*/
+	if (test_bit(_XEN_PCIB_active,
+			(unsigned long *)&pdev->sh_info->flags)) {
+		dev_err(&pdev->xdev->dev,
+			"schedule aer pcifront service\n");
+		schedule_pcifront_aer_op(pdev);
+	}
+
+	memcpy(op, active_op, sizeof(struct xen_pci_op));
+
+	err = op->err;
+out:
+	spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
+	return err;
+}
+
+/* Access to this function is spinlocked in drivers/pci/access.c */
+static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
+			     int where, int size, u32 *val)
+{
+	int err = 0;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_conf_read,
+		.domain = pci_domain_nr(bus),
+		.bus    = bus->number,
+		.devfn  = devfn,
+		.offset = where,
+		.size   = size,
+	};
+	struct pcifront_sd *sd = bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	if (verbose_request)
+		dev_info(&pdev->xdev->dev,
+			 "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
+			 pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
+			 PCI_FUNC(devfn), where, size);
+
+	err = do_pci_op(pdev, &op);
+
+	if (likely(!err)) {
+		if (verbose_request)
+			dev_info(&pdev->xdev->dev, "read got back value %x\n",
+				 op.value);
+
+		*val = op.value;
+	} else if (err == -ENODEV) {
+		/* No device here, pretend that it just returned 0 */
+		err = 0;
+		*val = 0;
+	}
+
+	return errno_to_pcibios_err(err);
+}
+
+/* Access to this function is spinlocked in drivers/pci/access.c */
+static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
+			      int where, int size, u32 val)
+{
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_conf_write,
+		.domain = pci_domain_nr(bus),
+		.bus    = bus->number,
+		.devfn  = devfn,
+		.offset = where,
+		.size   = size,
+		.value  = val,
+	};
+	struct pcifront_sd *sd = bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	if (verbose_request)
+		dev_info(&pdev->xdev->dev,
+			 "write dev=%04x:%02x:%02x.%01x - "
+			 "offset %x size %d val %x\n",
+			 pci_domain_nr(bus), bus->number,
+			 PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
+
+	return errno_to_pcibios_err(do_pci_op(pdev, &op));
+}
+
+struct pci_ops pcifront_bus_ops = {
+	.read = pcifront_bus_read,
+	.write = pcifront_bus_write,
+};
+
+#ifdef CONFIG_PCI_MSI
+static int pci_frontend_enable_msix(struct pci_dev *dev,
+				    int **vector, int nvec)
+{
+	int err;
+	int i;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_enable_msix,
+		.domain = pci_domain_nr(dev->bus),
+		.bus = dev->bus->number,
+		.devfn = dev->devfn,
+		.value = nvec,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+	struct msi_desc *entry;
+
+	if (nvec > SH_INFO_MAX_VEC) {
+		dev_err(&dev->dev, "too much vector for pci frontend: %x."
+				   " Increase SH_INFO_MAX_VEC.\n", nvec);
+		return -EINVAL;
+	}
+
+	i = 0;
+	list_for_each_entry(entry, &dev->msi_list, list) {
+		op.msix_entries[i].entry = entry->msi_attrib.entry_nr;
+		/* Vector is useless at this point. */
+		op.msix_entries[i].vector = -1;
+		i++;
+	}
+
+	err = do_pci_op(pdev, &op);
+
+	if (likely(!err)) {
+		if (likely(!op.value)) {
+			/* we get the result */
+			for (i = 0; i < nvec; i++)
+				*(*vector+i) = op.msix_entries[i].vector;
+			return 0;
+		} else {
+			printk(KERN_DEBUG "enable msix get value %x\n",
+				op.value);
+			return op.value;
+		}
+	} else {
+		dev_err(&dev->dev, "enable msix get err %x\n", err);
+		return err;
+	}
+}
+
+static void pci_frontend_disable_msix(struct pci_dev *dev)
+{
+	int err;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_disable_msix,
+		.domain = pci_domain_nr(dev->bus),
+		.bus = dev->bus->number,
+		.devfn = dev->devfn,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	err = do_pci_op(pdev, &op);
+
+	/* What should do for error ? */
+	if (err)
+		dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
+}
+
+static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
+{
+	int err;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_enable_msi,
+		.domain = pci_domain_nr(dev->bus),
+		.bus = dev->bus->number,
+		.devfn = dev->devfn,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	err = do_pci_op(pdev, &op);
+	if (likely(!err)) {
+		*(*vector) = op.value;
+	} else {
+		dev_err(&dev->dev, "pci frontend enable msi failed for dev "
+				    "%x:%x\n", op.bus, op.devfn);
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static void pci_frontend_disable_msi(struct pci_dev *dev)
+{
+	int err;
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_disable_msi,
+		.domain = pci_domain_nr(dev->bus),
+		.bus = dev->bus->number,
+		.devfn = dev->devfn,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	err = do_pci_op(pdev, &op);
+	if (err == XEN_PCI_ERR_dev_not_found) {
+		/* XXX No response from backend, what shall we do? */
+		printk(KERN_DEBUG "get no response from backend for disable MSI\n");
+		return;
+	}
+	if (err)
+		/* how can pciback notify us fail? */
+		printk(KERN_DEBUG "get fake response frombackend\n");
+}
+
+static struct xen_pci_frontend_ops pci_frontend_ops = {
+	.enable_msi = pci_frontend_enable_msi,
+	.disable_msi = pci_frontend_disable_msi,
+	.enable_msix = pci_frontend_enable_msix,
+	.disable_msix = pci_frontend_disable_msix,
+};
+
+static void pci_frontend_registrar(int enable)
+{
+	if (enable)
+		xen_pci_frontend = &pci_frontend_ops;
+	else
+		xen_pci_frontend = NULL;
+};
+#else
+static inline void pci_frontend_registrar(int enable) { };
+#endif /* CONFIG_PCI_MSI */
+
+/* Claim resources for the PCI frontend as-is, backend won't allow changes */
+static int pcifront_claim_resource(struct pci_dev *dev, void *data)
+{
+	struct pcifront_device *pdev = data;
+	int i;
+	struct resource *r;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		r = &dev->resource[i];
+
+		if (!r->parent && r->start && r->flags) {
+			dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
+				pci_name(dev), i);
+			if (pci_claim_resource(dev, i)) {
+				dev_err(&pdev->xdev->dev, "Could not claim "
+					"resource %s/%d! Device offline. Try "
+					"giving less than 4GB to domain.\n",
+					pci_name(dev), i);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
+				unsigned int domain, unsigned int bus,
+				struct pci_bus *b)
+{
+	struct pci_dev *d;
+	unsigned int devfn;
+
+	/* Scan the bus for functions and add.
+	 * We omit handling of PCI bridge attachment because pciback prevents
+	 * bridges from being exported.
+	 */
+	for (devfn = 0; devfn < 0x100; devfn++) {
+		d = pci_get_slot(b, devfn);
+		if (d) {
+			/* Device is already known. */
+			pci_dev_put(d);
+			continue;
+		}
+
+		d = pci_scan_single_device(b, devfn);
+		if (d)
+			dev_info(&pdev->xdev->dev, "New device on "
+				 "%04x:%02x:%02x.%02x found.\n", domain, bus,
+				 PCI_SLOT(devfn), PCI_FUNC(devfn));
+	}
+
+	return 0;
+}
+
+static int __devinit pcifront_scan_root(struct pcifront_device *pdev,
+				 unsigned int domain, unsigned int bus)
+{
+	struct pci_bus *b;
+	struct pcifront_sd *sd = NULL;
+	struct pci_bus_entry *bus_entry = NULL;
+	int err = 0;
+
+#ifndef CONFIG_PCI_DOMAINS
+	if (domain != 0) {
+		dev_err(&pdev->xdev->dev,
+			"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
+		dev_err(&pdev->xdev->dev,
+			"Please compile with CONFIG_PCI_DOMAINS\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+#endif
+
+	dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
+		 domain, bus);
+
+	bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
+	sd = kmalloc(sizeof(*sd), GFP_KERNEL);
+	if (!bus_entry || !sd) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	pcifront_init_sd(sd, domain, bus, pdev);
+
+	b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
+				  &pcifront_bus_ops, sd);
+	if (!b) {
+		dev_err(&pdev->xdev->dev,
+			"Error creating PCI Frontend Bus!\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	bus_entry->bus = b;
+
+	list_add(&bus_entry->list, &pdev->root_buses);
+
+	/* pci_scan_bus_parented skips devices which do not have a have
+	* devfn==0. The pcifront_scan_bus enumerates all devfn. */
+	err = pcifront_scan_bus(pdev, domain, bus, b);
+
+	/* Claim resources before going "live" with our devices */
+	pci_walk_bus(b, pcifront_claim_resource, pdev);
+
+	/* Create SysFS and notify udev of the devices. Aka: "going live" */
+	pci_bus_add_devices(b);
+
+	return err;
+
+err_out:
+	kfree(bus_entry);
+	kfree(sd);
+
+	return err;
+}
+
+static int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
+				   unsigned int domain, unsigned int bus)
+{
+	int err;
+	struct pci_bus *b;
+
+#ifndef CONFIG_PCI_DOMAINS
+	if (domain != 0) {
+		dev_err(&pdev->xdev->dev,
+			"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
+		dev_err(&pdev->xdev->dev,
+			"Please compile with CONFIG_PCI_DOMAINS\n");
+		return -EINVAL;
+	}
+#endif
+
+	dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
+		 domain, bus);
+
+	b = pci_find_bus(domain, bus);
+	if (!b)
+		/* If the bus is unknown, create it. */
+		return pcifront_scan_root(pdev, domain, bus);
+
+	err = pcifront_scan_bus(pdev, domain, bus, b);
+
+	/* Claim resources before going "live" with our devices */
+	pci_walk_bus(b, pcifront_claim_resource, pdev);
+
+	/* Create SysFS and notify udev of the devices. Aka: "going live" */
+	pci_bus_add_devices(b);
+
+	return err;
+}
+
+static void free_root_bus_devs(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	while (!list_empty(&bus->devices)) {
+		dev = container_of(bus->devices.next, struct pci_dev,
+				   bus_list);
+		dev_dbg(&dev->dev, "removing device\n");
+		pci_remove_bus_device(dev);
+	}
+}
+
+static void pcifront_free_roots(struct pcifront_device *pdev)
+{
+	struct pci_bus_entry *bus_entry, *t;
+
+	dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
+
+	list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
+		list_del(&bus_entry->list);
+
+		free_root_bus_devs(bus_entry->bus);
+
+		kfree(bus_entry->bus->sysdata);
+
+		device_unregister(bus_entry->bus->bridge);
+		pci_remove_bus(bus_entry->bus);
+
+		kfree(bus_entry);
+	}
+}
+
+static pci_ers_result_t pcifront_common_process(int cmd,
+						struct pcifront_device *pdev,
+						pci_channel_state_t state)
+{
+	pci_ers_result_t result;
+	struct pci_driver *pdrv;
+	int bus = pdev->sh_info->aer_op.bus;
+	int devfn = pdev->sh_info->aer_op.devfn;
+	struct pci_dev *pcidev;
+	int flag = 0;
+
+	dev_dbg(&pdev->xdev->dev,
+		"pcifront AER process: cmd %x (bus:%x, devfn%x)",
+		cmd, bus, devfn);
+	result = PCI_ERS_RESULT_NONE;
+
+	pcidev = pci_get_bus_and_slot(bus, devfn);
+	if (!pcidev || !pcidev->driver) {
+		dev_err(&pcidev->dev,
+			"device or driver is NULL\n");
+		return result;
+	}
+	pdrv = pcidev->driver;
+
+	if (get_driver(&pdrv->driver)) {
+		if (pdrv->err_handler && pdrv->err_handler->error_detected) {
+			dev_dbg(&pcidev->dev,
+				"trying to call AER service\n");
+			if (pcidev) {
+				flag = 1;
+				switch (cmd) {
+				case XEN_PCI_OP_aer_detected:
+					result = pdrv->err_handler->
+						 error_detected(pcidev, state);
+					break;
+				case XEN_PCI_OP_aer_mmio:
+					result = pdrv->err_handler->
+						 mmio_enabled(pcidev);
+					break;
+				case XEN_PCI_OP_aer_slotreset:
+					result = pdrv->err_handler->
+						 slot_reset(pcidev);
+					break;
+				case XEN_PCI_OP_aer_resume:
+					pdrv->err_handler->resume(pcidev);
+					break;
+				default:
+					dev_err(&pdev->xdev->dev,
+						"bad request in aer recovery "
+						"operation!\n");
+
+				}
+			}
+		}
+		put_driver(&pdrv->driver);
+	}
+	if (!flag)
+		result = PCI_ERS_RESULT_NONE;
+
+	return result;
+}
+
+
+static void pcifront_do_aer(struct work_struct *data)
+{
+	struct pcifront_device *pdev =
+		container_of(data, struct pcifront_device, op_work);
+	int cmd = pdev->sh_info->aer_op.cmd;
+	pci_channel_state_t state =
+		(pci_channel_state_t)pdev->sh_info->aer_op.err;
+
+	/*If a pci_conf op is in progress,
+		we have to wait until it is done before service aer op*/
+	dev_dbg(&pdev->xdev->dev,
+		"pcifront service aer bus %x devfn %x\n",
+		pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
+
+	pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state);
+
+	/* Post the operation to the guest. */
+	wmb();
+	clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags);
+	notify_remote_via_evtchn(pdev->evtchn);
+
+	/*in case of we lost an aer request in four lines time_window*/
+	smp_mb__before_clear_bit();
+	clear_bit(_PDEVB_op_active, &pdev->flags);
+	smp_mb__after_clear_bit();
+
+	schedule_pcifront_aer_op(pdev);
+
+}
+
+static irqreturn_t pcifront_handler_aer(int irq, void *dev)
+{
+	struct pcifront_device *pdev = dev;
+	schedule_pcifront_aer_op(pdev);
+	return IRQ_HANDLED;
+}
+static int pcifront_connect(struct pcifront_device *pdev)
+{
+	int err = 0;
+
+	spin_lock(&pcifront_dev_lock);
+
+	if (!pcifront_dev) {
+		dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
+		pcifront_dev = pdev;
+	} else {
+		dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
+		err = -EEXIST;
+	}
+
+	spin_unlock(&pcifront_dev_lock);
+
+	return err;
+}
+
+static void pcifront_disconnect(struct pcifront_device *pdev)
+{
+	spin_lock(&pcifront_dev_lock);
+
+	if (pdev == pcifront_dev) {
+		dev_info(&pdev->xdev->dev,
+			 "Disconnecting PCI Frontend Buses\n");
+		pcifront_dev = NULL;
+	}
+
+	spin_unlock(&pcifront_dev_lock);
+}
+static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
+{
+	struct pcifront_device *pdev;
+
+	pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
+	if (pdev == NULL)
+		goto out;
+
+	pdev->sh_info =
+	    (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
+	if (pdev->sh_info == NULL) {
+		kfree(pdev);
+		pdev = NULL;
+		goto out;
+	}
+	pdev->sh_info->flags = 0;
+
+	/*Flag for registering PV AER handler*/
+	set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags);
+
+	dev_set_drvdata(&xdev->dev, pdev);
+	pdev->xdev = xdev;
+
+	INIT_LIST_HEAD(&pdev->root_buses);
+
+	spin_lock_init(&pdev->sh_info_lock);
+
+	pdev->evtchn = INVALID_EVTCHN;
+	pdev->gnt_ref = INVALID_GRANT_REF;
+	pdev->irq = -1;
+
+	INIT_WORK(&pdev->op_work, pcifront_do_aer);
+
+	dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
+		pdev, pdev->sh_info);
+out:
+	return pdev;
+}
+
+static void free_pdev(struct pcifront_device *pdev)
+{
+	dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
+
+	pcifront_free_roots(pdev);
+
+	/*For PCIE_AER error handling job*/
+	flush_scheduled_work();
+
+	if (pdev->irq >= 0)
+		unbind_from_irqhandler(pdev->irq, pdev);
+
+	if (pdev->evtchn != INVALID_EVTCHN)
+		xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
+
+	if (pdev->gnt_ref != INVALID_GRANT_REF)
+		gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */,
+					  (unsigned long)pdev->sh_info);
+	else
+		free_page((unsigned long)pdev->sh_info);
+
+	dev_set_drvdata(&pdev->xdev->dev, NULL);
+
+	kfree(pdev);
+}
+
+static int pcifront_publish_info(struct pcifront_device *pdev)
+{
+	int err = 0;
+	struct xenbus_transaction trans;
+
+	err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+	if (err < 0)
+		goto out;
+
+	pdev->gnt_ref = err;
+
+	err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
+	if (err)
+		goto out;
+
+	err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer,
+		0, "pcifront", pdev);
+
+	if (err < 0)
+		return err;
+
+	pdev->irq = err;
+
+do_publish:
+	err = xenbus_transaction_start(&trans);
+	if (err) {
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error writing configuration for backend "
+				 "(start transaction)");
+		goto out;
+	}
+
+	err = xenbus_printf(trans, pdev->xdev->nodename,
+			    "pci-op-ref", "%u", pdev->gnt_ref);
+	if (!err)
+		err = xenbus_printf(trans, pdev->xdev->nodename,
+				    "event-channel", "%u", pdev->evtchn);
+	if (!err)
+		err = xenbus_printf(trans, pdev->xdev->nodename,
+				    "magic", XEN_PCI_MAGIC);
+
+	if (err) {
+		xenbus_transaction_end(trans, 1);
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error writing configuration for backend");
+		goto out;
+	} else {
+		err = xenbus_transaction_end(trans, 0);
+		if (err == -EAGAIN)
+			goto do_publish;
+		else if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error completing transaction "
+					 "for backend");
+			goto out;
+		}
+	}
+
+	xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
+
+	dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
+
+out:
+	return err;
+}
+
+static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
+{
+	int err = -EFAULT;
+	int i, num_roots, len;
+	char str[64];
+	unsigned int domain, bus;
+
+
+	/* Only connect once */
+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+	    XenbusStateInitialised)
+		goto out;
+
+	err = pcifront_connect(pdev);
+	if (err) {
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error connecting PCI Frontend");
+		goto out;
+	}
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
+			   "root_num", "%d", &num_roots);
+	if (err == -ENOENT) {
+		xenbus_dev_error(pdev->xdev, err,
+				 "No PCI Roots found, trying 0000:00");
+		err = pcifront_scan_root(pdev, 0, 0);
+		num_roots = 0;
+	} else if (err != 1) {
+		if (err == 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error reading number of PCI roots");
+		goto out;
+	}
+
+	for (i = 0; i < num_roots; i++) {
+		len = snprintf(str, sizeof(str), "root-%d", i);
+		if (unlikely(len >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
+				   "%x:%x", &domain, &bus);
+		if (err != 2) {
+			if (err >= 0)
+				err = -EINVAL;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error reading PCI root %d", i);
+			goto out;
+		}
+
+		err = pcifront_scan_root(pdev, domain, bus);
+		if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error scanning PCI root %04x:%02x",
+					 domain, bus);
+			goto out;
+		}
+	}
+
+	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+
+out:
+	return err;
+}
+
+static int pcifront_try_disconnect(struct pcifront_device *pdev)
+{
+	int err = 0;
+	enum xenbus_state prev_state;
+
+
+	prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
+
+	if (prev_state >= XenbusStateClosing)
+		goto out;
+
+	if (prev_state == XenbusStateConnected) {
+		pcifront_free_roots(pdev);
+		pcifront_disconnect(pdev);
+	}
+
+	err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
+
+out:
+
+	return err;
+}
+
+static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
+{
+	int err = -EFAULT;
+	int i, num_roots, len;
+	unsigned int domain, bus;
+	char str[64];
+
+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+	    XenbusStateReconfiguring)
+		goto out;
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
+			   "root_num", "%d", &num_roots);
+	if (err == -ENOENT) {
+		xenbus_dev_error(pdev->xdev, err,
+				 "No PCI Roots found, trying 0000:00");
+		err = pcifront_rescan_root(pdev, 0, 0);
+		num_roots = 0;
+	} else if (err != 1) {
+		if (err == 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error reading number of PCI roots");
+		goto out;
+	}
+
+	for (i = 0; i < num_roots; i++) {
+		len = snprintf(str, sizeof(str), "root-%d", i);
+		if (unlikely(len >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
+				   "%x:%x", &domain, &bus);
+		if (err != 2) {
+			if (err >= 0)
+				err = -EINVAL;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error reading PCI root %d", i);
+			goto out;
+		}
+
+		err = pcifront_rescan_root(pdev, domain, bus);
+		if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error scanning PCI root %04x:%02x",
+					 domain, bus);
+			goto out;
+		}
+	}
+
+	xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+
+out:
+	return err;
+}
+
+static int pcifront_detach_devices(struct pcifront_device *pdev)
+{
+	int err = 0;
+	int i, num_devs;
+	unsigned int domain, bus, slot, func;
+	struct pci_bus *pci_bus;
+	struct pci_dev *pci_dev;
+	char str[64];
+
+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+	    XenbusStateConnected)
+		goto out;
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
+			   &num_devs);
+	if (err != 1) {
+		if (err >= 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error reading number of PCI devices");
+		goto out;
+	}
+
+	/* Find devices being detached and remove them. */
+	for (i = 0; i < num_devs; i++) {
+		int l, state;
+		l = snprintf(str, sizeof(str), "state-%d", i);
+		if (unlikely(l >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d",
+				   &state);
+		if (err != 1)
+			state = XenbusStateUnknown;
+
+		if (state != XenbusStateClosing)
+			continue;
+
+		/* Remove device. */
+		l = snprintf(str, sizeof(str), "vdev-%d", i);
+		if (unlikely(l >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
+				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
+		if (err != 4) {
+			if (err >= 0)
+				err = -EINVAL;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error reading PCI device %d", i);
+			goto out;
+		}
+
+		pci_bus = pci_find_bus(domain, bus);
+		if (!pci_bus) {
+			dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n",
+				domain, bus);
+			continue;
+		}
+		pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
+		if (!pci_dev) {
+			dev_dbg(&pdev->xdev->dev,
+				"Cannot get PCI device %04x:%02x:%02x.%02x\n",
+				domain, bus, slot, func);
+			continue;
+		}
+		pci_remove_bus_device(pci_dev);
+		pci_dev_put(pci_dev);
+
+		dev_dbg(&pdev->xdev->dev,
+			"PCI device %04x:%02x:%02x.%02x removed.\n",
+			domain, bus, slot, func);
+	}
+
+	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
+
+out:
+	return err;
+}
+
+static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
+						  enum xenbus_state be_state)
+{
+	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
+
+	switch (be_state) {
+	case XenbusStateUnknown:
+	case XenbusStateInitialising:
+	case XenbusStateInitWait:
+	case XenbusStateInitialised:
+	case XenbusStateClosed:
+		break;
+
+	case XenbusStateConnected:
+		pcifront_try_connect(pdev);
+		break;
+
+	case XenbusStateClosing:
+		dev_warn(&xdev->dev, "backend going away!\n");
+		pcifront_try_disconnect(pdev);
+		break;
+
+	case XenbusStateReconfiguring:
+		pcifront_detach_devices(pdev);
+		break;
+
+	case XenbusStateReconfigured:
+		pcifront_attach_devices(pdev);
+		break;
+	}
+}
+
+static int pcifront_xenbus_probe(struct xenbus_device *xdev,
+				 const struct xenbus_device_id *id)
+{
+	int err = 0;
+	struct pcifront_device *pdev = alloc_pdev(xdev);
+
+	if (pdev == NULL) {
+		err = -ENOMEM;
+		xenbus_dev_fatal(xdev, err,
+				 "Error allocating pcifront_device struct");
+		goto out;
+	}
+
+	err = pcifront_publish_info(pdev);
+	if (err)
+		free_pdev(pdev);
+
+out:
+	return err;
+}
+
+static int pcifront_xenbus_remove(struct xenbus_device *xdev)
+{
+	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
+	if (pdev)
+		free_pdev(pdev);
+
+	return 0;
+}
+
+static const struct xenbus_device_id xenpci_ids[] = {
+	{"pci"},
+	{""},
+};
+
+static struct xenbus_driver xenbus_pcifront_driver = {
+	.name			= "pcifront",
+	.owner			= THIS_MODULE,
+	.ids			= xenpci_ids,
+	.probe			= pcifront_xenbus_probe,
+	.remove			= pcifront_xenbus_remove,
+	.otherend_changed	= pcifront_backend_changed,
+};
+
+static int __init pcifront_init(void)
+{
+	if (!xen_pv_domain() || xen_initial_domain())
+		return -ENODEV;
+
+	pci_frontend_registrar(1 /* enable */);
+
+	return xenbus_register_frontend(&xenbus_pcifront_driver);
+}
+
+static void __exit pcifront_cleanup(void)
+{
+	xenbus_unregister_driver(&xenbus_pcifront_driver);
+	pci_frontend_registrar(0 /* disable */);
+}
+module_init(pcifront_init);
+module_exit(pcifront_cleanup);
+
+MODULE_DESCRIPTION("Xen PCI passthrough frontend.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:pci");

+ 2 - 0
drivers/video/xen-fbfront.c

@@ -631,6 +631,8 @@ static void xenfb_backend_changed(struct xenbus_device *dev,
 	switch (backend_state) {
 	case XenbusStateInitialising:
 	case XenbusStateInitialised:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
 	case XenbusStateUnknown:
 	case XenbusStateClosed:
 		break;

+ 2 - 1
drivers/xen/Kconfig

@@ -74,6 +74,7 @@ config XEN_PLATFORM_PCI
 
 config SWIOTLB_XEN
 	def_bool y
-	depends on SWIOTLB
+	depends on PCI
+	select SWIOTLB
 
 endmenu

+ 1 - 0
drivers/xen/Makefile

@@ -4,6 +4,7 @@ obj-y	+= xenbus/
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_features.o			:= $(nostackp)
 
+obj-$(CONFIG_BLOCK)		+= biomerge.o
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o

+ 13 - 0
drivers/xen/biomerge.c

@@ -0,0 +1,13 @@
+#include <linux/bio.h>
+#include <linux/io.h>
+#include <xen/page.h>
+
+bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
+			       const struct bio_vec *vec2)
+{
+	unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page));
+	unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page));
+
+	return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&
+		((mfn1 == mfn2) || ((mfn1+1) == mfn2));
+}

+ 338 - 27
drivers/xen/events.c

@@ -16,7 +16,7 @@
  *    (typically dom0).
  * 2. VIRQs, typically used for timers.  These are per-cpu events.
  * 3. IPIs.
- * 4. Hardware interrupts. Not supported at present.
+ * 4. PIRQs - Hardware interrupts.
  *
  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  */
@@ -28,11 +28,13 @@
 #include <linux/string.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
+#include <linux/irqnr.h>
 
 #include <asm/desc.h>
 #include <asm/ptrace.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/io_apic.h>
 #include <asm/sync_bitops.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -89,20 +91,26 @@ struct irq_info
 		enum ipi_vector ipi;
 		struct {
 			unsigned short gsi;
-			unsigned short vector;
+			unsigned char vector;
+			unsigned char flags;
 		} pirq;
 	} u;
 };
+#define PIRQ_NEEDS_EOI	(1 << 0)
+#define PIRQ_SHAREABLE	(1 << 1)
 
-static struct irq_info irq_info[NR_IRQS];
+static struct irq_info *irq_info;
 
-static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
-	[0 ... NR_EVENT_CHANNELS-1] = -1
-};
+static int *evtchn_to_irq;
 struct cpu_evtchn_s {
 	unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
 };
-static struct cpu_evtchn_s *cpu_evtchn_mask_p;
+
+static __initdata struct cpu_evtchn_s init_evtchn_mask = {
+	.bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
+};
+static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
+
 static inline unsigned long *cpu_evtchn_mask(int cpu)
 {
 	return cpu_evtchn_mask_p[cpu].bits;
@@ -113,6 +121,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
 
 static struct irq_chip xen_dynamic_chip;
 static struct irq_chip xen_percpu_chip;
+static struct irq_chip xen_pirq_chip;
 
 /* Constructor for packed IRQ information. */
 static struct irq_info mk_unbound_info(void)
@@ -225,6 +234,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
 	return ret;
 }
 
+static bool pirq_needs_eoi(unsigned irq)
+{
+	struct irq_info *info = info_for_irq(irq);
+
+	BUG_ON(info->type != IRQT_PIRQ);
+
+	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
+}
+
 static inline unsigned long active_evtchns(unsigned int cpu,
 					   struct shared_info *sh,
 					   unsigned int idx)
@@ -336,36 +354,297 @@ static void unmask_evtchn(int port)
 	put_cpu();
 }
 
+static int get_nr_hw_irqs(void)
+{
+	int ret = 1;
+
+#ifdef CONFIG_X86_IO_APIC
+	ret = get_nr_irqs_gsi();
+#endif
+
+	return ret;
+}
+
 static int find_unbound_irq(void)
 {
-	int irq;
-	struct irq_desc *desc;
+	struct irq_data *data;
+	int irq, res;
+	int start = get_nr_hw_irqs();
 
-	for (irq = 0; irq < nr_irqs; irq++) {
-		desc = irq_to_desc(irq);
+	if (start == nr_irqs)
+		goto no_irqs;
+
+	/* nr_irqs is a magic value. Must not use it.*/
+	for (irq = nr_irqs-1; irq > start; irq--) {
+		data = irq_get_irq_data(irq);
 		/* only 0->15 have init'd desc; handle irq > 16 */
-		if (desc == NULL)
+		if (!data)
 			break;
-		if (desc->chip == &no_irq_chip)
+		if (data->chip == &no_irq_chip)
 			break;
-		if (desc->chip != &xen_dynamic_chip)
+		if (data->chip != &xen_dynamic_chip)
 			continue;
 		if (irq_info[irq].type == IRQT_UNBOUND)
-			break;
+			return irq;
 	}
 
-	if (irq == nr_irqs)
-		panic("No available IRQ to bind to: increase nr_irqs!\n");
+	if (irq == start)
+		goto no_irqs;
+
+	res = irq_alloc_desc_at(irq, 0);
 
-	desc = irq_to_desc_alloc_node(irq, 0);
-	if (WARN_ON(desc == NULL))
+	if (WARN_ON(res != irq))
 		return -1;
 
-	dynamic_irq_init_keep_chip_data(irq);
+	return irq;
+
+no_irqs:
+	panic("No available IRQ to bind to: increase nr_irqs!\n");
+}
+
+static bool identity_mapped_irq(unsigned irq)
+{
+	/* identity map all the hardware irqs */
+	return irq < get_nr_hw_irqs();
+}
+
+static void pirq_unmask_notify(int irq)
+{
+	struct physdev_eoi eoi = { .irq = irq };
+
+	if (unlikely(pirq_needs_eoi(irq))) {
+		int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
+		WARN_ON(rc);
+	}
+}
+
+static void pirq_query_unmask(int irq)
+{
+	struct physdev_irq_status_query irq_status;
+	struct irq_info *info = info_for_irq(irq);
+
+	BUG_ON(info->type != IRQT_PIRQ);
+
+	irq_status.irq = irq;
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+		irq_status.flags = 0;
+
+	info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
+	if (irq_status.flags & XENIRQSTAT_needs_eoi)
+		info->u.pirq.flags |= PIRQ_NEEDS_EOI;
+}
+
+static bool probing_irq(int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return desc && desc->action == NULL;
+}
+
+static unsigned int startup_pirq(unsigned int irq)
+{
+	struct evtchn_bind_pirq bind_pirq;
+	struct irq_info *info = info_for_irq(irq);
+	int evtchn = evtchn_from_irq(irq);
+	int rc;
+
+	BUG_ON(info->type != IRQT_PIRQ);
+
+	if (VALID_EVTCHN(evtchn))
+		goto out;
+
+	bind_pirq.pirq = irq;
+	/* NB. We are happy to share unless we are probing. */
+	bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
+					BIND_PIRQ__WILL_SHARE : 0;
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
+	if (rc != 0) {
+		if (!probing_irq(irq))
+			printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
+			       irq);
+		return 0;
+	}
+	evtchn = bind_pirq.port;
+
+	pirq_query_unmask(irq);
+
+	evtchn_to_irq[evtchn] = irq;
+	bind_evtchn_to_cpu(evtchn, 0);
+	info->evtchn = evtchn;
+
+out:
+	unmask_evtchn(evtchn);
+	pirq_unmask_notify(irq);
+
+	return 0;
+}
+
+static void shutdown_pirq(unsigned int irq)
+{
+	struct evtchn_close close;
+	struct irq_info *info = info_for_irq(irq);
+	int evtchn = evtchn_from_irq(irq);
+
+	BUG_ON(info->type != IRQT_PIRQ);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+
+	mask_evtchn(evtchn);
+
+	close.port = evtchn;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+		BUG();
+
+	bind_evtchn_to_cpu(evtchn, 0);
+	evtchn_to_irq[evtchn] = -1;
+	info->evtchn = 0;
+}
+
+static void enable_pirq(unsigned int irq)
+{
+	startup_pirq(irq);
+}
+
+static void disable_pirq(unsigned int irq)
+{
+}
+
+static void ack_pirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	move_native_irq(irq);
+
+	if (VALID_EVTCHN(evtchn)) {
+		mask_evtchn(evtchn);
+		clear_evtchn(evtchn);
+	}
+}
+
+static void end_pirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (WARN_ON(!desc))
+		return;
+
+	if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
+	    (IRQ_DISABLED|IRQ_PENDING)) {
+		shutdown_pirq(irq);
+	} else if (VALID_EVTCHN(evtchn)) {
+		unmask_evtchn(evtchn);
+		pirq_unmask_notify(irq);
+	}
+}
+
+static int find_irq_by_gsi(unsigned gsi)
+{
+	int irq;
+
+	for (irq = 0; irq < nr_irqs; irq++) {
+		struct irq_info *info = info_for_irq(irq);
+
+		if (info == NULL || info->type != IRQT_PIRQ)
+			continue;
+
+		if (gsi_from_irq(irq) == gsi)
+			return irq;
+	}
+
+	return -1;
+}
+
+/* xen_allocate_irq might allocate irqs from the top down, as a
+ * consequence don't assume that the irq number returned has a low value
+ * or can be used as a pirq number unless you know otherwise.
+ *
+ * One notable exception is when xen_allocate_irq is called passing an
+ * hardware gsi as argument, in that case the irq number returned
+ * matches the gsi number passed as first argument.
+
+ * Note: We don't assign an
+ * event channel until the irq actually started up.  Return an
+ * existing irq if we've already got one for the gsi.
+ */
+int xen_allocate_pirq(unsigned gsi, int shareable, char *name)
+{
+	int irq;
+	struct physdev_irq irq_op;
+
+	spin_lock(&irq_mapping_update_lock);
+
+	irq = find_irq_by_gsi(gsi);
+	if (irq != -1) {
+		printk(KERN_INFO "xen_allocate_pirq: returning irq %d for gsi %u\n",
+		       irq, gsi);
+		goto out;	/* XXX need refcount? */
+	}
+
+	/* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
+	 * we are using the !xen_initial_domain() to drop in the function.*/
+	if (identity_mapped_irq(gsi) || !xen_initial_domain()) {
+		irq = gsi;
+		irq_alloc_desc_at(irq, 0);
+	} else
+		irq = find_unbound_irq();
+
+	set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+				      handle_level_irq, name);
+
+	irq_op.irq = irq;
+	irq_op.vector = 0;
+
+	/* Only the privileged domain can do this. For non-priv, the pcifront
+	 * driver provides a PCI bus that does the call to do exactly
+	 * this in the priv domain. */
+	if (xen_initial_domain() &&
+	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+		irq_free_desc(irq);
+		irq = -ENOSPC;
+		goto out;
+	}
+
+	irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector);
+	irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0;
+
+out:
+	spin_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
 
+int xen_destroy_irq(int irq)
+{
+	struct irq_desc *desc;
+	int rc = -ENOENT;
+
+	spin_lock(&irq_mapping_update_lock);
+
+	desc = irq_to_desc(irq);
+	if (!desc)
+		goto out;
+
+	irq_info[irq] = mk_unbound_info();
+
+	irq_free_desc(irq);
+
+out:
+	spin_unlock(&irq_mapping_update_lock);
+	return rc;
+}
+
+int xen_vector_from_irq(unsigned irq)
+{
+	return vector_from_irq(irq);
+}
+
+int xen_gsi_from_irq(unsigned irq)
+{
+	return gsi_from_irq(irq);
+}
+
 int bind_evtchn_to_irq(unsigned int evtchn)
 {
 	int irq;
@@ -495,7 +774,7 @@ static void unbind_from_irq(unsigned int irq)
 	if (irq_info[irq].type != IRQT_UNBOUND) {
 		irq_info[irq] = mk_unbound_info();
 
-		dynamic_irq_cleanup(irq);
+		irq_free_desc(irq);
 	}
 
 	spin_unlock(&irq_mapping_update_lock);
@@ -892,7 +1171,7 @@ void xen_clear_irq_pending(int irq)
 	if (VALID_EVTCHN(evtchn))
 		clear_evtchn(evtchn);
 }
-
+EXPORT_SYMBOL(xen_clear_irq_pending);
 void xen_set_irq_pending(int irq)
 {
 	int evtchn = evtchn_from_irq(irq);
@@ -912,9 +1191,9 @@ bool xen_test_irq_pending(int irq)
 	return ret;
 }
 
-/* Poll waiting for an irq to become pending.  In the usual case, the
-   irq will be disabled so it won't deliver an interrupt. */
-void xen_poll_irq(int irq)
+/* Poll waiting for an irq to become pending with timeout.  In the usual case,
+ * the irq will be disabled so it won't deliver an interrupt. */
+void xen_poll_irq_timeout(int irq, u64 timeout)
 {
 	evtchn_port_t evtchn = evtchn_from_irq(irq);
 
@@ -922,13 +1201,20 @@ void xen_poll_irq(int irq)
 		struct sched_poll poll;
 
 		poll.nr_ports = 1;
-		poll.timeout = 0;
+		poll.timeout = timeout;
 		set_xen_guest_handle(poll.ports, &evtchn);
 
 		if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
 			BUG();
 	}
 }
+EXPORT_SYMBOL(xen_poll_irq_timeout);
+/* Poll waiting for an irq to become pending.  In the usual case, the
+ * irq will be disabled so it won't deliver an interrupt. */
+void xen_poll_irq(int irq)
+{
+	xen_poll_irq_timeout(irq, 0 /* no timeout */);
+}
 
 void xen_irq_resume(void)
 {
@@ -965,6 +1251,26 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
 	.retrigger	= retrigger_dynirq,
 };
 
+static struct irq_chip xen_pirq_chip __read_mostly = {
+	.name		= "xen-pirq",
+
+	.startup	= startup_pirq,
+	.shutdown	= shutdown_pirq,
+
+	.enable		= enable_pirq,
+	.unmask		= enable_pirq,
+
+	.disable	= disable_pirq,
+	.mask		= disable_pirq,
+
+	.ack		= ack_pirq,
+	.end		= end_pirq,
+
+	.set_affinity	= set_affinity_irq,
+
+	.retrigger	= retrigger_dynirq,
+};
+
 static struct irq_chip xen_percpu_chip __read_mostly = {
 	.name		= "xen-percpu",
 
@@ -1019,7 +1325,12 @@ void __init xen_init_IRQ(void)
 
 	cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s),
 				    GFP_KERNEL);
-	BUG_ON(cpu_evtchn_mask_p == NULL);
+	irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL);
+
+	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
+				    GFP_KERNEL);
+	for (i = 0; i < NR_EVENT_CHANNELS; i++)
+		evtchn_to_irq[i] = -1;
 
 	init_evtchn_cpu_bindings();
 

+ 2 - 0
drivers/xen/xenbus/xenbus_client.c

@@ -50,6 +50,8 @@ const char *xenbus_strstate(enum xenbus_state state)
 		[ XenbusStateConnected    ] = "Connected",
 		[ XenbusStateClosing      ] = "Closing",
 		[ XenbusStateClosed	  ] = "Closed",
+		[XenbusStateReconfiguring] = "Reconfiguring",
+		[XenbusStateReconfigured] = "Reconfigured",
 	};
 	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
 }

+ 4 - 6
include/linux/dmar.h

@@ -106,6 +106,7 @@ struct irte {
 		__u64 high;
 	};
 };
+
 #ifdef CONFIG_INTR_REMAP
 extern int intr_remapping_enabled;
 extern int intr_remapping_supported(void);
@@ -119,11 +120,8 @@ extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
 extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
    			u16 sub_handle);
 extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
-extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
-extern int flush_irte(int irq);
 extern int free_irte(int irq);
 
-extern int irq_remapped(int irq);
 extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
 extern struct intel_iommu *map_ioapic_to_ir(int apic);
 extern struct intel_iommu *map_hpet_to_ir(u8 id);
@@ -177,7 +175,6 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 	return 0;
 }
 
-#define irq_remapped(irq)		(0)
 #define enable_intr_remapping(mode)	(-1)
 #define disable_intr_remapping()	(0)
 #define reenable_intr_remapping(mode)	(0)
@@ -187,8 +184,9 @@ static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 /* Can't use the common MSI interrupt functions
  * since DMAR is not a pci device
  */
-extern void dmar_msi_unmask(unsigned int irq);
-extern void dmar_msi_mask(unsigned int irq);
+struct irq_data;
+extern void dmar_msi_unmask(struct irq_data *data);
+extern void dmar_msi_mask(struct irq_data *data);
 extern void dmar_msi_read(int irq, struct msi_msg *msg);
 extern void dmar_msi_write(int irq, struct msi_msg *msg);
 extern int dmar_set_interrupt(struct intel_iommu *iommu);

+ 3 - 2
include/linux/htirq.h

@@ -9,8 +9,9 @@ struct ht_irq_msg {
 /* Helper functions.. */
 void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
 void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg);
-void mask_ht_irq(unsigned int irq);
-void unmask_ht_irq(unsigned int irq);
+struct irq_data;
+void mask_ht_irq(struct irq_data *data);
+void unmask_ht_irq(struct irq_data *data);
 
 /* The arch hook for getting things started */
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev);

+ 0 - 3
include/linux/interrupt.h

@@ -641,11 +641,8 @@ static inline void init_irq_proc(void)
 struct seq_file;
 int show_interrupts(struct seq_file *p, void *v);
 
-struct irq_desc;
-
 extern int early_irq_init(void);
 extern int arch_probe_nr_irqs(void);
 extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int node);
 
 #endif

+ 171 - 276
include/linux/irq.h

@@ -72,6 +72,10 @@ typedef	void (*irq_flow_handler_t)(unsigned int irq,
 #define IRQ_ONESHOT		0x08000000	/* IRQ is not unmasked after hardirq */
 #define IRQ_NESTED_THREAD	0x10000000	/* IRQ is nested into another, no own handler thread */
 
+#define IRQF_MODIFY_MASK	\
+	(IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
+	 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL)
+
 #ifdef CONFIG_IRQ_PER_CPU
 # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
 # define IRQ_NO_BALANCING_MASK	(IRQ_PER_CPU | IRQ_NO_BALANCING)
@@ -80,36 +84,77 @@ typedef	void (*irq_flow_handler_t)(unsigned int irq,
 # define IRQ_NO_BALANCING_MASK	IRQ_NO_BALANCING
 #endif
 
-struct proc_dir_entry;
 struct msi_desc;
 
+/**
+ * struct irq_data - per irq and irq chip data passed down to chip functions
+ * @irq:		interrupt number
+ * @node:		node index useful for balancing
+ * @chip:		low level interrupt hardware access
+ * @handler_data:	per-IRQ data for the irq_chip methods
+ * @chip_data:		platform-specific per-chip private data for the chip
+ *			methods, to allow shared chip implementations
+ * @msi_desc:		MSI descriptor
+ * @affinity:		IRQ affinity on SMP
+ *
+ * The fields here need to overlay the ones in irq_desc until we
+ * cleaned up the direct references and switched everything over to
+ * irq_data.
+ */
+struct irq_data {
+	unsigned int		irq;
+	unsigned int		node;
+	struct irq_chip		*chip;
+	void			*handler_data;
+	void			*chip_data;
+	struct msi_desc		*msi_desc;
+#ifdef CONFIG_SMP
+	cpumask_var_t		affinity;
+#endif
+};
+
 /**
  * struct irq_chip - hardware interrupt chip descriptor
  *
  * @name:		name for /proc/interrupts
- * @startup:		start up the interrupt (defaults to ->enable if NULL)
- * @shutdown:		shut down the interrupt (defaults to ->disable if NULL)
- * @enable:		enable the interrupt (defaults to chip->unmask if NULL)
- * @disable:		disable the interrupt
- * @ack:		start of a new interrupt
- * @mask:		mask an interrupt source
- * @mask_ack:		ack and mask an interrupt source
- * @unmask:		unmask an interrupt source
- * @eoi:		end of interrupt - chip level
- * @end:		end of interrupt - flow level
- * @set_affinity:	set the CPU affinity on SMP machines
- * @retrigger:		resend an IRQ to the CPU
- * @set_type:		set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
- * @set_wake:		enable/disable power-management wake-on of an IRQ
+ * @startup:		deprecated, replaced by irq_startup
+ * @shutdown:		deprecated, replaced by irq_shutdown
+ * @enable:		deprecated, replaced by irq_enable
+ * @disable:		deprecated, replaced by irq_disable
+ * @ack:		deprecated, replaced by irq_ack
+ * @mask:		deprecated, replaced by irq_mask
+ * @mask_ack:		deprecated, replaced by irq_mask_ack
+ * @unmask:		deprecated, replaced by irq_unmask
+ * @eoi:		deprecated, replaced by irq_eoi
+ * @end:		deprecated, will go away with __do_IRQ()
+ * @set_affinity:	deprecated, replaced by irq_set_affinity
+ * @retrigger:		deprecated, replaced by irq_retrigger
+ * @set_type:		deprecated, replaced by irq_set_type
+ * @set_wake:		deprecated, replaced by irq_wake
+ * @bus_lock:		deprecated, replaced by irq_bus_lock
+ * @bus_sync_unlock:	deprecated, replaced by irq_bus_sync_unlock
  *
- * @bus_lock:		function to lock access to slow bus (i2c) chips
- * @bus_sync_unlock:	function to sync and unlock slow bus (i2c) chips
+ * @irq_startup:	start up the interrupt (defaults to ->enable if NULL)
+ * @irq_shutdown:	shut down the interrupt (defaults to ->disable if NULL)
+ * @irq_enable:		enable the interrupt (defaults to chip->unmask if NULL)
+ * @irq_disable:	disable the interrupt
+ * @irq_ack:		start of a new interrupt
+ * @irq_mask:		mask an interrupt source
+ * @irq_mask_ack:	ack and mask an interrupt source
+ * @irq_unmask:		unmask an interrupt source
+ * @irq_eoi:		end of interrupt
+ * @irq_set_affinity:	set the CPU affinity on SMP machines
+ * @irq_retrigger:	resend an IRQ to the CPU
+ * @irq_set_type:	set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
+ * @irq_set_wake:	enable/disable power-management wake-on of an IRQ
+ * @irq_bus_lock:	function to lock access to slow bus (i2c) chips
+ * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
  *
  * @release:		release function solely used by UML
- * @typename:		obsoleted by name, kept as migration helper
  */
 struct irq_chip {
 	const char	*name;
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
 	unsigned int	(*startup)(unsigned int irq);
 	void		(*shutdown)(unsigned int irq);
 	void		(*enable)(unsigned int irq);
@@ -130,154 +175,66 @@ struct irq_chip {
 
 	void		(*bus_lock)(unsigned int irq);
 	void		(*bus_sync_unlock)(unsigned int irq);
+#endif
+	unsigned int	(*irq_startup)(struct irq_data *data);
+	void		(*irq_shutdown)(struct irq_data *data);
+	void		(*irq_enable)(struct irq_data *data);
+	void		(*irq_disable)(struct irq_data *data);
+
+	void		(*irq_ack)(struct irq_data *data);
+	void		(*irq_mask)(struct irq_data *data);
+	void		(*irq_mask_ack)(struct irq_data *data);
+	void		(*irq_unmask)(struct irq_data *data);
+	void		(*irq_eoi)(struct irq_data *data);
+
+	int		(*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
+	int		(*irq_retrigger)(struct irq_data *data);
+	int		(*irq_set_type)(struct irq_data *data, unsigned int flow_type);
+	int		(*irq_set_wake)(struct irq_data *data, unsigned int on);
+
+	void		(*irq_bus_lock)(struct irq_data *data);
+	void		(*irq_bus_sync_unlock)(struct irq_data *data);
 
 	/* Currently used only by UML, might disappear one day.*/
 #ifdef CONFIG_IRQ_RELEASE_METHOD
 	void		(*release)(unsigned int irq, void *dev_id);
 #endif
-	/*
-	 * For compatibility, ->typename is copied into ->name.
-	 * Will disappear.
-	 */
-	const char	*typename;
 };
 
-struct timer_rand_state;
-struct irq_2_iommu;
-/**
- * struct irq_desc - interrupt descriptor
- * @irq:		interrupt number for this descriptor
- * @timer_rand_state:	pointer to timer rand state struct
- * @kstat_irqs:		irq stats per cpu
- * @irq_2_iommu:	iommu with this irq
- * @handle_irq:		highlevel irq-events handler [if NULL, __do_IRQ()]
- * @chip:		low level interrupt hardware access
- * @msi_desc:		MSI descriptor
- * @handler_data:	per-IRQ data for the irq_chip methods
- * @chip_data:		platform-specific per-chip private data for the chip
- *			methods, to allow shared chip implementations
- * @action:		the irq action chain
- * @status:		status information
- * @depth:		disable-depth, for nested irq_disable() calls
- * @wake_depth:		enable depth, for multiple set_irq_wake() callers
- * @irq_count:		stats field to detect stalled irqs
- * @last_unhandled:	aging timer for unhandled count
- * @irqs_unhandled:	stats field for spurious unhandled interrupts
- * @lock:		locking for SMP
- * @affinity:		IRQ affinity on SMP
- * @node:		node index useful for balancing
- * @pending_mask:	pending rebalanced interrupts
- * @threads_active:	number of irqaction threads currently running
- * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
- * @dir:		/proc/irq/ procfs entry
- * @name:		flow handler name for /proc/interrupts output
- */
-struct irq_desc {
-	unsigned int		irq;
-	struct timer_rand_state *timer_rand_state;
-	unsigned int            *kstat_irqs;
-#ifdef CONFIG_INTR_REMAP
-	struct irq_2_iommu      *irq_2_iommu;
-#endif
-	irq_flow_handler_t	handle_irq;
-	struct irq_chip		*chip;
-	struct msi_desc		*msi_desc;
-	void			*handler_data;
-	void			*chip_data;
-	struct irqaction	*action;	/* IRQ action list */
-	unsigned int		status;		/* IRQ status */
-
-	unsigned int		depth;		/* nested irq disables */
-	unsigned int		wake_depth;	/* nested wake enables */
-	unsigned int		irq_count;	/* For detecting broken IRQs */
-	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
-	unsigned int		irqs_unhandled;
-	raw_spinlock_t		lock;
-#ifdef CONFIG_SMP
-	cpumask_var_t		affinity;
-	const struct cpumask	*affinity_hint;
-	unsigned int		node;
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	cpumask_var_t		pending_mask;
-#endif
-#endif
-	atomic_t		threads_active;
-	wait_queue_head_t       wait_for_threads;
-#ifdef CONFIG_PROC_FS
-	struct proc_dir_entry	*dir;
-#endif
-	const char		*name;
-} ____cacheline_internodealigned_in_smp;
+/* This include will go away once we isolated irq_desc usage to core code */
+#include <linux/irqdesc.h>
 
-extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
-					struct irq_desc *desc, int node);
-extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
+/*
+ * Pick up the arch-dependent methods:
+ */
+#include <asm/hw_irq.h>
 
-#ifndef CONFIG_SPARSE_IRQ
-extern struct irq_desc irq_desc[NR_IRQS];
+#ifndef NR_IRQS_LEGACY
+# define NR_IRQS_LEGACY 0
 #endif
 
-#ifdef CONFIG_NUMA_IRQ_DESC
-extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node);
-#else
-static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
-{
-	return desc;
-}
+#ifndef ARCH_IRQ_INIT_FLAGS
+# define ARCH_IRQ_INIT_FLAGS	0
 #endif
 
-extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
-
-/*
- * Pick up the arch-dependent methods:
- */
-#include <asm/hw_irq.h>
+#define IRQ_DEFAULT_INIT_FLAGS	(IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS)
 
+struct irqaction;
 extern int setup_irq(unsigned int irq, struct irqaction *new);
 extern void remove_irq(unsigned int irq, struct irqaction *act);
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 
-#ifdef CONFIG_SMP
-
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void move_native_irq(int irq);
 void move_masked_irq(int irq);
-
-#else /* CONFIG_GENERIC_PENDING_IRQ */
-
-static inline void move_irq(int irq)
-{
-}
-
-static inline void move_native_irq(int irq)
-{
-}
-
-static inline void move_masked_irq(int irq)
-{
-}
-
-#endif /* CONFIG_GENERIC_PENDING_IRQ */
-
-#else /* CONFIG_SMP */
-
-#define move_native_irq(x)
-#define move_masked_irq(x)
-
-#endif /* CONFIG_SMP */
+#else
+static inline void move_native_irq(int irq) { }
+static inline void move_masked_irq(int irq) { }
+#endif
 
 extern int no_irq_affinity;
 
-static inline int irq_balancing_disabled(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	return desc->status & IRQ_NO_BALANCING_MASK;
-}
-
 /* Handle irq action chains: */
 extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action);
 
@@ -293,42 +250,10 @@ extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc);
 extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc);
 extern void handle_nested_irq(unsigned int irq);
 
-/*
- * Monolithic do_IRQ implementation.
- */
-#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
-extern unsigned int __do_IRQ(unsigned int irq);
-#endif
-
-/*
- * Architectures call this to let the generic IRQ layer
- * handle an interrupt. If the descriptor is attached to an
- * irqchip-style controller then we call the ->handle_irq() handler,
- * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
- */
-static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
-{
-#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
-	desc->handle_irq(irq, desc);
-#else
-	if (likely(desc->handle_irq))
-		desc->handle_irq(irq, desc);
-	else
-		__do_IRQ(irq);
-#endif
-}
-
-static inline void generic_handle_irq(unsigned int irq)
-{
-	generic_handle_irq_desc(irq, irq_to_desc(irq));
-}
-
 /* Handling of unhandled and spurious interrupts: */
 extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
 			   irqreturn_t action_ret);
 
-/* Resending of interrupts :*/
-void check_irq_resend(struct irq_desc *desc, unsigned int irq);
 
 /* Enable/disable irq debugging output: */
 extern int noirqdebug_setup(char *str);
@@ -351,16 +276,6 @@ extern void
 __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		  const char *name);
 
-/* caller has locked the irq_desc and both params are valid */
-static inline void __set_irq_handler_unlocked(int irq,
-					      irq_flow_handler_t handler)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	desc->handle_irq = handler;
-}
-
 /*
  * Set a highlevel flow handler for a given IRQ:
  */
@@ -384,141 +299,121 @@ set_irq_chained_handler(unsigned int irq,
 
 extern void set_irq_nested_thread(unsigned int irq, int nest);
 
-extern void set_irq_noprobe(unsigned int irq);
-extern void set_irq_probe(unsigned int irq);
+void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set);
+
+static inline void irq_set_status_flags(unsigned int irq, unsigned long set)
+{
+	irq_modify_status(irq, 0, set);
+}
+
+static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr)
+{
+	irq_modify_status(irq, clr, 0);
+}
+
+static inline void set_irq_noprobe(unsigned int irq)
+{
+	irq_modify_status(irq, 0, IRQ_NOPROBE);
+}
+
+static inline void set_irq_probe(unsigned int irq)
+{
+	irq_modify_status(irq, IRQ_NOPROBE, 0);
+}
 
 /* Handle dynamic irq creation and destruction */
 extern unsigned int create_irq_nr(unsigned int irq_want, int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
-/* Test to see if a driver has successfully requested an irq */
-static inline int irq_has_action(unsigned int irq)
+/*
+ * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
+ * irq_free_desc instead.
+ */
+extern void dynamic_irq_cleanup(unsigned int irq);
+static inline void dynamic_irq_init(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-	return desc->action != NULL;
+	dynamic_irq_cleanup(irq);
 }
 
-/* Dynamic irq helper functions */
-extern void dynamic_irq_init(unsigned int irq);
-void dynamic_irq_init_keep_chip_data(unsigned int irq);
-extern void dynamic_irq_cleanup(unsigned int irq);
-void dynamic_irq_cleanup_keep_chip_data(unsigned int irq);
-
 /* Set/get chip/data for an IRQ: */
 extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
 extern int set_irq_data(unsigned int irq, void *data);
 extern int set_irq_chip_data(unsigned int irq, void *data);
 extern int set_irq_type(unsigned int irq, unsigned int type);
 extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
+extern struct irq_data *irq_get_irq_data(unsigned int irq);
 
-#define get_irq_chip(irq)	(irq_to_desc(irq)->chip)
-#define get_irq_chip_data(irq)	(irq_to_desc(irq)->chip_data)
-#define get_irq_data(irq)	(irq_to_desc(irq)->handler_data)
-#define get_irq_msi(irq)	(irq_to_desc(irq)->msi_desc)
-
-#define get_irq_desc_chip(desc)		((desc)->chip)
-#define get_irq_desc_chip_data(desc)	((desc)->chip_data)
-#define get_irq_desc_data(desc)		((desc)->handler_data)
-#define get_irq_desc_msi(desc)		((desc)->msi_desc)
-
-#endif /* CONFIG_GENERIC_HARDIRQS */
-
-#endif /* !CONFIG_S390 */
-
-#ifdef CONFIG_SMP
-/**
- * alloc_desc_masks - allocate cpumasks for irq_desc
- * @desc:	pointer to irq_desc struct
- * @node:	node which will be handling the cpumasks
- * @boot:	true if need bootmem
- *
- * Allocates affinity and pending_mask cpumask if required.
- * Returns true if successful (or not required).
- */
-static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
-							bool boot)
+static inline struct irq_chip *get_irq_chip(unsigned int irq)
 {
-	gfp_t gfp = GFP_ATOMIC;
-
-	if (boot)
-		gfp = GFP_NOWAIT;
-
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	if (!alloc_cpumask_var_node(&desc->affinity, gfp, node))
-		return false;
+	struct irq_data *d = irq_get_irq_data(irq);
+	return d ? d->chip : NULL;
+}
 
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
-		free_cpumask_var(desc->affinity);
-		return false;
-	}
-#endif
-#endif
-	return true;
+static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d)
+{
+	return d->chip;
 }
 
-static inline void init_desc_masks(struct irq_desc *desc)
+static inline void *get_irq_chip_data(unsigned int irq)
 {
-	cpumask_setall(desc->affinity);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	cpumask_clear(desc->pending_mask);
-#endif
+	struct irq_data *d = irq_get_irq_data(irq);
+	return d ? d->chip_data : NULL;
 }
 
-/**
- * init_copy_desc_masks - copy cpumasks for irq_desc
- * @old_desc:	pointer to old irq_desc struct
- * @new_desc:	pointer to new irq_desc struct
- *
- * Insures affinity and pending_masks are copied to new irq_desc.
- * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
- * irq_desc struct so the copy is redundant.
- */
+static inline void *irq_data_get_irq_chip_data(struct irq_data *d)
+{
+	return d->chip_data;
+}
 
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
-					struct irq_desc *new_desc)
+static inline void *get_irq_data(unsigned int irq)
 {
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	cpumask_copy(new_desc->affinity, old_desc->affinity);
+	struct irq_data *d = irq_get_irq_data(irq);
+	return d ? d->handler_data : NULL;
+}
 
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
-#endif
-#endif
+static inline void *irq_data_get_irq_data(struct irq_data *d)
+{
+	return d->handler_data;
 }
 
-static inline void free_desc_masks(struct irq_desc *old_desc,
-				   struct irq_desc *new_desc)
+static inline struct msi_desc *get_irq_msi(unsigned int irq)
 {
-	free_cpumask_var(old_desc->affinity);
+	struct irq_data *d = irq_get_irq_data(irq);
+	return d ? d->msi_desc : NULL;
+}
 
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	free_cpumask_var(old_desc->pending_mask);
-#endif
+static inline struct msi_desc *irq_data_get_msi(struct irq_data *d)
+{
+	return d->msi_desc;
 }
 
-#else /* !CONFIG_SMP */
+int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
+void irq_free_descs(unsigned int irq, unsigned int cnt);
+int irq_reserve_irqs(unsigned int from, unsigned int cnt);
 
-static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
-								bool boot)
+static inline int irq_alloc_desc(int node)
 {
-	return true;
+	return irq_alloc_descs(-1, 0, 1, node);
 }
 
-static inline void init_desc_masks(struct irq_desc *desc)
+static inline int irq_alloc_desc_at(unsigned int at, int node)
 {
+	return irq_alloc_descs(at, at, 1, node);
 }
 
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
-					struct irq_desc *new_desc)
+static inline int irq_alloc_desc_from(unsigned int from, int node)
 {
+	return irq_alloc_descs(-1, from, 1, node);
 }
 
-static inline void free_desc_masks(struct irq_desc *old_desc,
-				   struct irq_desc *new_desc)
+static inline void irq_free_desc(unsigned int irq)
 {
+	irq_free_descs(irq, 1);
 }
-#endif	/* CONFIG_SMP */
+
+#endif /* CONFIG_GENERIC_HARDIRQS */
+
+#endif /* !CONFIG_S390 */
 
 #endif /* _LINUX_IRQ_H */

+ 159 - 0
include/linux/irqdesc.h

@@ -0,0 +1,159 @@
+#ifndef _LINUX_IRQDESC_H
+#define _LINUX_IRQDESC_H
+
+/*
+ * Core internal functions to deal with irq descriptors
+ *
+ * This include will move to kernel/irq once we cleaned up the tree.
+ * For now it's included from <linux/irq.h>
+ */
+
+struct proc_dir_entry;
+struct timer_rand_state;
+/**
+ * struct irq_desc - interrupt descriptor
+ * @irq_data:		per irq and chip data passed down to chip functions
+ * @timer_rand_state:	pointer to timer rand state struct
+ * @kstat_irqs:		irq stats per cpu
+ * @handle_irq:		highlevel irq-events handler [if NULL, __do_IRQ()]
+ * @action:		the irq action chain
+ * @status:		status information
+ * @depth:		disable-depth, for nested irq_disable() calls
+ * @wake_depth:		enable depth, for multiple set_irq_wake() callers
+ * @irq_count:		stats field to detect stalled irqs
+ * @last_unhandled:	aging timer for unhandled count
+ * @irqs_unhandled:	stats field for spurious unhandled interrupts
+ * @lock:		locking for SMP
+ * @pending_mask:	pending rebalanced interrupts
+ * @threads_active:	number of irqaction threads currently running
+ * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
+ * @dir:		/proc/irq/ procfs entry
+ * @name:		flow handler name for /proc/interrupts output
+ */
+struct irq_desc {
+
+#ifdef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
+	struct irq_data		irq_data;
+#else
+	/*
+	 * This union will go away, once we fixed the direct access to
+	 * irq_desc all over the place. The direct fields are a 1:1
+	 * overlay of irq_data.
+	 */
+	union {
+		struct irq_data		irq_data;
+		struct {
+			unsigned int		irq;
+			unsigned int		node;
+			struct irq_chip		*chip;
+			void			*handler_data;
+			void			*chip_data;
+			struct msi_desc		*msi_desc;
+#ifdef CONFIG_SMP
+			cpumask_var_t		affinity;
+#endif
+		};
+	};
+#endif
+
+	struct timer_rand_state *timer_rand_state;
+	unsigned int		*kstat_irqs;
+	irq_flow_handler_t	handle_irq;
+	struct irqaction	*action;	/* IRQ action list */
+	unsigned int		status;		/* IRQ status */
+
+	unsigned int		depth;		/* nested irq disables */
+	unsigned int		wake_depth;	/* nested wake enables */
+	unsigned int		irq_count;	/* For detecting broken IRQs */
+	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
+	unsigned int		irqs_unhandled;
+	raw_spinlock_t		lock;
+#ifdef CONFIG_SMP
+	const struct cpumask	*affinity_hint;
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_var_t		pending_mask;
+#endif
+#endif
+	atomic_t		threads_active;
+	wait_queue_head_t       wait_for_threads;
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry	*dir;
+#endif
+	const char		*name;
+} ____cacheline_internodealigned_in_smp;
+
+#ifndef CONFIG_SPARSE_IRQ
+extern struct irq_desc irq_desc[NR_IRQS];
+#endif
+
+/* Will be removed once the last users in power and sh are gone */
+extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
+static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
+{
+	return desc;
+}
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+
+#define get_irq_desc_chip(desc)		((desc)->irq_data.chip)
+#define get_irq_desc_chip_data(desc)	((desc)->irq_data.chip_data)
+#define get_irq_desc_data(desc)		((desc)->irq_data.handler_data)
+#define get_irq_desc_msi(desc)		((desc)->irq_data.msi_desc)
+
+/*
+ * Monolithic do_IRQ implementation.
+ */
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+extern unsigned int __do_IRQ(unsigned int irq);
+#endif
+
+/*
+ * Architectures call this to let the generic IRQ layer
+ * handle an interrupt. If the descriptor is attached to an
+ * irqchip-style controller then we call the ->handle_irq() handler,
+ * and it calls __do_IRQ() if it's attached to an irqtype-style controller.
+ */
+static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+	desc->handle_irq(irq, desc);
+#else
+	if (likely(desc->handle_irq))
+		desc->handle_irq(irq, desc);
+	else
+		__do_IRQ(irq);
+#endif
+}
+
+static inline void generic_handle_irq(unsigned int irq)
+{
+	generic_handle_irq_desc(irq, irq_to_desc(irq));
+}
+
+/* Test to see if a driver has successfully requested an irq */
+static inline int irq_has_action(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	return desc->action != NULL;
+}
+
+static inline int irq_balancing_disabled(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	return desc->status & IRQ_NO_BALANCING_MASK;
+}
+
+/* caller has locked the irq_desc and both params are valid */
+static inline void __set_irq_handler_unlocked(int irq,
+					      irq_flow_handler_t handler)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->handle_irq = handler;
+}
+#endif
+
+#endif

+ 5 - 0
include/linux/irqnr.h

@@ -25,6 +25,7 @@
 
 extern int nr_irqs;
 extern struct irq_desc *irq_to_desc(unsigned int irq);
+unsigned int irq_get_next_irq(unsigned int offset);
 
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
@@ -47,6 +48,10 @@ extern struct irq_desc *irq_to_desc(unsigned int irq);
 #define irq_node(irq)	0
 #endif
 
+# define for_each_active_irq(irq)			\
+	for (irq = irq_get_next_irq(0); irq < nr_irqs;	\
+	     irq = irq_get_next_irq(irq + 1))
+
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
 #define for_each_irq_nr(irq)                   \

Niektoré súbory nie sú zobrazené, pretože je v týchto rozdielových dátach zmenené mnoho súborov