Ver Fonte

Merge git://git.infradead.org/iommu-2.6

* git://git.infradead.org/iommu-2.6:
  drivers/pci/intr_remapping.c: include acpi.h
  intel-iommu: Fix oops in device_to_iommu() when devices not found.
  intel-iommu: Handle PCI domains appropriately.
  intel-iommu: Fix device-to-iommu mapping for PCI-PCI bridges.
  x2apic/intr-remap: decouple interrupt remapping from x2apic
  x86, dmar: check if it's initialized before disable queue invalidation
  intel-iommu: set compatibility format interrupt
  Intel IOMMU Suspend/Resume Support - Interrupt Remapping
  Intel IOMMU Suspend/Resume Support - Queued Invalidation
  Intel IOMMU Suspend/Resume Support - DMAR
  intel-iommu: Add for_each_iommu() and for_each_active_iommu() macros
Linus Torvalds há 16 anos atrás
pai
commit
ffa009c366

+ 1 - 1
arch/x86/Kconfig

@@ -253,6 +253,7 @@ config SMP
 config X86_X2APIC
 	bool "Support x2apic"
 	depends on X86_LOCAL_APIC && X86_64
+	select INTR_REMAP
 	---help---
 	  This enables x2apic support on CPUs that have this feature.
 
@@ -1881,7 +1882,6 @@ config DMAR_FLOPPY_WA
 config INTR_REMAP
 	bool "Support for Interrupt Remapping (EXPERIMENTAL)"
 	depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
-	select X86_X2APIC
 	---help---
 	  Supports Interrupt remapping for IO-APIC and MSI devices.
 	  To use x2apic mode in the CPU's which support x2APIC enhancements or

+ 3 - 0
arch/x86/include/asm/apic.h

@@ -107,6 +107,9 @@ extern u32 native_safe_apic_wait_icr_idle(void);
 extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
+#define EIM_8BIT_APIC_ID	0
+#define EIM_32BIT_APIC_ID	1
+
 #ifdef CONFIG_X86_X2APIC
 /*
  * Make previous memory operations globally visible before

+ 7 - 4
arch/x86/include/asm/io_apic.h

@@ -162,10 +162,13 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
 #ifdef CONFIG_X86_64
-extern int save_IO_APIC_setup(void);
-extern void mask_IO_APIC_setup(void);
-extern void restore_IO_APIC_setup(void);
-extern void reinit_intr_remapped_IO_APIC(int);
+extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
+extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
+extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
+	struct IO_APIC_route_entry **ioapic_entries);
 #endif
 
 extern void probe_nr_irqs_gsi(void);

+ 62 - 8
arch/x86/kernel/apic/apic.c

@@ -1304,6 +1304,7 @@ void __init enable_IR_x2apic(void)
 #ifdef CONFIG_INTR_REMAP
 	int ret;
 	unsigned long flags;
+	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
 	if (!cpu_has_x2apic)
 		return;
@@ -1334,17 +1335,23 @@ void __init enable_IR_x2apic(void)
 		return;
 	}
 
-	ret = save_IO_APIC_setup();
+	ioapic_entries = alloc_ioapic_entries();
+	if (!ioapic_entries) {
+		pr_info("Allocate ioapic_entries failed: %d\n", ret);
+		goto end;
+	}
+
+	ret = save_IO_APIC_setup(ioapic_entries);
 	if (ret) {
 		pr_info("Saving IO-APIC state failed: %d\n", ret);
 		goto end;
 	}
 
 	local_irq_save(flags);
-	mask_IO_APIC_setup();
+	mask_IO_APIC_setup(ioapic_entries);
 	mask_8259A();
 
-	ret = enable_intr_remapping(1);
+	ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
 
 	if (ret && x2apic_preenabled) {
 		local_irq_restore(flags);
@@ -1364,9 +1371,9 @@ end_restore:
 		/*
 		 * IR enabling failed
 		 */
-		restore_IO_APIC_setup();
+		restore_IO_APIC_setup(ioapic_entries);
 	else
-		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
 
 	unmask_8259A();
 	local_irq_restore(flags);
@@ -1379,6 +1386,8 @@ end:
 			pr_info("Enabled Interrupt-remapping\n");
 	} else
 		pr_err("Failed to enable Interrupt-remapping and x2apic\n");
+	if (ioapic_entries)
+		free_ioapic_entries(ioapic_entries);
 #else
 	if (!cpu_has_x2apic)
 		return;
@@ -1954,6 +1963,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
 	local_irq_save(flags);
 	disable_local_APIC();
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled)
+		disable_intr_remapping();
+#endif
 	local_irq_restore(flags);
 	return 0;
 }
@@ -1964,15 +1977,41 @@ static int lapic_resume(struct sys_device *dev)
 	unsigned long flags;
 	int maxlvt;
 
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	struct IO_APIC_route_entry **ioapic_entries = NULL;
+
 	if (!apic_pm_state.active)
 		return 0;
 
-	maxlvt = lapic_get_maxlvt();
-
 	local_irq_save(flags);
+	if (x2apic) {
+		ioapic_entries = alloc_ioapic_entries();
+		if (!ioapic_entries) {
+			WARN(1, "Alloc ioapic_entries in lapic resume failed.");
+			return -ENOMEM;
+		}
+
+		ret = save_IO_APIC_setup(ioapic_entries);
+		if (ret) {
+			WARN(1, "Saving IO-APIC state failed: %d\n", ret);
+			free_ioapic_entries(ioapic_entries);
+			return ret;
+		}
+
+		mask_IO_APIC_setup(ioapic_entries);
+		mask_8259A();
+		enable_x2apic();
+	}
+#else
+	if (!apic_pm_state.active)
+		return 0;
 
+	local_irq_save(flags);
 	if (x2apic)
 		enable_x2apic();
+#endif
+
 	else {
 		/*
 		 * Make sure the APICBASE points to the right address
@@ -1986,6 +2025,7 @@ static int lapic_resume(struct sys_device *dev)
 		wrmsr(MSR_IA32_APICBASE, l, h);
 	}
 
+	maxlvt = lapic_get_maxlvt();
 	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
 	apic_write(APIC_ID, apic_pm_state.apic_id);
 	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -2009,8 +2049,20 @@ static int lapic_resume(struct sys_device *dev)
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled)
+		reenable_intr_remapping(EIM_32BIT_APIC_ID);
+
+	if (x2apic) {
+		unmask_8259A();
+		restore_IO_APIC_setup(ioapic_entries);
+		free_ioapic_entries(ioapic_entries);
+	}
+#endif
+
 	local_irq_restore(flags);
 
+
 	return 0;
 }
 
@@ -2048,7 +2100,9 @@ static int __init init_lapic_sysfs(void)
 		error = sysdev_register(&device_lapic);
 	return error;
 }
-device_initcall(init_lapic_sysfs);
+
+/* local apic needs to resume before other devices access its registers. */
+core_initcall(init_lapic_sysfs);
 
 #else	/* CONFIG_PM */
 

+ 94 - 46
arch/x86/kernel/apic/io_apic.c

@@ -851,63 +851,74 @@ __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
 #ifdef CONFIG_INTR_REMAP
-/* I/O APIC RTE contents at the OS boot up */
-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+struct IO_APIC_route_entry **alloc_ioapic_entries(void)
+{
+	int apic;
+	struct IO_APIC_route_entry **ioapic_entries;
+
+	ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
+				GFP_ATOMIC);
+	if (!ioapic_entries)
+		return 0;
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		ioapic_entries[apic] =
+			kzalloc(sizeof(struct IO_APIC_route_entry) *
+				nr_ioapic_registers[apic], GFP_ATOMIC);
+		if (!ioapic_entries[apic])
+			goto nomem;
+	}
+
+	return ioapic_entries;
+
+nomem:
+	while (--apic >= 0)
+		kfree(ioapic_entries[apic]);
+	kfree(ioapic_entries);
+
+	return 0;
+}
 
 /*
  * Saves all the IO-APIC RTE's
  */
-int save_IO_APIC_setup(void)
+int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
 {
-	union IO_APIC_reg_01 reg_01;
-	unsigned long flags;
 	int apic, pin;
 
-	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
-	 */
-	for (apic = 0; apic < nr_ioapics; apic++) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		reg_01.raw = io_apic_read(apic, 1);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
-	}
+	if (!ioapic_entries)
+		return -ENOMEM;
 
 	for (apic = 0; apic < nr_ioapics; apic++) {
-		early_ioapic_entries[apic] =
-			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				nr_ioapic_registers[apic], GFP_KERNEL);
-		if (!early_ioapic_entries[apic])
-			goto nomem;
-	}
+		if (!ioapic_entries[apic])
+			return -ENOMEM;
 
-	for (apic = 0; apic < nr_ioapics; apic++)
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
-			early_ioapic_entries[apic][pin] =
+			ioapic_entries[apic][pin] =
 				ioapic_read_entry(apic, pin);
+	}
 
 	return 0;
-
-nomem:
-	while (apic >= 0)
-		kfree(early_ioapic_entries[apic--]);
-	memset(early_ioapic_entries, 0,
-		ARRAY_SIZE(early_ioapic_entries));
-
-	return -ENOMEM;
 }
 
-void mask_IO_APIC_setup(void)
+/*
+ * Mask all IO APIC entries.
+ */
+void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
 {
 	int apic, pin;
 
+	if (!ioapic_entries)
+		return;
+
 	for (apic = 0; apic < nr_ioapics; apic++) {
-		if (!early_ioapic_entries[apic])
+		if (!ioapic_entries[apic])
 			break;
+
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 			struct IO_APIC_route_entry entry;
 
-			entry = early_ioapic_entries[apic][pin];
+			entry = ioapic_entries[apic][pin];
 			if (!entry.mask) {
 				entry.mask = 1;
 				ioapic_write_entry(apic, pin, entry);
@@ -916,22 +927,30 @@ void mask_IO_APIC_setup(void)
 	}
 }
 
-void restore_IO_APIC_setup(void)
+/*
+ * Restore IO APIC entries which was saved in ioapic_entries.
+ */
+int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
 {
 	int apic, pin;
 
+	if (!ioapic_entries)
+		return -ENOMEM;
+
 	for (apic = 0; apic < nr_ioapics; apic++) {
-		if (!early_ioapic_entries[apic])
-			break;
+		if (!ioapic_entries[apic])
+			return -ENOMEM;
+
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
 			ioapic_write_entry(apic, pin,
-					   early_ioapic_entries[apic][pin]);
-		kfree(early_ioapic_entries[apic]);
-		early_ioapic_entries[apic] = NULL;
+					ioapic_entries[apic][pin]);
 	}
+	return 0;
 }
 
-void reinit_intr_remapped_IO_APIC(int intr_remapping)
+void reinit_intr_remapped_IO_APIC(int intr_remapping,
+	struct IO_APIC_route_entry **ioapic_entries)
+
 {
 	/*
 	 * for now plain restore of previous settings.
@@ -940,7 +959,17 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
 	 * table entries. for now, do a plain restore, and wait for
 	 * the setup_IO_APIC_irqs() to do proper initialization.
 	 */
-	restore_IO_APIC_setup();
+	restore_IO_APIC_setup(ioapic_entries);
+}
+
+void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
+{
+	int apic;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		kfree(ioapic_entries[apic]);
+
+	kfree(ioapic_entries);
 }
 #endif
 
@@ -2495,7 +2524,7 @@ static void irq_complete_move(struct irq_desc **descp)
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
 
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_X86_X2APIC
 static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 {
 	int apic, pin;
@@ -2540,7 +2569,6 @@ static void ack_x2apic_edge(unsigned int irq)
 {
 	ack_x2APIC_irq();
 }
-
 #endif
 
 static void ack_apic_edge(unsigned int irq)
@@ -2651,6 +2679,26 @@ static void ack_apic_level(unsigned int irq)
 #endif
 }
 
+#ifdef CONFIG_INTR_REMAP
+static void ir_ack_apic_edge(unsigned int irq)
+{
+#ifdef CONFIG_X86_X2APIC
+       if (x2apic_enabled())
+               return ack_x2apic_edge(irq);
+#endif
+       return ack_apic_edge(irq);
+}
+
+static void ir_ack_apic_level(unsigned int irq)
+{
+#ifdef CONFIG_X86_X2APIC
+       if (x2apic_enabled())
+               return ack_x2apic_level(irq);
+#endif
+       return ack_apic_level(irq);
+}
+#endif /* CONFIG_INTR_REMAP */
+
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name		= "IO-APIC",
 	.startup	= startup_ioapic_irq,
@@ -2670,8 +2718,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
 	.mask		= mask_IO_APIC_irq,
 	.unmask		= unmask_IO_APIC_irq,
 #ifdef CONFIG_INTR_REMAP
-	.ack		= ack_x2apic_edge,
-	.eoi		= ack_x2apic_level,
+	.ack		= ir_ack_apic_edge,
+	.eoi		= ir_ack_apic_level,
 #ifdef CONFIG_SMP
 	.set_affinity	= set_ir_ioapic_affinity_irq,
 #endif
@@ -3397,7 +3445,7 @@ static struct irq_chip msi_ir_chip = {
 	.unmask		= unmask_msi_irq,
 	.mask		= mask_msi_irq,
 #ifdef CONFIG_INTR_REMAP
-	.ack		= ack_x2apic_edge,
+	.ack		= ir_ack_apic_edge,
 #ifdef CONFIG_SMP
 	.set_affinity	= ir_set_msi_irq_affinity,
 #endif

+ 56 - 15
drivers/pci/dmar.c

@@ -180,6 +180,7 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
 	dmaru->hdr = header;
 	drhd = (struct acpi_dmar_hardware_unit *)header;
 	dmaru->reg_base_addr = drhd->address;
+	dmaru->segment = drhd->segment;
 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
 
 	ret = alloc_iommu(dmaru);
@@ -789,6 +790,35 @@ end:
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+/*
+ * Enable queued invalidation.
+ */
+static void __dmar_enable_qi(struct intel_iommu *iommu)
+{
+	u32 cmd, sts;
+	unsigned long flags;
+	struct q_inval *qi = iommu->qi;
+
+	qi->free_head = qi->free_tail = 0;
+	qi->free_cnt = QI_LENGTH;
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+
+	/* write zero to the tail reg */
+	writel(0, iommu->reg + DMAR_IQT_REG);
+
+	dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+
+	cmd = iommu->gcmd | DMA_GCMD_QIE;
+	iommu->gcmd |= DMA_GCMD_QIE;
+	writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+	/* Make sure hardware complete it */
+	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
+
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
 /*
  * Enable Queued Invalidation interface. This is a must to support
  * interrupt-remapping. Also used by DMA-remapping, which replaces
@@ -796,8 +826,6 @@ end:
  */
 int dmar_enable_qi(struct intel_iommu *iommu)
 {
-	u32 cmd, sts;
-	unsigned long flags;
 	struct q_inval *qi;
 
 	if (!ecap_qis(iommu->ecap))
@@ -835,19 +863,7 @@ int dmar_enable_qi(struct intel_iommu *iommu)
 
 	spin_lock_init(&qi->q_lock);
 
-	spin_lock_irqsave(&iommu->register_lock, flags);
-	/* write zero to the tail reg */
-	writel(0, iommu->reg + DMAR_IQT_REG);
-
-	dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
-
-	cmd = iommu->gcmd | DMA_GCMD_QIE;
-	iommu->gcmd |= DMA_GCMD_QIE;
-	writel(cmd, iommu->reg + DMAR_GCMD_REG);
-
-	/* Make sure hardware complete it */
-	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
-	spin_unlock_irqrestore(&iommu->register_lock, flags);
+	__dmar_enable_qi(iommu);
 
 	return 0;
 }
@@ -1102,3 +1118,28 @@ int __init enable_drhd_fault_handling(void)
 
 	return 0;
 }
+
+/*
+ * Re-enable Queued Invalidation interface.
+ */
+int dmar_reenable_qi(struct intel_iommu *iommu)
+{
+	if (!ecap_qis(iommu->ecap))
+		return -ENOENT;
+
+	if (!iommu->qi)
+		return -ENOENT;
+
+	/*
+	 * First disable queued invalidation.
+	 */
+	dmar_disable_qi(iommu);
+	/*
+	 * Then enable queued invalidation again. Since there is no pending
+	 * invalidation requests now, it's safe to re-enable queued
+	 * invalidation.
+	 */
+	__dmar_enable_qi(iommu);
+
+	return 0;
+}

+ 210 - 29
drivers/pci/intel-iommu.c

@@ -36,6 +36,7 @@
 #include <linux/iova.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/sysdev.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
 #include "pci.h"
@@ -247,7 +248,8 @@ struct dmar_domain {
 struct device_domain_info {
 	struct list_head link;	/* link to domain siblings */
 	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus numer */
+	int segment;		/* PCI domain */
+	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
 	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
 	struct dmar_domain *domain; /* pointer to domain */
@@ -467,7 +469,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 	domain_update_iommu_snooping(domain);
 }
 
-static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
+static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
 {
 	struct dmar_drhd_unit *drhd = NULL;
 	int i;
@@ -475,12 +477,20 @@ static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
 	for_each_drhd_unit(drhd) {
 		if (drhd->ignored)
 			continue;
+		if (segment != drhd->segment)
+			continue;
 
-		for (i = 0; i < drhd->devices_cnt; i++)
+		for (i = 0; i < drhd->devices_cnt; i++) {
 			if (drhd->devices[i] &&
 			    drhd->devices[i]->bus->number == bus &&
 			    drhd->devices[i]->devfn == devfn)
 				return drhd->iommu;
+			if (drhd->devices[i] &&
+			    drhd->devices[i]->subordinate &&
+			    drhd->devices[i]->subordinate->number <= bus &&
+			    drhd->devices[i]->subordinate->subordinate >= bus)
+				return drhd->iommu;
+		}
 
 		if (drhd->include_all)
 			return drhd->iommu;
@@ -1312,7 +1322,7 @@ static void domain_exit(struct dmar_domain *domain)
 }
 
 static int domain_context_mapping_one(struct dmar_domain *domain,
-		u8 bus, u8 devfn)
+				      int segment, u8 bus, u8 devfn)
 {
 	struct context_entry *context;
 	unsigned long flags;
@@ -1327,7 +1337,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 	BUG_ON(!domain->pgd);
 
-	iommu = device_to_iommu(bus, devfn);
+	iommu = device_to_iommu(segment, bus, devfn);
 	if (!iommu)
 		return -ENODEV;
 
@@ -1417,8 +1427,8 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 	int ret;
 	struct pci_dev *tmp, *parent;
 
-	ret = domain_context_mapping_one(domain, pdev->bus->number,
-		pdev->devfn);
+	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
+					 pdev->bus->number, pdev->devfn);
 	if (ret)
 		return ret;
 
@@ -1429,18 +1439,23 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 	/* Secondary interface's bus number and devfn 0 */
 	parent = pdev->bus->self;
 	while (parent != tmp) {
-		ret = domain_context_mapping_one(domain, parent->bus->number,
-			parent->devfn);
+		ret = domain_context_mapping_one(domain,
+						 pci_domain_nr(parent->bus),
+						 parent->bus->number,
+						 parent->devfn);
 		if (ret)
 			return ret;
 		parent = parent->bus->self;
 	}
 	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
 		return domain_context_mapping_one(domain,
-			tmp->subordinate->number, 0);
+					pci_domain_nr(tmp->subordinate),
+					tmp->subordinate->number, 0);
 	else /* this is a legacy PCI bridge */
 		return domain_context_mapping_one(domain,
-			tmp->bus->number, tmp->devfn);
+						  pci_domain_nr(tmp->bus),
+						  tmp->bus->number,
+						  tmp->devfn);
 }
 
 static int domain_context_mapped(struct pci_dev *pdev)
@@ -1449,12 +1464,12 @@ static int domain_context_mapped(struct pci_dev *pdev)
 	struct pci_dev *tmp, *parent;
 	struct intel_iommu *iommu;
 
-	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+				pdev->devfn);
 	if (!iommu)
 		return -ENODEV;
 
-	ret = device_context_mapped(iommu,
-		pdev->bus->number, pdev->devfn);
+	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
 	if (!ret)
 		return ret;
 	/* dependent device mapping */
@@ -1465,17 +1480,17 @@ static int domain_context_mapped(struct pci_dev *pdev)
 	parent = pdev->bus->self;
 	while (parent != tmp) {
 		ret = device_context_mapped(iommu, parent->bus->number,
-			parent->devfn);
+					    parent->devfn);
 		if (!ret)
 			return ret;
 		parent = parent->bus->self;
 	}
 	if (tmp->is_pcie)
-		return device_context_mapped(iommu,
-			tmp->subordinate->number, 0);
+		return device_context_mapped(iommu, tmp->subordinate->number,
+					     0);
 	else
-		return device_context_mapped(iommu,
-			tmp->bus->number, tmp->devfn);
+		return device_context_mapped(iommu, tmp->bus->number,
+					     tmp->devfn);
 }
 
 static int
@@ -1542,7 +1557,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
 			info->dev->dev.archdata.iommu = NULL;
 		spin_unlock_irqrestore(&device_domain_lock, flags);
 
-		iommu = device_to_iommu(info->bus, info->devfn);
+		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 		free_devinfo_mem(info);
 
@@ -1577,11 +1592,14 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 	struct pci_dev *dev_tmp;
 	unsigned long flags;
 	int bus = 0, devfn = 0;
+	int segment;
 
 	domain = find_domain(pdev);
 	if (domain)
 		return domain;
 
+	segment = pci_domain_nr(pdev->bus);
+
 	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
 	if (dev_tmp) {
 		if (dev_tmp->is_pcie) {
@@ -1593,7 +1611,8 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 		}
 		spin_lock_irqsave(&device_domain_lock, flags);
 		list_for_each_entry(info, &device_domain_list, global) {
-			if (info->bus == bus && info->devfn == devfn) {
+			if (info->segment == segment &&
+			    info->bus == bus && info->devfn == devfn) {
 				found = info->domain;
 				break;
 			}
@@ -1631,6 +1650,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 			domain_exit(domain);
 			goto error;
 		}
+		info->segment = segment;
 		info->bus = bus;
 		info->devfn = devfn;
 		info->dev = NULL;
@@ -1642,7 +1662,8 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 		found = NULL;
 		spin_lock_irqsave(&device_domain_lock, flags);
 		list_for_each_entry(tmp, &device_domain_list, global) {
-			if (tmp->bus == bus && tmp->devfn == devfn) {
+			if (tmp->segment == segment &&
+			    tmp->bus == bus && tmp->devfn == devfn) {
 				found = tmp->domain;
 				break;
 			}
@@ -1662,6 +1683,7 @@ found_domain:
 	info = alloc_devinfo_mem();
 	if (!info)
 		goto error;
+	info->segment = segment;
 	info->bus = pdev->bus->number;
 	info->devfn = pdev->devfn;
 	info->dev = pdev;
@@ -1946,6 +1968,15 @@ static int __init init_dmars(void)
 		}
 	}
 
+#ifdef CONFIG_INTR_REMAP
+	if (!intr_remapping_enabled) {
+		ret = enable_intr_remapping(0);
+		if (ret)
+			printk(KERN_ERR
+			       "IOMMU: enable interrupt remapping failed\n");
+	}
+#endif
+
 	/*
 	 * For each rmrr
 	 *   for each dev attached to rmrr
@@ -2597,6 +2628,150 @@ static void __init init_no_remapping_devices(void)
 	}
 }
 
+#ifdef CONFIG_SUSPEND
+static int init_iommu_hw(void)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu = NULL;
+
+	for_each_active_iommu(iommu, drhd)
+		if (iommu->qi)
+			dmar_reenable_qi(iommu);
+
+	for_each_active_iommu(iommu, drhd) {
+		iommu_flush_write_buffer(iommu);
+
+		iommu_set_root_entry(iommu);
+
+		iommu->flush.flush_context(iommu, 0, 0, 0,
+						DMA_CCMD_GLOBAL_INVL, 0);
+		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+						DMA_TLB_GLOBAL_FLUSH, 0);
+		iommu_disable_protect_mem_regions(iommu);
+		iommu_enable_translation(iommu);
+	}
+
+	return 0;
+}
+
+static void iommu_flush_all(void)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu;
+
+	for_each_active_iommu(iommu, drhd) {
+		iommu->flush.flush_context(iommu, 0, 0, 0,
+						DMA_CCMD_GLOBAL_INVL, 0);
+		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
+						DMA_TLB_GLOBAL_FLUSH, 0);
+	}
+}
+
+static int iommu_suspend(struct sys_device *dev, pm_message_t state)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu = NULL;
+	unsigned long flag;
+
+	for_each_active_iommu(iommu, drhd) {
+		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
+						 GFP_ATOMIC);
+		if (!iommu->iommu_state)
+			goto nomem;
+	}
+
+	iommu_flush_all();
+
+	for_each_active_iommu(iommu, drhd) {
+		iommu_disable_translation(iommu);
+
+		spin_lock_irqsave(&iommu->register_lock, flag);
+
+		iommu->iommu_state[SR_DMAR_FECTL_REG] =
+			readl(iommu->reg + DMAR_FECTL_REG);
+		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
+			readl(iommu->reg + DMAR_FEDATA_REG);
+		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
+			readl(iommu->reg + DMAR_FEADDR_REG);
+		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
+			readl(iommu->reg + DMAR_FEUADDR_REG);
+
+		spin_unlock_irqrestore(&iommu->register_lock, flag);
+	}
+	return 0;
+
+nomem:
+	for_each_active_iommu(iommu, drhd)
+		kfree(iommu->iommu_state);
+
+	return -ENOMEM;
+}
+
+static int iommu_resume(struct sys_device *dev)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu = NULL;
+	unsigned long flag;
+
+	if (init_iommu_hw()) {
+		WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
+		return -EIO;
+	}
+
+	for_each_active_iommu(iommu, drhd) {
+
+		spin_lock_irqsave(&iommu->register_lock, flag);
+
+		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
+			iommu->reg + DMAR_FECTL_REG);
+		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
+			iommu->reg + DMAR_FEDATA_REG);
+		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
+			iommu->reg + DMAR_FEADDR_REG);
+		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
+			iommu->reg + DMAR_FEUADDR_REG);
+
+		spin_unlock_irqrestore(&iommu->register_lock, flag);
+	}
+
+	for_each_active_iommu(iommu, drhd)
+		kfree(iommu->iommu_state);
+
+	return 0;
+}
+
+static struct sysdev_class iommu_sysclass = {
+	.name		= "iommu",
+	.resume		= iommu_resume,
+	.suspend	= iommu_suspend,
+};
+
+static struct sys_device device_iommu = {
+	.cls	= &iommu_sysclass,
+};
+
+static int __init init_iommu_sysfs(void)
+{
+	int error;
+
+	error = sysdev_class_register(&iommu_sysclass);
+	if (error)
+		return error;
+
+	error = sysdev_register(&device_iommu);
+	if (error)
+		sysdev_class_unregister(&iommu_sysclass);
+
+	return error;
+}
+
+#else
+static int __init init_iommu_sysfs(void)
+{
+	return 0;
+}
+#endif	/* CONFIG_PM */
+
 int __init intel_iommu_init(void)
 {
 	int ret = 0;
@@ -2632,6 +2807,7 @@ int __init intel_iommu_init(void)
 	init_timer(&unmap_timer);
 	force_iommu = 1;
 	dma_ops = &intel_dma_ops;
+	init_iommu_sysfs();
 
 	register_iommu(&intel_iommu_ops);
 
@@ -2648,6 +2824,7 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain,
 	if (!info)
 		return -ENOMEM;
 
+	info->segment = pci_domain_nr(pdev->bus);
 	info->bus = pdev->bus->number;
 	info->devfn = pdev->devfn;
 	info->dev = pdev;
@@ -2677,15 +2854,15 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
 		parent = pdev->bus->self;
 		while (parent != tmp) {
 			iommu_detach_dev(iommu, parent->bus->number,
-				parent->devfn);
+					 parent->devfn);
 			parent = parent->bus->self;
 		}
 		if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
 			iommu_detach_dev(iommu,
 				tmp->subordinate->number, 0);
 		else /* this is a legacy PCI bridge */
-			iommu_detach_dev(iommu,
-				tmp->bus->number, tmp->devfn);
+			iommu_detach_dev(iommu, tmp->bus->number,
+					 tmp->devfn);
 	}
 }
 
@@ -2698,13 +2875,15 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
 	int found = 0;
 	struct list_head *entry, *tmp;
 
-	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+				pdev->devfn);
 	if (!iommu)
 		return;
 
 	spin_lock_irqsave(&device_domain_lock, flags);
 	list_for_each_safe(entry, tmp, &domain->devices) {
 		info = list_entry(entry, struct device_domain_info, link);
+		/* No need to compare PCI domain; it has to be the same */
 		if (info->bus == pdev->bus->number &&
 		    info->devfn == pdev->devfn) {
 			list_del(&info->link);
@@ -2729,7 +2908,8 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
 		 * owned by this domain, clear this iommu in iommu_bmp
 		 * update iommu count and coherency
 		 */
-		if (device_to_iommu(info->bus, info->devfn) == iommu)
+		if (iommu == device_to_iommu(info->segment, info->bus,
+					    info->devfn))
 			found = 1;
 	}
 
@@ -2762,7 +2942,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
 
 		spin_unlock_irqrestore(&device_domain_lock, flags1);
 
-		iommu = device_to_iommu(info->bus, info->devfn);
+		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 		iommu_detach_dependent_devices(iommu, info->dev);
 
@@ -2950,7 +3130,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		}
 	}
 
-	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
+				pdev->devfn);
 	if (!iommu)
 		return -ENODEV;
 

+ 82 - 2
drivers/pci/intr_remapping.c

@@ -9,6 +9,7 @@
 #include <asm/cpu.h>
 #include <linux/intel-iommu.h>
 #include "intr_remapping.h"
+#include <acpi/acpi.h>
 
 static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
@@ -415,12 +416,27 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
 
 	/* Set interrupt-remapping table pointer */
 	cmd = iommu->gcmd | DMA_GCMD_SIRTP;
+	iommu->gcmd |= DMA_GCMD_SIRTP;
 	writel(cmd, iommu->reg + DMAR_GCMD_REG);
 
 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 		      readl, (sts & DMA_GSTS_IRTPS), sts);
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 
+	if (mode == 0) {
+		spin_lock_irqsave(&iommu->register_lock, flags);
+
+		/* enable comaptiblity format interrupt pass through */
+		cmd = iommu->gcmd | DMA_GCMD_CFI;
+		iommu->gcmd |= DMA_GCMD_CFI;
+		writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+		IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+			      readl, (sts & DMA_GSTS_CFIS), sts);
+
+		spin_unlock_irqrestore(&iommu->register_lock, flags);
+	}
+
 	/*
 	 * global invalidation of interrupt entry cache before enabling
 	 * interrupt-remapping.
@@ -470,7 +486,7 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
 /*
  * Disable Interrupt Remapping.
  */
-static void disable_intr_remapping(struct intel_iommu *iommu)
+static void iommu_disable_intr_remapping(struct intel_iommu *iommu)
 {
 	unsigned long flags;
 	u32 sts;
@@ -478,6 +494,12 @@ static void disable_intr_remapping(struct intel_iommu *iommu)
 	if (!ecap_ir_support(iommu->ecap))
 		return;
 
+	/*
+	 * global invalidation of interrupt entry cache before disabling
+	 * interrupt-remapping.
+	 */
+	qi_global_iec(iommu);
+
 	spin_lock_irqsave(&iommu->register_lock, flags);
 
 	sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
@@ -502,6 +524,13 @@ int __init enable_intr_remapping(int eim)
 	for_each_drhd_unit(drhd) {
 		struct intel_iommu *iommu = drhd->iommu;
 
+		/*
+		 * If the queued invalidation is already initialized,
+		 * shouldn't disable it.
+		 */
+		if (iommu->qi)
+			continue;
+
 		/*
 		 * Clear previous faults.
 		 */
@@ -511,7 +540,7 @@ int __init enable_intr_remapping(int eim)
 		 * Disable intr remapping and queued invalidation, if already
 		 * enabled prior to OS handover.
 		 */
-		disable_intr_remapping(iommu);
+		iommu_disable_intr_remapping(iommu);
 
 		dmar_disable_qi(iommu);
 	}
@@ -639,3 +668,54 @@ int __init parse_ioapics_under_ir(void)
 
 	return ir_supported;
 }
+
+void disable_intr_remapping(void)
+{
+	struct dmar_drhd_unit *drhd;
+	struct intel_iommu *iommu = NULL;
+
+	/*
+	 * Disable Interrupt-remapping for all the DRHD's now.
+	 */
+	for_each_iommu(iommu, drhd) {
+		if (!ecap_ir_support(iommu->ecap))
+			continue;
+
+		iommu_disable_intr_remapping(iommu);
+	}
+}
+
+int reenable_intr_remapping(int eim)
+{
+	struct dmar_drhd_unit *drhd;
+	int setup = 0;
+	struct intel_iommu *iommu = NULL;
+
+	for_each_iommu(iommu, drhd)
+		if (iommu->qi)
+			dmar_reenable_qi(iommu);
+
+	/*
+	 * Setup Interrupt-remapping for all the DRHD's now.
+	 */
+	for_each_iommu(iommu, drhd) {
+		if (!ecap_ir_support(iommu->ecap))
+			continue;
+
+		/* Set up interrupt remapping for iommu.*/
+		iommu_set_intr_remapping(iommu, eim);
+		setup = 1;
+	}
+
+	if (!setup)
+		goto error;
+
+	return 0;
+
+error:
+	/*
+	 * handle error condition gracefully here!
+	 */
+	return -1;
+}
+

+ 11 - 0
include/linux/dmar.h

@@ -34,6 +34,7 @@ struct dmar_drhd_unit {
 	u64	reg_base_addr;		/* register base address*/
 	struct	pci_dev **devices; 	/* target device array	*/
 	int	devices_cnt;		/* target device count	*/
+	u16	segment;		/* PCI domain		*/
 	u8	ignored:1; 		/* ignore drhd		*/
 	u8	include_all:1;
 	struct intel_iommu *iommu;
@@ -44,6 +45,14 @@ extern struct list_head dmar_drhd_units;
 #define for_each_drhd_unit(drhd) \
 	list_for_each_entry(drhd, &dmar_drhd_units, list)
 
+#define for_each_active_iommu(i, drhd)					\
+	list_for_each_entry(drhd, &dmar_drhd_units, list)		\
+		if (i=drhd->iommu, drhd->ignored) {} else
+
+#define for_each_iommu(i, drhd)						\
+	list_for_each_entry(drhd, &dmar_drhd_units, list)		\
+		if (i=drhd->iommu, 0) {} else 
+
 extern int dmar_table_init(void);
 extern int dmar_dev_scope_init(void);
 
@@ -100,6 +109,8 @@ struct irte {
 #ifdef CONFIG_INTR_REMAP
 extern int intr_remapping_enabled;
 extern int enable_intr_remapping(int);
+extern void disable_intr_remapping(void);
+extern int reenable_intr_remapping(int);
 
 extern int get_irte(int irq, struct irte *entry);
 extern int modify_irte(int irq, struct irte *irte_modified);

+ 13 - 0
include/linux/intel-iommu.h

@@ -164,6 +164,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_GCMD_QIE (((u32)1) << 26)
 #define DMA_GCMD_SIRTP (((u32)1) << 24)
 #define DMA_GCMD_IRE (((u32) 1) << 25)
+#define DMA_GCMD_CFI (((u32) 1) << 23)
 
 /* GSTS_REG */
 #define DMA_GSTS_TES (((u32)1) << 31)
@@ -174,6 +175,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_GSTS_QIES (((u32)1) << 26)
 #define DMA_GSTS_IRTPS (((u32)1) << 24)
 #define DMA_GSTS_IRES (((u32)1) << 25)
+#define DMA_GSTS_CFIS (((u32)1) << 23)
 
 /* CCMD_REG */
 #define DMA_CCMD_ICC (((u64)1) << 63)
@@ -284,6 +286,14 @@ struct iommu_flush {
 		unsigned int size_order, u64 type, int non_present_entry_flush);
 };
 
+enum {
+	SR_DMAR_FECTL_REG,
+	SR_DMAR_FEDATA_REG,
+	SR_DMAR_FEADDR_REG,
+	SR_DMAR_FEUADDR_REG,
+	MAX_SR_DMAR_REGS
+};
+
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64		cap;
@@ -304,6 +314,8 @@ struct intel_iommu {
 	struct iommu_flush flush;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
+	u32 *iommu_state; /* Store iommu states between suspend and resume.*/
+
 #ifdef CONFIG_INTR_REMAP
 	struct ir_table *ir_table;	/* Interrupt remapping info */
 #endif
@@ -322,6 +334,7 @@ extern int alloc_iommu(struct dmar_drhd_unit *drhd);
 extern void free_iommu(struct intel_iommu *iommu);
 extern int dmar_enable_qi(struct intel_iommu *iommu);
 extern void dmar_disable_qi(struct intel_iommu *iommu);
+extern int dmar_reenable_qi(struct intel_iommu *iommu);
 extern void qi_global_iec(struct intel_iommu *iommu);
 
 extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,