
Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6

* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6: (88 commits)
  PCI: fix HT MSI mapping fix
  PCI: don't enable too much HT MSI mapping
  x86/PCI: make pci=lastbus=255 work when acpi is on
  PCI: save and restore PCIe 2.0 registers
  PCI: update fakephp for bus_id removal
  PCI: fix kernel oops on bridge removal
  PCI: fix conflict between SR-IOV and config space sizing
  powerpc/PCI: include pci.h in powerpc MSI implementation
  PCI Hotplug: schedule fakephp for feature removal
  PCI Hotplug: rename legacy_fakephp to fakephp
  PCI Hotplug: restore fakephp interface with complete reimplementation
  PCI: Introduce /sys/bus/pci/devices/.../rescan
  PCI: Introduce /sys/bus/pci/devices/.../remove
  PCI: Introduce /sys/bus/pci/rescan
  PCI: Introduce pci_rescan_bus()
  PCI: do not enable bridges more than once
  PCI: do not initialize bridges more than once
  PCI: always scan child buses
  PCI: pci_scan_slot() returns newly found devices
  PCI: don't scan existing devices
  ...

Fix trivial append-only conflict in Documentation/feature-removal-schedule.txt
Linus Torvalds 16 years ago
parent
commit
e76e5b2c66
62 changed files with 3641 additions and 1902 deletions
  1. Documentation/ABI/testing/sysfs-bus-pci (+70, -0)
  2. Documentation/DocBook/kernel-api.tmpl (+1, -0)
  3. Documentation/PCI/MSI-HOWTO.txt (+332, -482)
  4. Documentation/PCI/pci-iov-howto.txt (+99, -0)
  5. Documentation/feature-removal-schedule.txt (+32, -0)
  6. Documentation/filesystems/sysfs-pci.txt (+10, -0)
  7. Documentation/kernel-parameters.txt (+11, -0)
  8. arch/alpha/include/asm/pci.h (+14, -0)
  9. arch/alpha/kernel/Makefile (+1, -1)
  10. arch/alpha/kernel/pci-sysfs.c (+366, -0)
  11. arch/powerpc/include/asm/pci.h (+4, -0)
  12. arch/powerpc/kernel/msi.c (+5, -0)
  13. arch/x86/include/asm/pci.h (+3, -0)
  14. arch/x86/kernel/apic/io_apic.c (+4, -0)
  15. arch/x86/kernel/pci-dma.c (+1, -2)
  16. arch/x86/pci/early.c (+11, -8)
  17. arch/x86/pci/fixup.c (+0, -20)
  18. arch/x86/pci/legacy.c (+1, -2)
  19. arch/x86/pci/mmconfig-shared.c (+173, -54)
  20. arch/x86/pci/mmconfig_64.c (+11, -6)
  21. drivers/acpi/pci_root.c (+178, -2)
  22. drivers/pci/Kconfig (+10, -0)
  23. drivers/pci/Makefile (+2, -0)
  24. drivers/pci/bus.c (+5, -3)
  25. drivers/pci/hotplug/acpi_pcihp.c (+21, -37)
  26. drivers/pci/hotplug/fakephp.c (+106, -338)
  27. drivers/pci/hotplug/pciehp.h (+7, -6)
  28. drivers/pci/hotplug/pciehp_acpi.c (+6, -15)
  29. drivers/pci/hotplug/pciehp_core.c (+5, -13)
  30. drivers/pci/hotplug/pciehp_hpc.c (+13, -21)
  31. drivers/pci/hotplug/shpchp.h (+5, -5)
  32. drivers/pci/hotplug/shpchp_pci.c (+1, -1)
  33. drivers/pci/intel-iommu.c (+1, -1)
  34. drivers/pci/iov.c (+680, -0)
  35. drivers/pci/msi.c (+212, -214)
  36. drivers/pci/msi.h (+0, -6)
  37. drivers/pci/pci-acpi.c (+0, -215)
  38. drivers/pci/pci-driver.c (+79, -2)
  39. drivers/pci/pci-sysfs.c (+120, -4)
  40. drivers/pci/pci.c (+171, -22)
  41. drivers/pci/pci.h (+65, -0)
  42. drivers/pci/pcie/aer/aerdrv.c (+4, -24)
  43. drivers/pci/pcie/aer/aerdrv_acpi.c (+1, -1)
  44. drivers/pci/pcie/aer/aerdrv_core.c (+5, -5)
  45. drivers/pci/pcie/portdrv.h (+8, -6)
  46. drivers/pci/pcie/portdrv_bus.c (+10, -8)
  47. drivers/pci/pcie/portdrv_core.c (+248, -131)
  48. drivers/pci/pcie/portdrv_pci.c (+19, -31)
  49. drivers/pci/probe.c (+130, -80)
  50. drivers/pci/quirks.c (+182, -39)
  51. drivers/pci/remove.c (+4, -0)
  52. drivers/pci/search.c (+1, -1)
  53. drivers/pci/setup-bus.c (+5, -2)
  54. drivers/pci/setup-res.c (+15, -0)
  55. drivers/pci/slot.c (+8, -10)
  56. include/linux/acpi.h (+34, -0)
  57. include/linux/msi.h (+8, -5)
  58. include/linux/pci-acpi.h (+10, -57)
  59. include/linux/pci.h (+57, -4)
  60. include/linux/pci_ids.h (+1, -0)
  61. include/linux/pci_regs.h (+36, -1)
  62. include/linux/pcieport_if.h (+19, -17)

+ 70 - 0
Documentation/ABI/testing/sysfs-bus-pci

@@ -41,6 +41,49 @@ Description:
 		for the device and attempt to bind to it.  For example:
 		# echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id
 
+What:		/sys/bus/pci/drivers/.../remove_id
+Date:		February 2009
+Contact:	Chris Wright <chrisw@sous-sol.org>
+Description:
+		Writing a device ID to this file will remove an ID
+		that was dynamically added via the new_id sysfs entry.
+		The format for the device ID is:
+		VVVV DDDD SVVV SDDD CCCC MMMM.	That is Vendor ID, Device
+		ID, Subsystem Vendor ID, Subsystem Device ID, Class,
+		and Class Mask.  The Vendor ID and Device ID fields are
+		required, the rest are optional.  After successfully
+		removing an ID, the driver will no longer support the
+		device.  This is useful to ensure auto probing won't
+		match the driver to the device.  For example:
+		# echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id
+
+What:		/sys/bus/pci/rescan
+Date:		January 2009
+Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+		Writing a non-zero value to this attribute will
+		force a rescan of all PCI buses in the system, and
+		re-discover previously removed devices.
+		Depends on CONFIG_HOTPLUG.
+
+What:		/sys/bus/pci/devices/.../remove
+Date:		January 2009
+Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+		Writing a non-zero value to this attribute will
+		hot-remove the PCI device and any of its children.
+		Depends on CONFIG_HOTPLUG.
+
+What:		/sys/bus/pci/devices/.../rescan
+Date:		January 2009
+Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+		Writing a non-zero value to this attribute will
+		force a rescan of the device's parent bus and all
+		child buses, and re-discover devices removed earlier
+		from this part of the device tree.
+		Depends on CONFIG_HOTPLUG.
+
 What:		/sys/bus/pci/devices/.../vpd
 Date:		February 2008
 Contact:	Ben Hutchings <bhutchings@solarflare.com>
@@ -52,3 +95,30 @@ Description:
 		that some devices may have malformatted data.  If the
 		underlying VPD has a writable section then the
 		corresponding section of this file will be writable.
+
+What:		/sys/bus/pci/devices/.../virtfnN
+Date:		March 2009
+Contact:	Yu Zhao <yu.zhao@intel.com>
+Description:
+		This symbolic link appears when hardware supports the SR-IOV
+		capability and the Physical Function driver has enabled it.
+		The symbolic link points to the PCI device sysfs entry of the
+		Virtual Function whose index is N (0...MaxVFs-1).
+
+What:		/sys/bus/pci/devices/.../dep_link
+Date:		March 2009
+Contact:	Yu Zhao <yu.zhao@intel.com>
+Description:
+		This symbolic link appears when hardware supports the SR-IOV
+		capability and the Physical Function driver has enabled it,
+		and this device has vendor specific dependencies with others.
+		The symbolic link points to the PCI device sysfs entry of
+		Physical Function this device depends on.
+
+What:		/sys/bus/pci/devices/.../physfn
+Date:		March 2009
+Contact:	Yu Zhao <yu.zhao@intel.com>
+Description:
+		This symbolic link appears when a device is a Virtual Function.
+		The symbolic link points to the PCI device sysfs entry of the
+		Physical Function this device associates with.
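
For illustration only (not part of this commit): a minimal userspace sketch, in C, that
triggers a bus rescan through the new attribute documented above.  It is equivalent to
"echo 1 > /sys/bus/pci/rescan", and it requires root privileges and a kernel built with
CONFIG_HOTPLUG.

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* Writing any non-zero value asks the kernel to rescan all PCI buses. */
            int fd = open("/sys/bus/pci/rescan", O_WRONLY);

            if (fd < 0) {
                    perror("open /sys/bus/pci/rescan");
                    return 1;
            }
            if (write(fd, "1", 1) != 1)
                    perror("write");
            close(fd);
            return 0;
    }

The per-device remove and rescan attributes above can be driven the same way, with the
corresponding /sys/bus/pci/devices/.../ path substituted.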

+ 1 - 0
Documentation/DocBook/kernel-api.tmpl

@@ -199,6 +199,7 @@ X!Edrivers/pci/hotplug.c
 -->
 !Edrivers/pci/probe.c
 !Edrivers/pci/rom.c
+!Edrivers/pci/iov.c
      </sect1>
      <sect1><title>PCI Hotplug Support Library</title>
 !Edrivers/pci/hotplug/pci_hotplug_core.c

+ 332 - 482
Documentation/PCI/MSI-HOWTO.txt

@@ -4,506 +4,356 @@
 	Revised Feb 12, 2004 by Martine Silbermann
 		email: Martine.Silbermann@hp.com
 	Revised Jun 25, 2004 by Tom L Nguyen
+	Revised Jul  9, 2008 by Matthew Wilcox <willy@linux.intel.com>
+		Copyright 2003, 2008 Intel Corporation
 
 
 1. About this guide
 
-This guide describes the basics of Message Signaled Interrupts (MSI),
-the advantages of using MSI over traditional interrupt mechanisms,
-and how to enable your driver to use MSI or MSI-X. Also included is
-a Frequently Asked Questions (FAQ) section.
-
-1.1 Terminology
-
-PCI devices can be single-function or multi-function.  In either case,
-when this text talks about enabling or disabling MSI on a "device
-function," it is referring to one specific PCI device and function and
-not to all functions on a PCI device (unless the PCI device has only
-one function).
-
-2. Copyright 2003 Intel Corporation
-
-3. What is MSI/MSI-X?
-
-Message Signaled Interrupt (MSI), as described in the PCI Local Bus
-Specification Revision 2.3 or later, is an optional feature, and a
-required feature for PCI Express devices. MSI enables a device function
-to request service by sending an Inbound Memory Write on its PCI bus to
-the FSB as a Message Signal Interrupt transaction. Because MSI is
-generated in the form of a Memory Write, all transaction conditions,
-such as a Retry, Master-Abort, Target-Abort or normal completion, are
-supported.
-
-A PCI device that supports MSI must also support pin IRQ assertion
-interrupt mechanism to provide backward compatibility for systems that
-do not support MSI. In systems which support MSI, the bus driver is
-responsible for initializing the message address and message data of
-the device function's MSI/MSI-X capability structure during device
-initial configuration.
-
-An MSI capable device function indicates MSI support by implementing
-the MSI/MSI-X capability structure in its PCI capability list. The
-device function may implement both the MSI capability structure and
-the MSI-X capability structure; however, the bus driver should not
-enable both.
-
-The MSI capability structure contains Message Control register,
-Message Address register and Message Data register. These registers
-provide the bus driver control over MSI. The Message Control register
-indicates the MSI capability supported by the device. The Message
-Address register specifies the target address and the Message Data
-register specifies the characteristics of the message. To request
-service, the device function writes the content of the Message Data
-register to the target address. The device and its software driver
-are prohibited from writing to these registers.
-
-The MSI-X capability structure is an optional extension to MSI. It
-uses an independent and separate capability structure. There are
-some key advantages to implementing the MSI-X capability structure
-over the MSI capability structure as described below.
-
-	- Support a larger maximum number of vectors per function.
-
-	- Provide the ability for system software to configure
-	each vector with an independent message address and message
-	data, specified by a table that resides in Memory Space.
-
-        - MSI and MSI-X both support per-vector masking. Per-vector
-	masking is an optional extension of MSI but a required
-	feature for MSI-X. Per-vector masking provides the kernel the
-	ability to mask/unmask a single MSI while running its
-	interrupt service routine. If per-vector masking is
-	not supported, then the device driver should provide the
-	hardware/software synchronization to ensure that the device
-	generates MSI when the driver wants it to do so.
-
-4. Why use MSI?
-
-As a benefit to the simplification of board design, MSI allows board
-designers to remove out-of-band interrupt routing. MSI is another
-step towards a legacy-free environment.
-
-Due to increasing pressure on chipset and processor packages to
-reduce pin count, the need for interrupt pins is expected to
-diminish over time. Devices, due to pin constraints, may implement
-messages to increase performance.
-
-PCI Express endpoints uses INTx emulation (in-band messages) instead
-of IRQ pin assertion. Using INTx emulation requires interrupt
-sharing among devices connected to the same node (PCI bridge) while
-MSI is unique (non-shared) and does not require BIOS configuration
-support. As a result, the PCI Express technology requires MSI
-support for better interrupt performance.
-
-Using MSI enables the device functions to support two or more
-vectors, which can be configured to target different CPUs to
-increase scalability.
-
-5. Configuring a driver to use MSI/MSI-X
-
-By default, the kernel will not enable MSI/MSI-X on all devices that
-support this capability. The CONFIG_PCI_MSI kernel option
-must be selected to enable MSI/MSI-X support.
-
-5.1 Including MSI/MSI-X support into the kernel
-
-To allow MSI/MSI-X capable device drivers to selectively enable
-MSI/MSI-X (using pci_enable_msi()/pci_enable_msix() as described
-below), the VECTOR based scheme needs to be enabled by setting
-CONFIG_PCI_MSI during kernel config.
-
-Since the target of the inbound message is the local APIC, providing
-CONFIG_X86_LOCAL_APIC must be enabled as well as CONFIG_PCI_MSI.
-
-5.2 Configuring for MSI support
-
-Due to the non-contiguous fashion in vector assignment of the
-existing Linux kernel, this version does not support multiple
-messages regardless of a device function is capable of supporting
-more than one vector. To enable MSI on a device function's MSI
-capability structure requires a device driver to call the function
-pci_enable_msi() explicitly.
-
-5.2.1 API pci_enable_msi
+This guide describes the basics of Message Signaled Interrupts (MSIs),
+the advantages of using MSI over traditional interrupt mechanisms, how
+to change your driver to use MSI or MSI-X and some basic diagnostics to
+try if a device doesn't support MSIs.
 
 
-int pci_enable_msi(struct pci_dev *dev)
 
 
-With this new API, a device driver that wants to have MSI
-enabled on its device function must call this API to enable MSI.
-A successful call will initialize the MSI capability structure
-with ONE vector, regardless of whether a device function is
-capable of supporting multiple messages. This vector replaces the
-pre-assigned dev->irq with a new MSI vector. To avoid a conflict
-of the new assigned vector with existing pre-assigned vector requires
-a device driver to call this API before calling request_irq().
+2. What are MSIs?
 
 
-5.2.2 API pci_disable_msi
+A Message Signaled Interrupt is a write from the device to a special
+address which causes an interrupt to be received by the CPU.
 
 
-void pci_disable_msi(struct pci_dev *dev)
+The MSI capability was first specified in PCI 2.2 and was later enhanced
+in PCI 3.0 to allow each interrupt to be masked individually.  The MSI-X
+capability was also introduced with PCI 3.0.  It supports more interrupts
+per device than MSI and allows interrupts to be independently configured.
 
 
-This API should always be used to undo the effect of pci_enable_msi()
-when a device driver is unloading. This API restores dev->irq with
-the pre-assigned IOAPIC vector and switches a device's interrupt
-mode to PCI pin-irq assertion/INTx emulation mode.
-
-Note that a device driver should always call free_irq() on the MSI vector
-that it has done request_irq() on before calling this API. Failure to do
-so results in a BUG_ON() and a device will be left with MSI enabled and
-leaks its vector.
-
-5.2.3 MSI mode vs. legacy mode diagram
-
-The below diagram shows the events which switch the interrupt
-mode on the MSI-capable device function between MSI mode and
-PIN-IRQ assertion mode.
-
-	 ------------   pci_enable_msi 	 ------------------------
-	|	     | <===============	| 			 |
-	| MSI MODE   |	  	     	| PIN-IRQ ASSERTION MODE |
-	| 	     | ===============>	|			 |
- 	 ------------	pci_disable_msi  ------------------------
-
-
-Figure 1. MSI Mode vs. Legacy Mode
-
-In Figure 1, a device operates by default in legacy mode. Legacy
-in this context means PCI pin-irq assertion or PCI-Express INTx
-emulation. A successful MSI request (using pci_enable_msi()) switches
-a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
-stored in dev->irq will be saved by the PCI subsystem and a new
-assigned MSI vector will replace dev->irq.
-
-To return back to its default mode, a device driver should always call
-pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
-device driver should always call free_irq() on the MSI vector it has
-done request_irq() on before calling pci_disable_msi(). Failure to do
-so results in a BUG_ON() and a device will be left with MSI enabled and
-leaks its vector. Otherwise, the PCI subsystem restores a device's
-dev->irq with a pre-assigned IOAPIC vector and marks the released
-MSI vector as unused.
-
-Once being marked as unused, there is no guarantee that the PCI
-subsystem will reserve this MSI vector for a device. Depending on
-the availability of current PCI vector resources and the number of
-MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
-
-For the case where the PCI subsystem re-assigns this MSI vector to
-another driver, a request to switch back to MSI mode may result
-in being assigned a different MSI vector or a failure if no more
-vectors are available.
-
-5.3 Configuring for MSI-X support
-
-Due to the ability of the system software to configure each vector of
-the MSI-X capability structure with an independent message address
-and message data, the non-contiguous fashion in vector assignment of
-the existing Linux kernel has no impact on supporting multiple
-messages on an MSI-X capable device functions. To enable MSI-X on
-a device function's MSI-X capability structure requires its device
-driver to call the function pci_enable_msix() explicitly.
-
-The function pci_enable_msix(), once invoked, enables either
-all or nothing, depending on the current availability of PCI vector
-resources. If the PCI vector resources are available for the number
-of vectors requested by a device driver, this function will configure
-the MSI-X table of the MSI-X capability structure of a device with
-requested messages. To emphasize this reason, for example, a device
-may be capable for supporting the maximum of 32 vectors while its
-software driver usually may request 4 vectors. It is recommended
-that the device driver should call this function once during the
-initialization phase of the device driver.
-
-Unlike the function pci_enable_msi(), the function pci_enable_msix()
-does not replace the pre-assigned IOAPIC dev->irq with a new MSI
-vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
-into the field vector of each element contained in a second argument.
-Note that the pre-assigned IOAPIC dev->irq is valid only if the device
-operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at
-using dev->irq by the device driver to request for interrupt service
-may result in unpredictable behavior.
-
-For each MSI-X vector granted, a device driver is responsible for calling
-other functions like request_irq(), enable_irq(), etc. to enable
-this vector with its corresponding interrupt service handler. It is
-a device driver's choice to assign all vectors with the same
-interrupt service handler or each vector with a unique interrupt
-service handler.
-
-5.3.1 Handling MMIO address space of MSI-X Table
-
-The PCI 3.0 specification has implementation notes that MMIO address
-space for a device's MSI-X structure should be isolated so that the
-software system can set different pages for controlling accesses to the
-MSI-X structure. The implementation of MSI support requires the PCI
-subsystem, not a device driver, to maintain full control of the MSI-X
-table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA.  A device driver should not access the MMIO address
-space of the MSI-X table/MSI-X PBA.
-
-5.3.2 API pci_enable_msix
+Devices may support both MSI and MSI-X, but only one can be enabled at
+a time.
 
 
-int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 
 
-This API enables a device driver to request the PCI subsystem
-to enable MSI-X messages on its hardware device. Depending on
-the availability of PCI vectors resources, the PCI subsystem enables
-either all or none of the requested vectors.
+3. Why use MSIs?
+
+There are three reasons why using MSIs can give an advantage over
+traditional pin-based interrupts.
+
+Pin-based PCI interrupts are often shared amongst several devices.
+To support this, the kernel must call each interrupt handler associated
+with an interrupt, which leads to reduced performance for the system as
+a whole.  MSIs are never shared, so this problem cannot arise.
+
+When a device writes data to memory, then raises a pin-based interrupt,
+it is possible that the interrupt may arrive before all the data has
+arrived in memory (this becomes more likely with devices behind PCI-PCI
+bridges).  In order to ensure that all the data has arrived in memory,
+the interrupt handler must read a register on the device which raised
+the interrupt.  PCI transaction ordering rules require that all the data
+arrives in memory before the value can be returned from the register.
+Using MSIs avoids this problem as the interrupt-generating write cannot
+pass the data writes, so by the time the interrupt is raised, the driver
+knows that all the data has arrived in memory.
+
+PCI devices can only support a single pin-based interrupt per function.
+Often drivers have to query the device to find out what event has
+occurred, slowing down interrupt handling for the common case.  With
+MSIs, a device can support more interrupts, allowing each interrupt
+to be specialised to a different purpose.  One possible design gives
+infrequent conditions (such as errors) their own interrupt which allows
+the driver to handle the normal interrupt handling path more efficiently.
+Other possible designs include giving one interrupt to each packet queue
+in a network card or each port in a storage controller.
+
+
+4. How to use MSIs
+
+PCI devices are initialised to use pin-based interrupts.  The device
+driver has to set up the device to use MSI or MSI-X.  Not all machines
+support MSIs correctly, and for those machines, the APIs described below
+will simply fail and the device will continue to use pin-based interrupts.
+
+4.1 Include kernel support for MSIs
+
+To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
+option enabled.  This option is only available on some architectures,
+and it may depend on some other options also being set.  For example,
+on x86, you must also enable X86_UP_APIC or SMP in order to see the
+CONFIG_PCI_MSI option.
+
+4.2 Using MSI
+
+Most of the hard work is done for the driver in the PCI layer.  It simply
+has to request that the PCI layer set up the MSI capability for this
+device.
+
+4.2.1 pci_enable_msi
+
+int pci_enable_msi(struct pci_dev *dev)
+
+A successful call will allocate ONE interrupt to the device, regardless
+of how many MSIs the device supports.  The device will be switched from
+pin-based interrupt mode to MSI mode.  The dev->irq number is changed
+to a new number which represents the message signaled interrupt.
+This function should be called before the driver calls request_irq()
+since enabling MSIs disables the pin-based IRQ and the driver will not
+receive interrupts on the old interrupt.
+
+4.2.2 pci_enable_msi_block
+
+int pci_enable_msi_block(struct pci_dev *dev, int count)
+
+This variation on the above call allows a device driver to request multiple
+MSIs.  The MSI specification only allows interrupts to be allocated in
+powers of two, up to a maximum of 2^5 (32).
+
+If this function returns 0, it has succeeded in allocating at least as many
+interrupts as the driver requested (it may have allocated more in order
+to satisfy the power-of-two requirement).  In this case, the function
+enables MSI on this device and updates dev->irq to be the lowest of
+the new interrupts assigned to it.  The other interrupts assigned to
+the device are in the range dev->irq to dev->irq + count - 1.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device.  If this function returns a positive number, it will be
+less than 'count' and indicate the number of interrupts that could have
+been allocated.  In neither case will the irq value have been
+updated, nor will the device have been switched into MSI mode.
+
+The device driver must decide what action to take if
+pci_enable_msi_block() returns a value less than the number asked for.
+Some devices can make use of fewer interrupts than the maximum they
+request; in this case the driver should call pci_enable_msi_block()
+again.  Note that it is not guaranteed to succeed, even when the
+'count' has been reduced to the value returned from a previous call to
+pci_enable_msi_block().  This is because there are multiple constraints
+on the number of vectors that can be allocated; pci_enable_msi_block()
+will return as soon as it finds any constraint that doesn't allow the
+call to succeed.
+
+4.2.3 pci_disable_msi
+
+void pci_disable_msi(struct pci_dev *dev)
 
 
-Argument 'dev' points to the device (pci_dev) structure.
+This function should be used to undo the effect of pci_enable_msi() or
+pci_enable_msi_block().  Calling it restores dev->irq to the pin-based
+interrupt number and frees the previously allocated message signaled
+interrupt(s).  The interrupt may subsequently be assigned to another
+device, so drivers should not cache the value of dev->irq.
 
 
-Argument 'entries' is a pointer to an array of msix_entry structs.
-The number of entries is indicated in argument 'nvec'.
-struct msix_entry is defined in /driver/pci/msi.h:
+A device driver must always call free_irq() on the interrupt(s)
+for which it has called request_irq() before calling this function.
+Failure to do so will result in a BUG_ON(), the device will be left with
+MSI enabled and will leak its vector.
+
+4.3 Using MSI-X
+
+The MSI-X capability is much more flexible than the MSI capability.
+It supports up to 2048 interrupts, each of which can be controlled
+independently.  To support this flexibility, drivers must use an array of
+`struct msix_entry':
 
 
 struct msix_entry {
 	u16 	vector; /* kernel uses to write alloc vector */
 	u16	entry; /* driver uses to specify entry */
 };
 
-A device driver is responsible for initializing the field 'entry' of
-each element with a unique entry supported by MSI-X table. Otherwise,
--EINVAL will be returned as a result. A successful return of zero
-indicates the PCI subsystem completed initializing each of the requested
-entries of the MSI-X table with message address and message data.
-Last but not least, the PCI subsystem will write the 1:1
-vector-to-entry mapping into the field 'vector' of each element. A
-device driver is responsible for keeping track of allocated MSI-X
-vectors in its internal data structure.
-
-A return of zero indicates that the number of MSI-X vectors was
-successfully allocated. A return of greater than zero indicates
-MSI-X vector shortage. Or a return of less than zero indicates
-a failure. This failure may be a result of duplicate entries
-specified in second argument, or a result of no available vector,
-or a result of failing to initialize MSI-X table entries.
-
-5.3.3 API pci_disable_msix
+This allows for the device to use these interrupts in a sparse fashion;
+for example it could use interrupts 3 and 1027 and allocate only a
+two-element array.  The driver is expected to fill in the 'entry' value
+in each element of the array to indicate which entries it wants the kernel
+to assign interrupts for.  It is invalid to fill in two entries with the
+same number.
+
+4.3.1 pci_enable_msix
+
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+
+Calling this function asks the PCI subsystem to allocate 'nvec' MSIs.
+The 'entries' argument is a pointer to an array of msix_entry structs
+which should be at least 'nvec' entries in size.  On success, the
+function will return 0 and the device will have been switched into
+MSI-X interrupt mode.  The 'vector' elements in each entry will have
+been filled in with the interrupt number.  The driver should then call
+request_irq() for each 'vector' that it decides to use.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to allocate any more MSI-X interrupts for
+this device.  If it returns a positive number, it indicates the maximum
+number of interrupt vectors that could have been allocated. See example
+below.
+
+This function, in contrast with pci_enable_msi(), does not adjust
+dev->irq.  The device will not generate interrupts for this interrupt
+number once MSI-X is enabled.  The device driver is responsible for
+keeping track of the interrupts assigned to the MSI-X vectors so it can
+free them again later.
+
+Device drivers should normally call this function once per device
+during the initialization phase.
+
+It is ideal if drivers can cope with a variable number of MSI-X interrupts;
+there are many reasons why the platform may not be able to provide the
+exact number a driver asks for.
+
+A request loop to achieve that might look like:
+
+static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
+{
+	while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
+		rc = pci_enable_msix(adapter->pdev,
+				     adapter->msix_entries, nvec);
+		if (rc > 0)
+			nvec = rc;
+		else
+			return rc;
+	}
+
+	return -ENOSPC;
+}
+
+4.3.2 pci_disable_msix
 
 
 void pci_disable_msix(struct pci_dev *dev)
 
-This API should always be used to undo the effect of pci_enable_msix()
-when a device driver is unloading. Note that a device driver should
-always call free_irq() on all MSI-X vectors it has done request_irq()
-on before calling this API. Failure to do so results in a BUG_ON() and
-a device will be left with MSI-X enabled and leaks its vectors.
-
-5.3.4 MSI-X mode vs. legacy mode diagram
-
-The below diagram shows the events which switch the interrupt
-mode on the MSI-X capable device function between MSI-X mode and
-PIN-IRQ assertion mode (legacy).
-
-	 ------------   pci_enable_msix(,,n) ------------------------
-	|	     | <===============	    | 			     |
-	| MSI-X MODE |	  	     	    | PIN-IRQ ASSERTION MODE |
-	| 	     | ===============>	    |			     |
- 	 ------------	pci_disable_msix     ------------------------
-
-Figure 2. MSI-X Mode vs. Legacy Mode
-
-In Figure 2, a device operates by default in legacy mode. A
-successful MSI-X request (using pci_enable_msix()) switches a
-device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector
-stored in dev->irq will be saved by the PCI subsystem; however,
-unlike MSI mode, the PCI subsystem will not replace dev->irq with
-assigned MSI-X vector because the PCI subsystem already writes the 1:1
-vector-to-entry mapping into the field 'vector' of each element
-specified in second argument.
-
-To return back to its default mode, a device driver should always call
-pci_disable_msix() to undo the effect of pci_enable_msix(). Note that
-a device driver should always call free_irq() on all MSI-X vectors it
-has done request_irq() on before calling pci_disable_msix(). Failure
-to do so results in a BUG_ON() and a device will be left with MSI-X
-enabled and leaks its vectors. Otherwise, the PCI subsystem switches a
-device function's interrupt mode from MSI-X mode to legacy mode and
-marks all allocated MSI-X vectors as unused.
-
-Once being marked as unused, there is no guarantee that the PCI
-subsystem will reserve these MSI-X vectors for a device. Depending on
-the availability of current PCI vector resources and the number of
-MSI/MSI-X requests from other drivers, these MSI-X vectors may be
-re-assigned.
-
-For the case where the PCI subsystem re-assigned these MSI-X vectors
-to other drivers, a request to switch back to MSI-X mode may result
-being assigned with another set of MSI-X vectors or a failure if no
-more vectors are available.
-
-5.4 Handling function implementing both MSI and MSI-X capabilities
-
-For the case where a function implements both MSI and MSI-X
-capabilities, the PCI subsystem enables a device to run either in MSI
-mode or MSI-X mode but not both. A device driver determines whether it
-wants MSI or MSI-X enabled on its hardware device. Once a device
-driver requests for MSI, for example, it is prohibited from requesting
-MSI-X; in other words, a device driver is not permitted to ping-pong
-between MSI mod MSI-X mode during a run-time.
-
-5.5 Hardware requirements for MSI/MSI-X support
-
-MSI/MSI-X support requires support from both system hardware and
-individual hardware device functions.
-
-5.5.1 Required x86 hardware support
-
-Since the target of MSI address is the local APIC CPU, enabling
-MSI/MSI-X support in the Linux kernel is dependent on whether existing
-system hardware supports local APIC. Users should verify that their
-system supports local APIC operation by testing that it runs when
-CONFIG_X86_LOCAL_APIC=y.
-
-In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set;
-however, in UP environment, users must manually set
-CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting
-CONFIG_PCI_MSI enables the VECTOR based scheme and the option for
-MSI-capable device drivers to selectively enable MSI/MSI-X.
-
-Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X
-vector is allocated new during runtime and MSI/MSI-X support does not
-depend on BIOS support. This key independency enables MSI/MSI-X
-support on future IOxAPIC free platforms.
-
-5.5.2 Device hardware support
-
-The hardware device function supports MSI by indicating the
-MSI/MSI-X capability structure on its PCI capability list. By
-default, this capability structure will not be initialized by
-the kernel to enable MSI during the system boot. In other words,
-the device function is running on its default pin assertion mode.
-Note that in many cases the hardware supporting MSI have bugs,
-which may result in system hangs. The software driver of specific
-MSI-capable hardware is responsible for deciding whether to call
-pci_enable_msi or not. A return of zero indicates the kernel
-successfully initialized the MSI/MSI-X capability structure of the
-device function. The device function is now running on MSI/MSI-X mode.
-
-5.6 How to tell whether MSI/MSI-X is enabled on device function
-
-At the driver level, a return of zero from the function call of
-pci_enable_msi()/pci_enable_msix() indicates to a device driver that
-its device function is initialized successfully and ready to run in
-MSI/MSI-X mode.
-
-At the user level, users can use the command 'cat /proc/interrupts'
-to display the vectors allocated for devices and their interrupt
-MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is
-enabled on a SCSI Adaptec 39320D Ultra320 controller.
-
-           CPU0       CPU1
-  0:     324639          0    IO-APIC-edge  timer
-  1:       1186          0    IO-APIC-edge  i8042
-  2:          0          0          XT-PIC  cascade
- 12:       2797          0    IO-APIC-edge  i8042
- 14:       6543          0    IO-APIC-edge  ide0
- 15:          1          0    IO-APIC-edge  ide1
-169:          0          0   IO-APIC-level  uhci-hcd
-185:          0          0   IO-APIC-level  uhci-hcd
-193:        138         10         PCI-MSI  aic79xx
-201:         30          0         PCI-MSI  aic79xx
-225:         30          0   IO-APIC-level  aic7xxx
-233:         30          0   IO-APIC-level  aic7xxx
-NMI:          0          0
-LOC:     324553     325068
-ERR:          0
-MIS:          0
-
-6. MSI quirks
-
-Several PCI chipsets or devices are known to not support MSI.
-The PCI stack provides 3 possible levels of MSI disabling:
-* on a single device
-* on all devices behind a specific bridge
-* globally
-
-6.1. Disabling MSI on a single device
-
-Under some circumstances it might be required to disable MSI on a
-single device.  This may be achieved by either not calling pci_enable_msi()
-or all, or setting the pci_dev->no_msi flag before (most of the time
-in a quirk).
-
-6.2. Disabling MSI below a bridge
-
-The vast majority of MSI quirks are required by PCI bridges not
-being able to route MSI between busses. In this case, MSI have to be
-disabled on all devices behind this bridge. It is achieves by setting
-the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge
-subordinate bus. There is no need to set the same flag on bridges that
-are below the broken bridge. When pci_enable_msi() is called to enable
-MSI on a device, pci_msi_supported() takes care of checking the NO_MSI
-flag in all parent busses of the device.
-
-Some bridges actually support dynamic MSI support enabling/disabling
-by changing some bits in their PCI configuration space (especially
-the Hypertransport chipsets such as the nVidia nForce and Serverworks
-HT2000). It may then be required to update the NO_MSI flag on the
-corresponding devices in the sysfs hierarchy. To enable MSI support
-on device "0000:00:0e", do:
-
-	echo 1 > /sys/bus/pci/devices/0000:00:0e/msi_bus
-
-To disable MSI support, echo 0 instead of 1. Note that it should be
-used with caution since changing this value might break interrupts.
-
-6.3. Disabling MSI globally
-
-Some extreme cases may require to disable MSI globally on the system.
-For now, the only known case is a Serverworks PCI-X chipsets (MSI are
-not supported on several busses that are not all connected to the
-chipset in the Linux PCI hierarchy). In the vast majority of other
-cases, disabling only behind a specific bridge is enough.
-
-For debugging purpose, the user may also pass pci=nomsi on the kernel
-command-line to explicitly disable MSI globally. But, once the appro-
-priate quirks are added to the kernel, this option should not be
-required anymore.
-
-6.4. Finding why MSI cannot be enabled on a device
-
-Assuming that MSI are not enabled on a device, you should look at
-dmesg to find messages that quirks may output when disabling MSI
-on some devices, some bridges or even globally.
-Then, lspci -t gives the list of bridges above a device. Reading
-/sys/bus/pci/devices/0000:00:0e/msi_bus will tell you whether MSI
-are enabled (1) or disabled (0). In 0 is found in a single bridge
-msi_bus file above the device, MSI cannot be enabled.
-
-7. FAQ
-
-Q1. Are there any limitations on using the MSI?
-
-A1. If the PCI device supports MSI and conforms to the
-specification and the platform supports the APIC local bus,
-then using MSI should work.
-
-Q2. Will it work on all the Pentium processors (P3, P4, Xeon,
-AMD processors)? In P3 IPI's are transmitted on the APIC local
-bus and in P4 and Xeon they are transmitted on the system
-bus. Are there any implications with this?
-
-A2. MSI support enables a PCI device sending an inbound
-memory write (0xfeexxxxx as target address) on its PCI bus
-directly to the FSB. Since the message address has a
-redirection hint bit cleared, it should work.
-
-Q3. The target address 0xfeexxxxx will be translated by the
-Host Bridge into an interrupt message. Are there any
-limitations on the chipsets such as Intel 8xx, Intel e7xxx,
-or VIA?
-
-A3. If these chipsets support an inbound memory write with
-target address set as 0xfeexxxxx, as conformed to PCI
-specification 2.3 or latest, then it should work.
-
-Q4. From the driver point of view, if the MSI is lost because
-of errors occurring during inbound memory write, then it may
-wait forever. Is there a mechanism for it to recover?
-
-A4. Since the target of the transaction is an inbound memory
-write, all transaction termination conditions (Retry,
-Master-Abort, Target-Abort, or normal completion) are
-supported. A device sending an MSI must abide by all the PCI
-rules and conditions regarding that inbound memory write. So,
-if a retry is signaled it must retry, etc... We believe that
-the recommendation for Abort is also a retry (refer to PCI
-specification 2.3 or latest).
+This API should be used to undo the effect of pci_enable_msix().  It frees
+the previously allocated message signaled interrupts.  The interrupts may
+subsequently be assigned to another device, so drivers should not cache
+the value of the 'vector' elements over a call to pci_disable_msix().
+
+A device driver must always call free_irq() on the interrupt(s)
+for which it has called request_irq() before calling this function.
+Failure to do so will result in a BUG_ON(), the device will be left with
+MSI enabled and will leak its vector.
+
+4.3.3 The MSI-X Table
+
+The MSI-X capability specifies a BAR and offset within that BAR for the
+MSI-X Table.  This address is mapped by the PCI subsystem, and should not
+be accessed directly by the device driver.  If the driver wishes to
+mask or unmask an interrupt, it should call disable_irq() / enable_irq().
+
+4.4 Handling devices implementing both MSI and MSI-X capabilities
+
+If a device implements both MSI and MSI-X capabilities, it can
+run in either MSI mode or MSI-X mode but not both simultaneously.
+This is a requirement of the PCI spec, and it is enforced by the
+PCI layer.  Calling pci_enable_msi() when MSI-X is already enabled or
+pci_enable_msix() when MSI is already enabled will result in an error.
+If a device driver wishes to switch between MSI and MSI-X at runtime,
+it must first quiesce the device, then switch it back to pin-interrupt
+mode, before calling pci_enable_msi() or pci_enable_msix() and resuming
+operation.  This is not expected to be a common operation but may be
+useful for debugging or testing during development.
+
+4.5 Considerations when using MSIs
+
+4.5.1 Choosing between MSI-X and MSI
+
+If your device supports both MSI-X and MSI capabilities, you should use
+the MSI-X facilities in preference to the MSI facilities.  As mentioned
+above, MSI-X supports any number of interrupts between 1 and 2048.
+In contrast, MSI is restricted to a maximum of 32 interrupts (and
+must be a power of two).  In addition, the MSI interrupt vectors must
+be allocated consecutively, so the system may not be able to allocate
+as many vectors for MSI as it could for MSI-X.  On some platforms, MSI
+interrupts must all be targeted at the same set of CPUs whereas MSI-X
+interrupts can all be targeted at different CPUs.
+
+4.5.2 Spinlocks
+
+Most device drivers have a per-device spinlock which is taken in the
+interrupt handler.  With pin-based interrupts or a single MSI, it is not
+necessary to disable interrupts (Linux guarantees the same interrupt will
+not be re-entered).  If a device uses multiple interrupts, the driver
+must disable interrupts while the lock is held.  If the device sends
+a different interrupt, the driver will deadlock trying to recursively
+acquire the spinlock.
+
+There are two solutions.  The first is to take the lock with
+spin_lock_irqsave() or spin_lock_irq() (see
+Documentation/DocBook/kernel-locking).  The second is to specify
+IRQF_DISABLED to request_irq() so that the kernel runs the entire
+interrupt routine with interrupts disabled.
+
+If your MSI interrupt routine does not hold the lock for the whole time
+it is running, the first solution may be best.  The second solution is
+normally preferred as it avoids making two transitions from interrupt
+disabled to enabled and back again.
+
+4.6 How to tell whether MSI/MSI-X is enabled on a device
+
+Using 'lspci -v' (as root) may show some devices with "MSI", "Message
+Signalled Interrupts" or "MSI-X" capabilities.  Each of these capabilities
+has an 'Enable' flag which will be followed with either "+" (enabled)
+or "-" (disabled).
+
+
+5. MSI quirks
+
+Several PCI chipsets or devices are known not to support MSIs.
+The PCI stack provides three ways to disable MSIs:
+
+1. globally
+2. on all devices behind a specific bridge
+3. on a single device
+
+5.1. Disabling MSIs globally
+
+Some host chipsets simply don't support MSIs properly.  If we're
+lucky, the manufacturer knows this and has indicated it in the ACPI
+FADT table.  In this case, Linux will automatically disable MSIs.
+Some boards don't include this information in the table and so we have
+to detect them ourselves.  The complete list of these is found near the
+quirk_disable_all_msi() function in drivers/pci/quirks.c.
+
+If you have a board which has problems with MSIs, you can pass pci=nomsi
+on the kernel command line to disable MSIs on all devices.  It would be
+in your best interests to report the problem to linux-pci@vger.kernel.org
+including a full 'lspci -v' so we can add the quirks to the kernel.
+
+5.2. Disabling MSIs below a bridge
+
+Some PCI bridges are not able to route MSIs between busses properly.
+In this case, MSIs must be disabled on all devices behind the bridge.
+
+Some bridges allow you to enable MSIs by changing some bits in their
+PCI configuration space (especially the Hypertransport chipsets such
+as the nVidia nForce and Serverworks HT2000).  As with host chipsets,
+Linux mostly knows about them and automatically enables MSIs if it can.
+If you have a bridge which Linux doesn't yet know about, you can enable
+MSIs in configuration space using whatever method you know works, then
+enable MSIs on that bridge by doing:
+
+       echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
+
+where $bridge is the PCI address of the bridge you've enabled (eg
+0000:00:0e.0).
+
+To disable MSIs, echo 0 instead of 1.  Changing this value should be
+done with caution as it can break interrupt handling for all devices
+below this bridge.
+
+Again, please notify linux-pci@vger.kernel.org of any bridges that need
+special handling.
+
+5.3. Disabling MSIs on a single device
+
+Some devices are known to have faulty MSI implementations.  Usually this
+is handled in the individual device driver but occasionally it's necessary
+to handle this with a quirk.  Some drivers have an option to disable use
+of MSI.  While this is a convenient workaround for the driver author,
+it is not good practice, and should not be emulated.
+
+5.4. Finding why MSIs are disabled on a device
+
+From the above three sections, you can see that there are many reasons
+why MSIs may not be enabled for a given device.  Your first step should
+be to examine your dmesg carefully to determine whether MSIs are enabled
+for your machine.  You should also check your .config to be sure you
+have enabled CONFIG_PCI_MSI.
+
+Then, 'lspci -t' gives the list of bridges above a device.  Reading
+/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1)
+or disabled (0).  If 0 is found in any of the msi_bus files belonging
+to bridges between the PCI root and the device, MSIs are disabled.
+
+It is also worth checking the device driver to see whether it supports MSIs.
+For example, it may contain calls to pci_enable_msi(), pci_enable_msix() or
+pci_enable_msi_block().
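
For reference only (not part of this patch set): a minimal sketch of how a driver might
adopt pci_enable_msi() with a fallback to the pin-based interrupt, following the ordering
rules spelled out in the HOWTO above.  The foo_* names and the foo_adapter structure are
invented for illustration.

    #include <linux/pci.h>
    #include <linux/interrupt.h>

    /* Hypothetical per-device state; only the fields used here. */
    struct foo_adapter {
            struct pci_dev *pdev;
            bool have_msi;
    };

    /* Trivial stub handler so the sketch is complete. */
    static irqreturn_t foo_interrupt(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int foo_setup_irq(struct foo_adapter *adapter)
    {
            struct pci_dev *pdev = adapter->pdev;
            unsigned long flags = IRQF_SHARED;

            /* Must be called before request_irq(); on success dev->irq is
             * replaced with the MSI number and the pin-based IRQ goes quiet.
             * On failure we simply keep using the pin-based interrupt. */
            adapter->have_msi = (pci_enable_msi(pdev) == 0);
            if (adapter->have_msi)
                    flags = 0;              /* MSIs are never shared */

            return request_irq(pdev->irq, foo_interrupt, flags, "foo", adapter);
    }

    static void foo_teardown_irq(struct foo_adapter *adapter)
    {
            struct pci_dev *pdev = adapter->pdev;

            /* free_irq() must come first (the text above warns of a BUG_ON()
             * otherwise); only then is it safe to disable MSI. */
            free_irq(pdev->irq, adapter);
            if (adapter->have_msi)
                    pci_disable_msi(pdev);
    }

If the driver later requests several MSIs or MSI-X vectors, the spinlock considerations
in section 4.5.2 apply: the handler-side lock must then be taken with interrupts disabled.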

+ 99 - 0
Documentation/PCI/pci-iov-howto.txt

@@ -0,0 +1,99 @@
+		PCI Express I/O Virtualization Howto
+		Copyright (C) 2009 Intel Corporation
+		    Yu Zhao <yu.zhao@intel.com>
+
+
+1. Overview
+
+1.1 What is SR-IOV
+
+Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
+capability which makes one physical device appear as multiple virtual
+devices. The physical device is referred to as Physical Function (PF)
+while the virtual devices are referred to as Virtual Functions (VF).
+Allocation of the VF can be dynamically controlled by the PF via
+registers encapsulated in the capability. By default, this feature is
+not enabled and the PF behaves as a traditional PCIe device. Once it's
+turned on, each VF's PCI configuration space can be accessed by its own
+Bus, Device and Function Number (Routing ID). And each VF also has PCI
+Memory Space, which is used to map its register set. VF device driver
+operates on the register set so it can be functional and appear as a
+real existing PCI device.
+
+2. User Guide
+
+2.1 How can I enable SR-IOV capability
+
+The device driver (PF driver) will control the enabling and disabling
+of the capability via API provided by SR-IOV core. If the hardware
+has SR-IOV capability, loading its PF driver would enable it and all
+VFs associated with the PF.
+
+2.2 How can I use the Virtual Functions
+
+VFs are treated as hot-plugged PCI devices in the kernel, so they
+should be able to work in the same way as real PCI devices. A VF
+requires a device driver, just as a normal PCI device does.
+
+3. Developer Guide
+
+3.1 SR-IOV API
+
+To enable SR-IOV capability:
+	int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+	'nr_virtfn' is the number of VFs to be enabled.
+
+To disable SR-IOV capability:
+	void pci_disable_sriov(struct pci_dev *dev);
+
+To notify SR-IOV core of Virtual Function Migration:
+	irqreturn_t pci_sriov_migration(struct pci_dev *dev);
+
+3.2 Usage example
+
+Following piece of code illustrates the usage of the SR-IOV API.
+
+static int __devinit dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	pci_enable_sriov(dev, NR_VIRTFN);
+
+	...
+
+	return 0;
+}
+
+static void __devexit dev_remove(struct pci_dev *dev)
+{
+	pci_disable_sriov(dev);
+
+	...
+}
+
+static int dev_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	...
+
+	return 0;
+}
+
+static int dev_resume(struct pci_dev *dev)
+{
+	...
+
+	return 0;
+}
+
+static void dev_shutdown(struct pci_dev *dev)
+{
+	...
+}
+
+static struct pci_driver dev_driver = {
+	.name =		"SR-IOV Physical Function driver",
+	.id_table =	dev_id_table,
+	.probe =	dev_probe,
+	.remove =	__devexit_p(dev_remove),
+	.suspend =	dev_suspend,
+	.resume =	dev_resume,
+	.shutdown =	dev_shutdown,
+};
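
A hedged aside, not part of the patch: the probe example above ignores the return value
of pci_enable_sriov().  A slightly more defensive variant, reusing the example's
hypothetical NR_VIRTFN constant, might look like this:

    static int __devinit dev_probe(struct pci_dev *dev,
                                   const struct pci_device_id *id)
    {
            int err;

            /* pci_enable_sriov() can fail (e.g. no SR-IOV capability or not
             * enough resources for the VF BARs); the PF can still operate. */
            err = pci_enable_sriov(dev, NR_VIRTFN);
            if (err)
                    dev_warn(&dev->dev, "SR-IOV not enabled (error %d)\n", err);

            /* ... remaining PF initialization ... */

            return 0;
    }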

+ 32 - 0
Documentation/feature-removal-schedule.txt

@@ -392,3 +392,35 @@ Why:	The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t)
 	have been kept around for migration reasons. After more than two years
 	it's time to remove them finally
 Who:	Thomas Gleixner <tglx@linutronix.de>
+
+---------------------------
+
+What:	fakephp and associated sysfs files in /sys/bus/pci/slots/
+When:	2011
+Why:	In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to
+	represent a machine's physical PCI slots. The change in semantics
+	had userspace implications, as the hotplug core no longer allowed
+	drivers to create multiple sysfs files per physical slot (required
+	for multi-function devices, e.g.). fakephp was seen as a developer's
+	tool only, and its interface changed. Too late, we learned that
+	there were some users of the fakephp interface.
+
+	In 2.6.30, the original fakephp interface was restored. At the same
+	time, the PCI core gained the ability that fakephp provided, namely
+	function-level hot-remove and hot-add.
+
+	Since the PCI core now provides the same functionality, exposed in:
+
+		/sys/bus/pci/rescan
+		/sys/bus/pci/devices/.../remove
+		/sys/bus/pci/devices/.../rescan
+
+	there is no functional reason to maintain fakephp as well.
+
+	We will keep the existing module so that 'modprobe fakephp' will
+	present the old /sys/bus/pci/slots/... interface for compatibility,
+	but users are urged to migrate their applications to the API above.
+
+	After a reasonable transition period, we will remove the legacy
+	fakephp interface.
+Who:	Alex Chiang <achiang@hp.com>

+ 10 - 0
Documentation/filesystems/sysfs-pci.txt

@@ -12,6 +12,7 @@ that support it.  For example, a given bus might look like this:
      |   |-- enable
      |   |-- irq
      |   |-- local_cpus
+     |   |-- remove
      |   |-- resource
      |   |-- resource0
      |   |-- resource1
@@ -36,6 +37,7 @@ files, each with their own function.
       enable	           Whether the device is enabled (ascii, rw)
       irq		   IRQ number (ascii, ro)
       local_cpus	   nearby CPU mask (cpumask, ro)
+       remove		   remove device from kernel's list (ascii, wo)
       resource		   PCI resource host addresses (ascii, ro)
       resource0..N	   PCI resource N, if present (binary, mmap)
       resource0_wc..N_wc  PCI WC map resource N, if prefetchable (binary, mmap)
@@ -46,6 +48,7 @@ files, each with their own function.
 
 
   ro - read only file
   rw - file is readable and writable
+  wo - write only file
   mmap - file is mmapable
   ascii - file contains ascii text
   binary - file contains binary data
@@ -73,6 +76,13 @@ that the device must be enabled for a rom read to return data succesfully.
 In the event a driver is not bound to the device, it can be enabled using the
 'enable' file, documented above.
 
+The 'remove' file is used to remove the PCI device, by writing a non-zero
+integer to the file.  This does not involve any kind of hot-plug functionality,
+e.g. powering off the device.  The device is removed from the kernel's list of
+PCI devices, the sysfs directory for it is removed, and the device will be
+removed from any drivers attached to it. Removal of PCI root buses is
+disallowed.
+
 Accessing legacy resources through sysfs
 ----------------------------------------
 

+ 11 - 0
Documentation/kernel-parameters.txt

@@ -1695,6 +1695,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			See also Documentation/blockdev/paride.txt.
 
 	pci=option[,option...]	[PCI] various PCI subsystem options:
+		earlydump	[X86] dump PCI config space before the kernel
+			        changes anything
 		off		[X86] don't probe for the PCI bus
 		bios		[X86-32] force use of PCI BIOS, don't access
 				the hardware directly. Use this if your machine
@@ -1794,6 +1796,15 @@ and is between 256 and 4096 characters. It is defined in the file
 		cbmemsize=nn[KMG]	The fixed amount of bus space which is
 				reserved for the CardBus bridge's memory
 				window. The default value is 64 megabytes.
+		resource_alignment=
+				Format:
+				[<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
+				Specifies alignment and device to reassign
+				aligned memory resources.
+				If <order of align> is not specified,
+				PAGE_SIZE is used as alignment.
+				PCI-PCI bridge can be specified, if resource
+				windows need to be expanded.
 
 
 	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
 			Management.

+ 14 - 0
arch/alpha/include/asm/pci.h

@@ -273,4 +273,18 @@ struct pci_dev *alpha_gendev_to_pci(struct device *dev);
 
 
 extern struct pci_dev *isa_bridge;
 
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+			   size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+			    size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+				      struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state);
+extern void pci_adjust_legacy_attr(struct pci_bus *bus,
+				   enum pci_mmap_state mmap_type);
+#define HAVE_PCI_LEGACY	1
+
+extern int pci_create_resource_files(struct pci_dev *dev);
+extern void pci_remove_resource_files(struct pci_dev *dev);
+
 #endif /* __ALPHA_PCI_H */

+ 1 - 1
arch/alpha/kernel/Makefile

@@ -12,7 +12,7 @@ obj-y    := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
 
 
 obj-$(CONFIG_VGA_HOSE)	+= console.o
 obj-$(CONFIG_SMP)	+= smp.o
-obj-$(CONFIG_PCI)	+= pci.o pci_iommu.o
+obj-$(CONFIG_PCI)	+= pci.o pci_iommu.o pci-sysfs.o
 obj-$(CONFIG_SRM_ENV)	+= srm_env.o
 obj-$(CONFIG_MODULES)	+= module.o
 
 

+ 366 - 0
arch/alpha/kernel/pci-sysfs.c

@@ -0,0 +1,366 @@
+/*
+ * arch/alpha/kernel/pci-sysfs.c
+ *
+ * Copyright (C) 2009 Ivan Kokshaysky
+ *
+ * Alpha PCI resource files.
+ *
+ * Loosely based on generic HAVE_PCI_MMAP implementation in
+ * drivers/pci/pci-sysfs.c
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+static int hose_mmap_page_range(struct pci_controller *hose,
+				struct vm_area_struct *vma,
+				enum pci_mmap_state mmap_type, int sparse)
+{
+	unsigned long base;
+
+	if (mmap_type == pci_mmap_mem)
+		base = sparse ? hose->sparse_mem_base : hose->dense_mem_base;
+	else
+		base = sparse ? hose->sparse_io_base : hose->dense_io_base;
+
+	vma->vm_pgoff += base >> PAGE_SHIFT;
+	vma->vm_flags |= (VM_IO | VM_RESERVED);
+
+	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+}
+
+static int __pci_mmap_fits(struct pci_dev *pdev, int num,
+			   struct vm_area_struct *vma, int sparse)
+{
+	unsigned long nr, start, size;
+	int shift = sparse ? 5 : 0;
+
+	nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	start = vma->vm_pgoff;
+	size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1;
+
+	if (start < size && size - start >= nr)
+		return 1;
+	WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d "
+		"(size 0x%08lx)\n",
+		current->comm, sparse ? " sparse" : "", start, start + nr,
+		pci_name(pdev), num, size);
+	return 0;
+}
+
+/**
+ * pci_mmap_resource - map a PCI resource into user memory space
+ * @kobj: kobject for mapping
+ * @attr: struct bin_attribute for the file being mapped
+ * @vma: struct vm_area_struct passed into the mmap
+ * @sparse: address space type
+ *
+ * Use the bus mapping routines to map a PCI resource into userspace.
+ */
+static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
+			     struct vm_area_struct *vma, int sparse)
+{
+	struct pci_dev *pdev = to_pci_dev(container_of(kobj,
+						       struct device, kobj));
+	struct resource *res = (struct resource *)attr->private;
+	enum pci_mmap_state mmap_type;
+	struct pci_bus_region bar;
+	int i;
+
+	for (i = 0; i < PCI_ROM_RESOURCE; i++)
+		if (res == &pdev->resource[i])
+			break;
+	if (i >= PCI_ROM_RESOURCE)
+		return -ENODEV;
+
+	if (!__pci_mmap_fits(pdev, i, vma, sparse))
+		return -EINVAL;
+
+	if (iomem_is_exclusive(res->start))
+		return -EINVAL;
+
+	pcibios_resource_to_bus(pdev, &bar, res);
+	vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0));
+	mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
+
+	return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse);
+}
+
+static int pci_mmap_resource_sparse(struct kobject *kobj,
+				    struct bin_attribute *attr,
+				    struct vm_area_struct *vma)
+{
+	return pci_mmap_resource(kobj, attr, vma, 1);
+}
+
+static int pci_mmap_resource_dense(struct kobject *kobj,
+				   struct bin_attribute *attr,
+				   struct vm_area_struct *vma)
+{
+	return pci_mmap_resource(kobj, attr, vma, 0);
+}
+
+/**
+ * pci_remove_resource_files - cleanup resource files
+ * @pdev: device to clean up
+ *
+ * If we created resource files for @pdev, remove them from sysfs and
+ * free their resources.
+ */
+void pci_remove_resource_files(struct pci_dev *pdev)
+{
+	int i;
+
+	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+		struct bin_attribute *res_attr;
+
+		res_attr = pdev->res_attr[i];
+		if (res_attr) {
+			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+			kfree(res_attr);
+		}
+
+		res_attr = pdev->res_attr_wc[i];
+		if (res_attr) {
+			sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+			kfree(res_attr);
+		}
+	}
+}
+
+static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num)
+{
+	struct pci_bus_region bar;
+	struct pci_controller *hose = pdev->sysdata;
+	long dense_offset;
+	unsigned long sparse_size;
+
+	pcibios_resource_to_bus(pdev, &bar, &pdev->resource[num]);
+
+	/* All core logic chips have 4G sparse address space, except
+	   CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM
+	   definitions in asm/core_xxx.h files). This corresponds
+	   to 128M or 512M of the bus space. */
+	dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base);
+	sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000;
+
+	return bar.end < sparse_size;
+}
+
+static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name,
+			       char *suffix, struct bin_attribute *res_attr,
+			       unsigned long sparse)
+{
+	size_t size = pci_resource_len(pdev, num);
+
+	sprintf(name, "resource%d%s", num, suffix);
+	res_attr->mmap = sparse ? pci_mmap_resource_sparse :
+				  pci_mmap_resource_dense;
+	res_attr->attr.name = name;
+	res_attr->attr.mode = S_IRUSR | S_IWUSR;
+	res_attr->size = sparse ? size << 5 : size;
+	res_attr->private = &pdev->resource[num];
+	return sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
+}
+
+static int pci_create_attr(struct pci_dev *pdev, int num)
+{
+	/* allocate attribute structure, piggyback attribute name */
+	int retval, nlen1, nlen2 = 0, res_count = 1;
+	unsigned long sparse_base, dense_base;
+	struct bin_attribute *attr;
+	struct pci_controller *hose = pdev->sysdata;
+	char *suffix, *attr_name;
+
+	suffix = "";	/* Assume bwx machine, normal resourceN files. */
+	nlen1 = 10;
+
+	if (pdev->resource[num].flags & IORESOURCE_MEM) {
+		sparse_base = hose->sparse_mem_base;
+		dense_base = hose->dense_mem_base;
+		if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) {
+			sparse_base = 0;
+			suffix = "_dense";
+			nlen1 = 16;	/* resourceN_dense */
+		}
+	} else {
+		sparse_base = hose->sparse_io_base;
+		dense_base = hose->dense_io_base;
+	}
+
+	if (sparse_base) {
+		suffix = "_sparse";
+		nlen1 = 17;
+		if (dense_base) {
+			nlen2 = 16;	/* resourceN_dense */
+			res_count = 2;
+		}
+	}
+
+	attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC);
+	if (!attr)
+		return -ENOMEM;
+
+	/* Create bwx, sparse or single dense file */
+	attr_name = (char *)(attr + res_count);
+	pdev->res_attr[num] = attr;
+	retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr,
+				     sparse_base);
+	if (retval || res_count == 1)
+		return retval;
+
+	/* Create dense file */
+	attr_name += nlen1;
+	attr++;
+	pdev->res_attr_wc[num] = attr;
+	return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0);
+}
+
+/**
+ * pci_create_resource_files - create resource files in sysfs for @pdev
+ * @pdev: device in question
+ *
+ * Walk the resources in @pdev creating files for each resource available.
+ */
+int pci_create_resource_files(struct pci_dev *pdev)
+{
+	int i;
+	int retval;
+
+	/* Expose the PCI resources from this device as files */
+	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+
+		/* skip empty resources */
+		if (!pci_resource_len(pdev, i))
+			continue;
+
+		retval = pci_create_attr(pdev, i);
+		if (retval) {
+			pci_remove_resource_files(pdev);
+			return retval;
+		}
+	}
+	return 0;
+}
+
+/* Legacy I/O bus mapping stuff. */
+
+static int __legacy_mmap_fits(struct pci_controller *hose,
+			      struct vm_area_struct *vma,
+			      unsigned long res_size, int sparse)
+{
+	unsigned long nr, start, size;
+
+	nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	start = vma->vm_pgoff;
+	size = ((res_size - 1) >> PAGE_SHIFT) + 1;
+
+	if (start < size && size - start >= nr)
+		return 1;
+	WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %d "
+		"(size 0x%08lx)\n",
+		current->comm, sparse ? " sparse" : "", start, start + nr,
+		hose->index, size);
+	return 0;
+}
+
+static inline int has_sparse(struct pci_controller *hose,
+			     enum pci_mmap_state mmap_type)
+{
+	unsigned long base;
+
+	base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base :
+					     hose->sparse_io_base;
+
+	return base != 0;
+}
+
+int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+			       enum pci_mmap_state mmap_type)
+{
+	struct pci_controller *hose = bus->sysdata;
+	int sparse = has_sparse(hose, mmap_type);
+	unsigned long res_size;
+
+	res_size = (mmap_type == pci_mmap_mem) ? bus->legacy_mem->size :
+						 bus->legacy_io->size;
+	if (!__legacy_mmap_fits(hose, vma, res_size, sparse))
+		return -EINVAL;
+
+	return hose_mmap_page_range(hose, vma, mmap_type, sparse);
+}
+
+/**
+ * pci_adjust_legacy_attr - adjustment of legacy file attributes
+ * @bus: bus whose legacy files are being adjusted
+ * @mmap_type: I/O port or memory
+ *
+ * Adjust file name and size for sparse mappings.
+ */
+void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type)
+{
+	struct pci_controller *hose = bus->sysdata;
+
+	if (!has_sparse(hose, mmap_type))
+		return;
+
+	if (mmap_type == pci_mmap_mem) {
+		bus->legacy_mem->attr.name = "legacy_mem_sparse";
+		bus->legacy_mem->size <<= 5;
+	} else {
+		bus->legacy_io->attr.name = "legacy_io_sparse";
+		bus->legacy_io->size <<= 5;
+	}
+	return;
+}
+
+/* Legacy I/O bus read/write functions */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+	struct pci_controller *hose = bus->sysdata;
+
+	port += hose->io_space->start;
+
+	switch(size) {
+	case 1:
+		*((u8 *)val) = inb(port);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		*((u16 *)val) = inw(port);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		*((u32 *)val) = inl(port);
+		return 4;
+	}
+	return -EINVAL;
+}
+
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+	struct pci_controller *hose = bus->sysdata;
+
+	port += hose->io_space->start;
+
+	switch(size) {
+	case 1:
+		outb(port, val);
+		return 1;
+	case 2:
+		if (port & 1)
+			return -EINVAL;
+		outw(port, val);
+		return 2;
+	case 4:
+		if (port & 3)
+			return -EINVAL;
+		outl(port, val);
+		return 4;
+	}
+	return -EINVAL;
+}

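From user space the new files behave like the generic resourceN attributes on other architectures; a rough sketch of mapping a BAR (the device path, BAR number and size are hypothetical, and on a sparse-only hose the file would be resource0_sparse or resource0_dense as created above):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/bus/pci/devices/0000:00:03.0/resource0";
	size_t len = 4096;			/* hypothetical BAR size */
	volatile uint32_t *bar;
	int fd = open(path, O_RDWR);		/* needs root and an enabled device */

	if (fd < 0) {
		perror(path);
		return 1;
	}
	bar = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (bar == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("first register: 0x%08x\n", bar[0]);
	munmap((void *)bar, len);
	close(fd);
	return 0;
}
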
+ 4 - 0
arch/powerpc/include/asm/pci.h

@@ -114,6 +114,10 @@ extern int pci_domain_nr(struct pci_bus *bus);
 /* Decide whether to display the domain number in /proc */
 extern int pci_proc_domain(struct pci_bus *bus);
 
 
+/* MSI arch hooks */
+#define arch_setup_msi_irqs arch_setup_msi_irqs
+#define arch_teardown_msi_irqs arch_teardown_msi_irqs
+#define arch_msi_check_device arch_msi_check_device
 
 
 struct vm_area_struct;
 /* Map a range of PCI memory or I/O space for a device into user space */

+ 5 - 0
arch/powerpc/kernel/msi.c

@@ -9,6 +9,7 @@
 
 
 #include <linux/kernel.h>
 #include <linux/msi.h>
+#include <linux/pci.h>
 
 
 #include <asm/machdep.h>
 
 
@@ -19,6 +20,10 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
 		return -ENOSYS;
 	}
 
 
+	/* PowerPC doesn't support multiple MSI yet */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
 	if (ppc_md.msi_check_device) {
 		pr_debug("msi: Using platform check routine.\n");
 		return ppc_md.msi_check_device(dev, nvec, type);

+ 3 - 0
arch/x86/include/asm/pci.h

@@ -86,6 +86,9 @@ static inline void early_quirks(void) { }
 
 
 extern void pci_iommu_alloc(void);
 
 
+/* MSI arch hook */
+#define arch_setup_msi_irqs arch_setup_msi_irqs
+
 #endif  /* __KERNEL__ */
 
 #ifdef CONFIG_X86_32

+ 4 - 0
arch/x86/kernel/apic/io_apic.c

@@ -3468,6 +3468,10 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	struct intel_iommu *iommu = NULL;
 	int index = 0;
 
 
+	/* x86 doesn't support multiple MSI yet */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {

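Since both x86 and powerpc report 1 here when more than one message is requested, a driver has to be prepared to retry with fewer vectors. A hedged sketch of the calling convention (driver and function names are illustrative), assuming the pci_enable_msi_block() interface documented in the reworked MSI-HOWTO:

#include <linux/pci.h>

/* Ask for nvec MSI vectors, falling back to whatever the arch offers.
 * A positive return from pci_enable_msi_block() is the count it could do. */
static int foo_enable_msi(struct pci_dev *pdev, int nvec)
{
	int rc;

	while (nvec >= 1) {
		rc = pci_enable_msi_block(pdev, nvec);
		if (rc == 0)
			return nvec;	/* got this many vectors */
		if (rc < 0)
			return rc;	/* MSI not usable at all */
		nvec = rc;		/* arch offered fewer, retry */
	}
	return -ENOSPC;
}
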
+ 1 - 2
arch/x86/kernel/pci-dma.c

@@ -300,8 +300,7 @@ fs_initcall(pci_iommu_init);
 static __devinit void via_no_dac(struct pci_dev *dev)
 {
 	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
-		printk(KERN_INFO
-			"PCI: VIA PCI bridge detected. Disabling DAC.\n");
+		dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
 		forbid_dac = 1;
 	}
 }

+ 11 - 8
arch/x86/pci/early.c

@@ -69,11 +69,12 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func)
 	int j;
 	u32 val;
 
 
-	printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func);
+	printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:",
+	       bus, slot, func);
 
 
 	for (i = 0; i < 256; i += 4) {
 		if (!(i & 0x0f))
-			printk("\n%04x:",i);
+			printk("\n  %02x:",i);
 
 
 		val = read_pci_config(bus, slot, func, i);
 		for (j = 0; j < 4; j++) {
@@ -96,20 +97,22 @@ void early_dump_pci_devices(void)
 			for (func = 0; func < 8; func++) {
 				u32 class;
 				u8 type;
+
 				class = read_pci_config(bus, slot, func,
 							PCI_CLASS_REVISION);
 				if (class == 0xffffffff)
-					break;
+					continue;
 
 
 				early_dump_pci_device(bus, slot, func);
 
 
-				/* No multi-function device? */
-				type = read_pci_config_byte(bus, slot, func,
+				if (func == 0) {
+					type = read_pci_config_byte(bus, slot,
+								    func,
 							       PCI_HEADER_TYPE);
-				if (!(type & 0x80))
-					break;
+					if (!(type & 0x80))
+						break;
+				}
 			}
 		}
 	}
 }
-

+ 0 - 20
arch/x86/pci/fixup.c

@@ -494,26 +494,6 @@ static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
 			  pci_siemens_interrupt_controller);
 
 
-/*
- * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
- * 4096 bytes configuration space for each function of their processor
- * configuration space.
- */
-static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
-{
-	dev->cfg_size = pci_cfg_space_size_ext(dev);
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
-
 /*
  * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
  * confusing the PCI engine:

+ 1 - 2
arch/x86/pci/legacy.c

@@ -50,8 +50,6 @@ static int __init pci_legacy_init(void)
 	if (pci_root_bus)
 		pci_bus_add_devices(pci_root_bus);
 
 
-	pcibios_fixup_peer_bridges();
-
 	return 0;
 }
 
 
@@ -67,6 +65,7 @@ int __init pci_subsys_init(void)
 	pci_visws_init();
 #endif
 	pci_legacy_init();
+	pcibios_fixup_peer_bridges();
 	pcibios_irq_init();
 	pcibios_init();
 
 

+ 173 - 54
arch/x86/pci/mmconfig-shared.c

@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <linux/bitmap.h>
+#include <linux/sort.h>
 #include <asm/e820.h>
 #include <asm/e820.h>
 #include <asm/pci_x86.h>
 #include <asm/pci_x86.h>
 
 
@@ -24,24 +25,49 @@
 /* Indicate if the mmcfg resources have been placed into the resource table. */
 /* Indicate if the mmcfg resources have been placed into the resource table. */
 static int __initdata pci_mmcfg_resources_inserted;
 static int __initdata pci_mmcfg_resources_inserted;
 
 
+static __init int extend_mmcfg(int num)
+{
+	struct acpi_mcfg_allocation *new;
+	int new_num = pci_mmcfg_config_num + num;
+
+	new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL);
+	if (!new)
+		return -1;
+
+	if (pci_mmcfg_config) {
+		memcpy(new, pci_mmcfg_config,
+		       sizeof(pci_mmcfg_config[0]) * pci_mmcfg_config_num);
+		kfree(pci_mmcfg_config);
+	}
+	pci_mmcfg_config = new;
+
+	return 0;
+}
+
+static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end)
+{
+	int i = pci_mmcfg_config_num;
+
+	pci_mmcfg_config_num++;
+	pci_mmcfg_config[i].address = addr;
+	pci_mmcfg_config[i].pci_segment = segment;
+	pci_mmcfg_config[i].start_bus_number = start;
+	pci_mmcfg_config[i].end_bus_number = end;
+}
+
 static const char __init *pci_mmcfg_e7520(void)
 static const char __init *pci_mmcfg_e7520(void)
 {
 {
 	u32 win;
 	u32 win;
 	raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win);
 	raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win);
 
 
 	win = win & 0xf000;
 	win = win & 0xf000;
-	if(win == 0x0000 || win == 0xf000)
-		pci_mmcfg_config_num = 0;
-	else {
-		pci_mmcfg_config_num = 1;
-		pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
-		if (!pci_mmcfg_config)
-			return NULL;
-		pci_mmcfg_config[0].address = win << 16;
-		pci_mmcfg_config[0].pci_segment = 0;
-		pci_mmcfg_config[0].start_bus_number = 0;
-		pci_mmcfg_config[0].end_bus_number = 255;
-	}
+	if (win == 0x0000 || win == 0xf000)
+		return NULL;
+
+	if (extend_mmcfg(1) == -1)
+		return NULL;
+
+	fill_one_mmcfg(win << 16, 0, 0, 255);
 
 
 	return "Intel Corporation E7520 Memory Controller Hub";
 	return "Intel Corporation E7520 Memory Controller Hub";
 }
 }
@@ -50,13 +76,11 @@ static const char __init *pci_mmcfg_intel_945(void)
 {
 {
 	u32 pciexbar, mask = 0, len = 0;
 	u32 pciexbar, mask = 0, len = 0;
 
 
-	pci_mmcfg_config_num = 1;
-
 	raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar);
 	raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar);
 
 
 	/* Enable bit */
 	/* Enable bit */
 	if (!(pciexbar & 1))
 	if (!(pciexbar & 1))
-		pci_mmcfg_config_num = 0;
+		return NULL;
 
 
 	/* Size bits */
 	/* Size bits */
 	switch ((pciexbar >> 1) & 3) {
 	switch ((pciexbar >> 1) & 3) {
@@ -73,28 +97,23 @@ static const char __init *pci_mmcfg_intel_945(void)
 		len  = 0x04000000U;
 		len  = 0x04000000U;
 		break;
 		break;
 	default:
 	default:
-		pci_mmcfg_config_num = 0;
+		return NULL;
 	}
 	}
 
 
 	/* Errata #2, things break when not aligned on a 256Mb boundary */
 	/* Errata #2, things break when not aligned on a 256Mb boundary */
 	/* Can only happen in 64M/128M mode */
 	/* Can only happen in 64M/128M mode */
 
 
 	if ((pciexbar & mask) & 0x0fffffffU)
 	if ((pciexbar & mask) & 0x0fffffffU)
-		pci_mmcfg_config_num = 0;
+		return NULL;
 
 
 	/* Don't hit the APIC registers and their friends */
 	/* Don't hit the APIC registers and their friends */
 	if ((pciexbar & mask) >= 0xf0000000U)
 	if ((pciexbar & mask) >= 0xf0000000U)
-		pci_mmcfg_config_num = 0;
-
-	if (pci_mmcfg_config_num) {
-		pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
-		if (!pci_mmcfg_config)
-			return NULL;
-		pci_mmcfg_config[0].address = pciexbar & mask;
-		pci_mmcfg_config[0].pci_segment = 0;
-		pci_mmcfg_config[0].start_bus_number = 0;
-		pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1;
-	}
+		return NULL;
+
+	if (extend_mmcfg(1) == -1)
+		return NULL;
+
+	fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1);
 
 
 	return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
 	return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
 }
 }
@@ -138,22 +157,77 @@ static const char __init *pci_mmcfg_amd_fam10h(void)
 		busnbits = 8;
 		busnbits = 8;
 	}
 	}
 
 
-	pci_mmcfg_config_num = (1 << segnbits);
-	pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) *
-				   pci_mmcfg_config_num, GFP_KERNEL);
-	if (!pci_mmcfg_config)
+	if (extend_mmcfg(1 << segnbits) == -1)
 		return NULL;
 		return NULL;
 
 
-	for (i = 0; i < (1 << segnbits); i++) {
-		pci_mmcfg_config[i].address = base + (1<<28) * i;
-		pci_mmcfg_config[i].pci_segment = i;
-		pci_mmcfg_config[i].start_bus_number = 0;
-		pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1;
-	}
+	for (i = 0; i < (1 << segnbits); i++)
+		fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1);
 
 
 	return "AMD Family 10h NB";
 	return "AMD Family 10h NB";
 }
 }
 
 
+static bool __initdata mcp55_checked;
+static const char __init *pci_mmcfg_nvidia_mcp55(void)
+{
+	int bus;
+	int mcp55_mmconf_found = 0;
+
+	static const u32 extcfg_regnum		= 0x90;
+	static const u32 extcfg_regsize		= 4;
+	static const u32 extcfg_enable_mask	= 1<<31;
+	static const u32 extcfg_start_mask	= 0xff<<16;
+	static const int extcfg_start_shift	= 16;
+	static const u32 extcfg_size_mask	= 0x3<<28;
+	static const int extcfg_size_shift	= 28;
+	static const int extcfg_sizebus[]	= {0x100, 0x80, 0x40, 0x20};
+	static const u32 extcfg_base_mask[]	= {0x7ff8, 0x7ffc, 0x7ffe, 0x7fff};
+	static const int extcfg_base_lshift	= 25;
+
+	/*
+	 * Skip if ACPI is in use, if another probe (e.g. AMD Fam10h)
+	 * already set up MMCONFIG, or if we have checked this before.
+	 */
+	if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked)
+		return NULL;
+
+	mcp55_checked = true;
+	for (bus = 0; bus < 256; bus++) {
+		u64 base;
+		u32 l, extcfg;
+		u16 vendor, device;
+		int start, size_index, end;
+
+		raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), 0, 4, &l);
+		vendor = l & 0xffff;
+		device = (l >> 16) & 0xffff;
+
+		if (PCI_VENDOR_ID_NVIDIA != vendor || 0x0369 != device)
+			continue;
+
+		raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), extcfg_regnum,
+				  extcfg_regsize, &extcfg);
+
+		if (!(extcfg & extcfg_enable_mask))
+			continue;
+
+		if (extend_mmcfg(1) == -1)
+			continue;
+
+		size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift;
+		base = extcfg & extcfg_base_mask[size_index];
+		/* base could > 4G */
+		base <<= extcfg_base_lshift;
+		start = (extcfg & extcfg_start_mask) >> extcfg_start_shift;
+		end = start + extcfg_sizebus[size_index] - 1;
+		fill_one_mmcfg(base, 0, start, end);
+		mcp55_mmconf_found++;
+	}
+
+	if (!mcp55_mmconf_found)
+		return NULL;
+
+	return "nVidia MCP55";
+}
+
 struct pci_mmcfg_hostbridge_probe {
 struct pci_mmcfg_hostbridge_probe {
 	u32 bus;
 	u32 bus;
 	u32 devfn;
 	u32 devfn;
@@ -171,8 +245,52 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
 	  0x1200, pci_mmcfg_amd_fam10h },
 	  0x1200, pci_mmcfg_amd_fam10h },
 	{ 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD,
 	{ 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD,
 	  0x1200, pci_mmcfg_amd_fam10h },
 	  0x1200, pci_mmcfg_amd_fam10h },
+	{ 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_NVIDIA,
+	  0x0369, pci_mmcfg_nvidia_mcp55 },
 };
 };
 
 
+static int __init cmp_mmcfg(const void *x1, const void *x2)
+{
+	const typeof(pci_mmcfg_config[0]) *m1 = x1;
+	const typeof(pci_mmcfg_config[0]) *m2 = x2;
+	int start1, start2;
+
+	start1 = m1->start_bus_number;
+	start2 = m2->start_bus_number;
+
+	return start1 - start2;
+}
+
+static void __init pci_mmcfg_check_end_bus_number(void)
+{
+	int i;
+	typeof(pci_mmcfg_config[0]) *cfg, *cfgx;
+
+	/* sort them at first */
+	sort(pci_mmcfg_config, pci_mmcfg_config_num,
+		 sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL);
+
+	/* last one */
+	if (pci_mmcfg_config_num > 0) {
+		i = pci_mmcfg_config_num - 1;
+		cfg = &pci_mmcfg_config[i];
+		if (cfg->end_bus_number < cfg->start_bus_number)
+			cfg->end_bus_number = 255;
+	}
+
+	/* don't overlap please */
+	for (i = 0; i < pci_mmcfg_config_num - 1; i++) {
+		cfg = &pci_mmcfg_config[i];
+		cfgx = &pci_mmcfg_config[i+1];
+
+		if (cfg->end_bus_number < cfg->start_bus_number)
+			cfg->end_bus_number = 255;
+
+		if (cfg->end_bus_number >= cfgx->start_bus_number)
+			cfg->end_bus_number = cfgx->start_bus_number - 1;
+	}
+}
+
 static int __init pci_mmcfg_check_hostbridge(void)
 static int __init pci_mmcfg_check_hostbridge(void)
 {
 {
 	u32 l;
 	u32 l;
@@ -186,31 +304,33 @@ static int __init pci_mmcfg_check_hostbridge(void)
 
 
 	pci_mmcfg_config_num = 0;
 	pci_mmcfg_config_num = 0;
 	pci_mmcfg_config = NULL;
 	pci_mmcfg_config = NULL;
-	name = NULL;
 
 
-	for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
+	for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
 		bus =  pci_mmcfg_probes[i].bus;
 		bus =  pci_mmcfg_probes[i].bus;
 		devfn = pci_mmcfg_probes[i].devfn;
 		devfn = pci_mmcfg_probes[i].devfn;
 		raw_pci_ops->read(0, bus, devfn, 0, 4, &l);
 		raw_pci_ops->read(0, bus, devfn, 0, 4, &l);
 		vendor = l & 0xffff;
 		vendor = l & 0xffff;
 		device = (l >> 16) & 0xffff;
 		device = (l >> 16) & 0xffff;
 
 
+		name = NULL;
 		if (pci_mmcfg_probes[i].vendor == vendor &&
 		if (pci_mmcfg_probes[i].vendor == vendor &&
 		    pci_mmcfg_probes[i].device == device)
 		    pci_mmcfg_probes[i].device == device)
 			name = pci_mmcfg_probes[i].probe();
 			name = pci_mmcfg_probes[i].probe();
-	}
 
 
-	if (name) {
-		printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n",
-		       name, pci_mmcfg_config_num ? "with" : "without");
+		if (name)
+			printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n",
+			       name);
 	}
 	}
 
 
-	return name != NULL;
+	/* some end_bus_number is crazy, fix it */
+	pci_mmcfg_check_end_bus_number();
+
+	return pci_mmcfg_config_num != 0;
 }
 }
 
 
 static void __init pci_mmcfg_insert_resources(void)
 static void __init pci_mmcfg_insert_resources(void)
 {
 {
-#define PCI_MMCFG_RESOURCE_NAME_LEN 19
+#define PCI_MMCFG_RESOURCE_NAME_LEN 24
 	int i;
 	int i;
 	struct resource *res;
 	struct resource *res;
 	char *names;
 	char *names;
@@ -228,9 +348,10 @@ static void __init pci_mmcfg_insert_resources(void)
 		struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
 		struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
 		num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
 		num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
 		res->name = names;
 		res->name = names;
-		snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
-			 cfg->pci_segment);
-		res->start = cfg->address;
+		snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN,
+			 "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment,
+			 cfg->start_bus_number, cfg->end_bus_number);
+		res->start = cfg->address + (cfg->start_bus_number << 20);
 		res->end = res->start + (num_buses << 20) - 1;
 		res->end = res->start + (num_buses << 20) - 1;
 		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 		insert_resource(&iomem_resource, res);
 		insert_resource(&iomem_resource, res);
@@ -354,8 +475,6 @@ static void __init pci_mmcfg_reject_broken(int early)
 	    (pci_mmcfg_config[0].address == 0))
 	    (pci_mmcfg_config[0].address == 0))
 		return;
 		return;
 
 
-	cfg = &pci_mmcfg_config[0];
-
 	for (i = 0; i < pci_mmcfg_config_num; i++) {
 	for (i = 0; i < pci_mmcfg_config_num; i++) {
 		int valid = 0;
 		int valid = 0;
 		u64 addr, size;
 		u64 addr, size;
@@ -423,10 +542,10 @@ static void __init __pci_mmcfg_init(int early)
 			known_bridge = 1;
 			known_bridge = 1;
 	}
 	}
 
 
-	if (!known_bridge) {
+	if (!known_bridge)
 		acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
 		acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
-		pci_mmcfg_reject_broken(early);
-	}
+
+	pci_mmcfg_reject_broken(early);
 
 
 	if ((pci_mmcfg_config_num == 0) ||
 	if ((pci_mmcfg_config_num == 0) ||
 	    (pci_mmcfg_config == NULL) ||
 	    (pci_mmcfg_config == NULL) ||

+ 11 - 6
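The new end-bus sanity pass is easiest to see on sample data; here is a stand-alone sketch of the same sort-then-clip logic (plain user-space C with simplified types, not the kernel code):

#include <stdio.h>
#include <stdlib.h>

struct mcfg { int start_bus, end_bus; };

static int cmp_mcfg(const void *a, const void *b)
{
	return ((const struct mcfg *)a)->start_bus -
	       ((const struct mcfg *)b)->start_bus;
}

int main(void)
{
	/* Example data: entries overlap and the last end bus is bogus. */
	struct mcfg cfg[] = { { 0x00, 0xff }, { 0x40, 0x7f }, { 0x80, 0x00 } };
	int i, n = sizeof(cfg) / sizeof(cfg[0]);

	/* Sort by start bus, as cmp_mmcfg() does. */
	qsort(cfg, n, sizeof(cfg[0]), cmp_mcfg);

	/* Last entry: a backwards range is taken to extend to bus 255. */
	if (cfg[n - 1].end_bus < cfg[n - 1].start_bus)
		cfg[n - 1].end_bus = 255;

	/* Earlier entries: fix backwards ranges, then clip any overlap. */
	for (i = 0; i < n - 1; i++) {
		if (cfg[i].end_bus < cfg[i].start_bus)
			cfg[i].end_bus = 255;
		if (cfg[i].end_bus >= cfg[i + 1].start_bus)
			cfg[i].end_bus = cfg[i + 1].start_bus - 1;
	}

	for (i = 0; i < n; i++)
		printf("[bus %02x-%02x]\n", cfg[i].start_bus, cfg[i].end_bus);
	return 0;
}

With that input this prints [bus 00-3f], [bus 40-7f], [bus 80-ff]: the bogus last end bus is extended to 255 and the first region is clipped so it no longer overlaps the second.
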
arch/x86/pci/mmconfig_64.c

@@ -112,13 +112,18 @@ static struct pci_raw_ops pci_mmcfg = {
 static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
 static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
 {
 {
 	void __iomem *addr;
 	void __iomem *addr;
-	u32 size;
-
-	size = (cfg->end_bus_number + 1) << 20;
-	addr = ioremap_nocache(cfg->address, size);
+	u64 start, size;
+
+	start = cfg->start_bus_number;
+	start <<= 20;
+	start += cfg->address;
+	size = cfg->end_bus_number + 1 - cfg->start_bus_number;
+	size <<= 20;
+	addr = ioremap_nocache(start, size);
 	if (addr) {
 	if (addr) {
 		printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n",
 		printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n",
-		       cfg->address, cfg->address + size - 1);
+		       start, start + size - 1);
+		addr -= cfg->start_bus_number << 20;
 	}
 	}
 	return addr;
 	return addr;
 }
 }
@@ -157,7 +162,7 @@ void __init pci_mmcfg_arch_free(void)
 
 
 	for (i = 0; i < pci_mmcfg_config_num; ++i) {
 	for (i = 0; i < pci_mmcfg_config_num; ++i) {
 		if (pci_mmcfg_virt[i].virt) {
 		if (pci_mmcfg_virt[i].virt) {
-			iounmap(pci_mmcfg_virt[i].virt);
+			iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20));
 			pci_mmcfg_virt[i].virt = NULL;
 			pci_mmcfg_virt[i].virt = NULL;
 			pci_mmcfg_virt[i].cfg = NULL;
 			pci_mmcfg_virt[i].cfg = NULL;
 		}
 		}

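The start_bus_number handling works because a bus's config space sits at bus << 20 inside an ECAM window: the window is ioremapped starting at start_bus_number << 20 and the returned pointer is rewound by the same amount, so existing callers can keep indexing with the absolute bus number. A small stand-alone illustration of the arithmetic (the values are examples):

#include <stdint.h>
#include <stdio.h>

/* Byte offset of (bus, devfn, reg) inside an MMCONFIG (ECAM) window. */
static uint64_t ecam_offset(unsigned int bus, unsigned int devfn, unsigned int reg)
{
	return ((uint64_t)bus << 20) | (devfn << 12) | reg;
}

int main(void)
{
	uint64_t rewind = (uint64_t)0x40 << 20;	/* start_bus_number = 0x40 */

	/* The first bus of the window lands at offset 0 of the actual mapping,
	 * even though the caller still indexes with the absolute bus number. */
	printf("bus 0x40:                 0x%llx\n",
	       (unsigned long long)(ecam_offset(0x40, 0, 0) - rewind));
	printf("bus 0x41, dev 2 fn 1, reg 0x10: 0x%llx\n",
	       (unsigned long long)(ecam_offset(0x41, (2 << 3) | 1, 0x10) - rewind));
	return 0;
}
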
+ 178 - 2
drivers/acpi/pci_root.c

@@ -66,11 +66,18 @@ struct acpi_pci_root {
 	struct acpi_device * device;
 	struct acpi_device * device;
 	struct acpi_pci_id id;
 	struct acpi_pci_id id;
 	struct pci_bus *bus;
 	struct pci_bus *bus;
+
+	u32 osc_support_set;	/* _OSC state of support bits */
+	u32 osc_control_set;	/* _OSC state of control bits */
+	u32 osc_control_qry;	/* the latest _OSC query result */
+
+	u32 osc_queried:1;	/* has _OSC control been queried? */
 };
 };
 
 
 static LIST_HEAD(acpi_pci_roots);
 static LIST_HEAD(acpi_pci_roots);
 
 
 static struct acpi_pci_driver *sub_driver;
 static struct acpi_pci_driver *sub_driver;
+static DEFINE_MUTEX(osc_lock);
 
 
 int acpi_pci_register_driver(struct acpi_pci_driver *driver)
 int acpi_pci_register_driver(struct acpi_pci_driver *driver)
 {
 {
@@ -185,6 +192,175 @@ static void acpi_pci_bridge_scan(struct acpi_device *device)
 		}
 		}
 }
 }
 
 
+static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
+			  0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
+
+static acpi_status acpi_pci_run_osc(acpi_handle handle,
+				    const u32 *capbuf, u32 *retval)
+{
+	acpi_status status;
+	struct acpi_object_list input;
+	union acpi_object in_params[4];
+	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *out_obj;
+	u32 errors;
+
+	/* Setting up input parameters */
+	input.count = 4;
+	input.pointer = in_params;
+	in_params[0].type 		= ACPI_TYPE_BUFFER;
+	in_params[0].buffer.length 	= 16;
+	in_params[0].buffer.pointer	= OSC_UUID;
+	in_params[1].type 		= ACPI_TYPE_INTEGER;
+	in_params[1].integer.value 	= 1;
+	in_params[2].type 		= ACPI_TYPE_INTEGER;
+	in_params[2].integer.value	= 3;
+	in_params[3].type		= ACPI_TYPE_BUFFER;
+	in_params[3].buffer.length 	= 12;
+	in_params[3].buffer.pointer 	= (u8 *)capbuf;
+
+	status = acpi_evaluate_object(handle, "_OSC", &input, &output);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	if (!output.length)
+		return AE_NULL_OBJECT;
+
+	out_obj = output.pointer;
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
+		printk(KERN_DEBUG "_OSC evaluation returned wrong type\n");
+		status = AE_TYPE;
+		goto out_kfree;
+	}
+	/* Need to ignore the bit0 in result code */
+	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+	if (errors) {
+		if (errors & OSC_REQUEST_ERROR)
+			printk(KERN_DEBUG "_OSC request failed\n");
+		if (errors & OSC_INVALID_UUID_ERROR)
+			printk(KERN_DEBUG "_OSC invalid UUID\n");
+		if (errors & OSC_INVALID_REVISION_ERROR)
+			printk(KERN_DEBUG "_OSC invalid revision\n");
+		if (errors & OSC_CAPABILITIES_MASK_ERROR) {
+			if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE)
+				goto out_success;
+			printk(KERN_DEBUG
+			       "Firmware did not grant requested _OSC control\n");
+			status = AE_SUPPORT;
+			goto out_kfree;
+		}
+		status = AE_ERROR;
+		goto out_kfree;
+	}
+out_success:
+	*retval = *((u32 *)(out_obj->buffer.pointer + 8));
+	status = AE_OK;
+
+out_kfree:
+	kfree(output.pointer);
+	return status;
+}
+
+static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags)
+{
+	acpi_status status;
+	u32 support_set, result, capbuf[3];
+
+	/* do _OSC query for all possible controls */
+	support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS);
+	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_TYPE] = support_set;
+	capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
+
+	status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
+	if (ACPI_SUCCESS(status)) {
+		root->osc_support_set = support_set;
+		root->osc_control_qry = result;
+		root->osc_queried = 1;
+	}
+	return status;
+}
+
+static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags)
+{
+	acpi_status status;
+	acpi_handle tmp;
+
+	status = acpi_get_handle(root->device->handle, "_OSC", &tmp);
+	if (ACPI_FAILURE(status))
+		return status;
+	mutex_lock(&osc_lock);
+	status = acpi_pci_query_osc(root, flags);
+	mutex_unlock(&osc_lock);
+	return status;
+}
+
+static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
+{
+	struct acpi_pci_root *root;
+	list_for_each_entry(root, &acpi_pci_roots, node) {
+		if (root->device->handle == handle)
+			return root;
+	}
+	return NULL;
+}
+
+/**
+ * acpi_pci_osc_control_set - commit requested control to Firmware
+ * @handle: acpi_handle for the target ACPI object
+ * @flags: driver's requested control bits
+ *
+ * Attempt to take control from Firmware on requested control bits.
+ **/
+acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags)
+{
+	acpi_status status;
+	u32 control_req, result, capbuf[3];
+	acpi_handle tmp;
+	struct acpi_pci_root *root;
+
+	status = acpi_get_handle(handle, "_OSC", &tmp);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	control_req = (flags & OSC_CONTROL_MASKS);
+	if (!control_req)
+		return AE_TYPE;
+
+	root = acpi_pci_find_root(handle);
+	if (!root)
+		return AE_NOT_EXIST;
+
+	mutex_lock(&osc_lock);
+	/* No need to evaluate _OSC if the control was already granted. */
+	if ((root->osc_control_set & control_req) == control_req)
+		goto out;
+
+	/* Need to query controls first before requesting them */
+	if (!root->osc_queried) {
+		status = acpi_pci_query_osc(root, root->osc_support_set);
+		if (ACPI_FAILURE(status))
+			goto out;
+	}
+	if ((root->osc_control_qry & control_req) != control_req) {
+		printk(KERN_DEBUG
+		       "Firmware did not grant requested _OSC control\n");
+		status = AE_SUPPORT;
+		goto out;
+	}
+
+	capbuf[OSC_QUERY_TYPE] = 0;
+	capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set;
+	capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req;
+	status = acpi_pci_run_osc(handle, capbuf, &result);
+	if (ACPI_SUCCESS(status))
+		root->osc_control_set = result;
+out:
+	mutex_unlock(&osc_lock);
+	return status;
+}
+EXPORT_SYMBOL(acpi_pci_osc_control_set);
+
 static int __devinit acpi_pci_root_add(struct acpi_device *device)
 static int __devinit acpi_pci_root_add(struct acpi_device *device)
 {
 {
 	int result = 0;
 	int result = 0;
@@ -217,7 +393,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	 * PCI domains, so we indicate this in _OSC support capabilities.
 	 * PCI domains, so we indicate this in _OSC support capabilities.
 	 */
 	 */
 	flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
 	flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
-	pci_acpi_osc_support(device->handle, flags);
+	acpi_pci_osc_support(root, flags);
 
 
 	/* 
 	/* 
 	 * Segment
 	 * Segment
@@ -353,7 +529,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	if (pci_msi_enabled())
 	if (pci_msi_enabled())
 		flags |= OSC_MSI_SUPPORT;
 		flags |= OSC_MSI_SUPPORT;
 	if (flags != base_flags)
 	if (flags != base_flags)
-		pci_acpi_osc_support(device->handle, flags);
+		acpi_pci_osc_support(root, flags);
 
 
       end:
       end:
 	if (result) {
 	if (result) {

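A hedged sketch of how a service driver is expected to use the newly exported helper; the function name is illustrative, the handle must belong to a PCI root bridge, and OSC_PCI_EXPRESS_NATIVE_HP_CONTROL is one of the existing control bits:

/* Illustrative fragment, not from the patch: given the ACPI handle of a
 * PCI root bridge, ask firmware to hand over native PCIe hotplug control.
 * acpi_pci_osc_control_set() queries support first if needed and only
 * commits the request when firmware grants every requested bit. */
static int try_take_hp_control(acpi_handle root_bridge_handle)
{
	acpi_status status;

	status = acpi_pci_osc_control_set(root_bridge_handle,
					  OSC_PCI_EXPRESS_NATIVE_HP_CONTROL);

	return ACPI_SUCCESS(status) ? 0 : -ENODEV;
}
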
+ 10 - 0
drivers/pci/Kconfig

@@ -59,3 +59,13 @@ config HT_IRQ
 	   This allows native hypertransport devices to use interrupts.
 	   This allows native hypertransport devices to use interrupts.
 
 
 	   If unsure say Y.
 	   If unsure say Y.
+
+config PCI_IOV
+	bool "PCI IOV support"
+	depends on PCI
+	help
+	  I/O Virtualization is a PCI feature supported by some devices
+	  which allows them to create virtual devices which share their
+	  physical resources.
+
+	  If unsure, say N.

+ 2 - 0
drivers/pci/Makefile

@@ -29,6 +29,8 @@ obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
 
 
 obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
 obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
 
 
+obj-$(CONFIG_PCI_IOV) += iov.o
+
 #
 #
 # Some architectures use the generic PCI setup functions
 # Some architectures use the generic PCI setup functions
 #
 #

+ 5 - 3
drivers/pci/bus.c

@@ -133,7 +133,7 @@ int pci_bus_add_child(struct pci_bus *bus)
  *
  *
  * Call hotplug for each new devices.
  * Call hotplug for each new devices.
  */
  */
-void pci_bus_add_devices(struct pci_bus *bus)
+void pci_bus_add_devices(const struct pci_bus *bus)
 {
 {
 	struct pci_dev *dev;
 	struct pci_dev *dev;
 	struct pci_bus *child;
 	struct pci_bus *child;
@@ -184,8 +184,10 @@ void pci_enable_bridges(struct pci_bus *bus)
 
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		if (dev->subordinate) {
 		if (dev->subordinate) {
-			retval = pci_enable_device(dev);
-			pci_set_master(dev);
+			if (atomic_read(&dev->enable_cnt) == 0) {
+				retval = pci_enable_device(dev);
+				pci_set_master(dev);
+			}
 			pci_enable_bridges(dev->subordinate);
 			pci_enable_bridges(dev->subordinate);
 		}
 		}
 	}
 	}

+ 21 - 37
drivers/pci/hotplug/acpi_pcihp.c

@@ -30,9 +30,8 @@
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/pci.h>
 #include <linux/pci_hotplug.h>
 #include <linux/pci_hotplug.h>
+#include <linux/acpi.h>
 #include <linux/pci-acpi.h>
 #include <linux/pci-acpi.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
 
 
 #define MY_NAME	"acpi_pcihp"
 #define MY_NAME	"acpi_pcihp"
 
 
@@ -333,19 +332,14 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
 {
 {
 	acpi_status status = AE_NOT_FOUND;
 	acpi_status status = AE_NOT_FOUND;
 	acpi_handle handle, phandle;
 	acpi_handle handle, phandle;
-	struct pci_bus *pbus = bus;
-	struct pci_dev *pdev;
-
-	do {
-		pdev = pbus->self;
-		if (!pdev) {
-			handle = acpi_get_pci_rootbridge_handle(
-				pci_domain_nr(pbus), pbus->number);
+	struct pci_bus *pbus;
+
+	handle = NULL;
+	for (pbus = bus; pbus; pbus = pbus->parent) {
+		handle = acpi_pci_get_bridge_handle(pbus);
+		if (handle)
 			break;
 			break;
-		}
-		handle = DEVICE_ACPI_HANDLE(&(pdev->dev));
-		pbus = pbus->parent;
-	} while (!handle);
+	}
 
 
 	/*
 	/*
 	 * _HPP settings apply to all child buses, until another _HPP is
 	 * _HPP settings apply to all child buses, until another _HPP is
@@ -378,12 +372,10 @@ EXPORT_SYMBOL_GPL(acpi_get_hp_params_from_firmware);
  *
  *
  * Attempt to take hotplug control from firmware.
  * Attempt to take hotplug control from firmware.
  */
  */
-int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
+int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
 {
 {
 	acpi_status status;
 	acpi_status status;
 	acpi_handle chandle, handle;
 	acpi_handle chandle, handle;
-	struct pci_dev *pdev = dev;
-	struct pci_bus *parent;
 	struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
 
 
 	flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |
 	flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |
@@ -408,33 +400,25 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
 		acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
 		acpi_get_name(handle, ACPI_FULL_PATHNAME, &string);
 		dbg("Trying to get hotplug control for %s\n",
 		dbg("Trying to get hotplug control for %s\n",
 				(char *)string.pointer);
 				(char *)string.pointer);
-		status = pci_osc_control_set(handle, flags);
+		status = acpi_pci_osc_control_set(handle, flags);
 		if (ACPI_SUCCESS(status))
 		if (ACPI_SUCCESS(status))
 			goto got_one;
 			goto got_one;
 		kfree(string.pointer);
 		kfree(string.pointer);
 		string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
 		string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
 	}
 	}
 
 
-	pdev = dev;
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
-	while (!handle) {
+	handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	if (!handle) {
 		/*
 		/*
 		 * This hotplug controller was not listed in the ACPI name
 		 * This hotplug controller was not listed in the ACPI name
 		 * space at all. Try to get acpi handle of parent pci bus.
 		 * space at all. Try to get acpi handle of parent pci bus.
 		 */
 		 */
-		if (!pdev || !pdev->bus->parent)
-			break;
-		parent = pdev->bus->parent;
-		dbg("Could not find %s in acpi namespace, trying parent\n",
-		    pci_name(pdev));
-		if (!parent->self)
-			/* Parent must be a host bridge */
-			handle = acpi_get_pci_rootbridge_handle(
-					pci_domain_nr(parent),
-					parent->number);
-		else
-			handle = DEVICE_ACPI_HANDLE(&(parent->self->dev));
-		pdev = parent->self;
+		struct pci_bus *pbus;
+		for (pbus = pdev->bus; pbus; pbus = pbus->parent) {
+			handle = acpi_pci_get_bridge_handle(pbus);
+			if (handle)
+				break;
+		}
 	}
 	}
 
 
 	while (handle) {
 	while (handle) {
@@ -453,13 +437,13 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags)
 	}
 	}
 
 
 	dbg("Cannot get control of hotplug hardware for pci %s\n",
 	dbg("Cannot get control of hotplug hardware for pci %s\n",
-	    pci_name(dev));
+	    pci_name(pdev));
 
 
 	kfree(string.pointer);
 	kfree(string.pointer);
 	return -ENODEV;
 	return -ENODEV;
 got_one:
 got_one:
-	dbg("Gained control for hotplug HW for pci %s (%s)\n", pci_name(dev),
-			(char *)string.pointer);
+	dbg("Gained control for hotplug HW for pci %s (%s)\n",
+	    pci_name(pdev), (char *)string.pointer);
 	kfree(string.pointer);
 	kfree(string.pointer);
 	return 0;
 	return 0;
 }
 }

+ 106 - 338
drivers/pci/hotplug/fakephp.c

@@ -1,395 +1,163 @@
-/*
- * Fake PCI Hot Plug Controller Driver
+/* Works like the fakephp driver used to, except a little better.
  *
  *
- * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
- * Copyright (C) 2003 IBM Corp.
- * Copyright (C) 2003 Rolf Eike Beer <eike-kernel@sf-tec.de>
+ * - It's possible to remove devices with subordinate busses.
+ * - New PCI devices that appear via any method, not just a fakephp triggered
+ *   rescan, will be noticed.
+ * - Devices that are removed via any method, not just a fakephp triggered
+ *   removal, will also be noticed.
  *
  *
- * Based on ideas and code from:
- * 	Vladimir Kondratiev <vladimir.kondratiev@intel.com>
- *	Rolf Eike Beer <eike-kernel@sf-tec.de>
+ * Uses nothing from the pci-hotplug subsystem.
  *
  *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 2 of the License.
- *
- * Send feedback to <greg@kroah.com>
  */
  */
 
 
-/*
- *
- * This driver will "emulate" removing PCI devices from the system.  If
- * the "power" file is written to with "0" then the specified PCI device
- * will be completely removed from the kernel.
- *
- * WARNING, this does NOT turn off the power to the PCI device.  This is
- * a "logical" removal, not a physical or electrical removal.
- *
- * Use this module at your own risk, you have been warned!
- *
- * Enabling PCI devices is left as an exercise for the reader...
- *
- */
-#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/pci_hotplug.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
 #include <linux/init.h>
 #include <linux/init.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
+#include <linux/pci.h>
+#include <linux/device.h>
 #include "../pci.h"
 #include "../pci.h"
 
 
-#if !defined(MODULE)
-	#define MY_NAME	"fakephp"
-#else
-	#define MY_NAME	THIS_MODULE->name
-#endif
-
-#define dbg(format, arg...)					\
-	do {							\
-		if (debug)					\
-			printk(KERN_DEBUG "%s: " format,	\
-				MY_NAME , ## arg); 		\
-	} while (0)
-#define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
-#define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg)
-
-#define DRIVER_AUTHOR	"Greg Kroah-Hartman <greg@kroah.com>"
-#define DRIVER_DESC	"Fake PCI Hot Plug Controller Driver"
-
-struct dummy_slot {
-	struct list_head node;
-	struct hotplug_slot *slot;
-	struct pci_dev *dev;
-	struct work_struct remove_work;
-	unsigned long removed;
+struct legacy_slot {
+	struct kobject		kobj;
+	struct pci_dev		*dev;
+	struct list_head	list;
 };
 };
 
 
-static int debug;
-static int dup_slots;
-static LIST_HEAD(slot_list);
-static struct workqueue_struct *dummyphp_wq;
-
-static void pci_rescan_worker(struct work_struct *work);
-static DECLARE_WORK(pci_rescan_work, pci_rescan_worker);
-
-static int enable_slot (struct hotplug_slot *slot);
-static int disable_slot (struct hotplug_slot *slot);
+static LIST_HEAD(legacy_list);
 
 
-static struct hotplug_slot_ops dummy_hotplug_slot_ops = {
-	.owner			= THIS_MODULE,
-	.enable_slot		= enable_slot,
-	.disable_slot		= disable_slot,
-};
-
-static void dummy_release(struct hotplug_slot *slot)
+static ssize_t legacy_show(struct kobject *kobj, struct attribute *attr,
+			   char *buf)
 {
 {
-	struct dummy_slot *dslot = slot->private;
-
-	list_del(&dslot->node);
-	kfree(dslot->slot->info);
-	kfree(dslot->slot);
-	pci_dev_put(dslot->dev);
-	kfree(dslot);
+	struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
+	strcpy(buf, "1\n");
+	return 2;
 }
 }
 
 
-#define SLOT_NAME_SIZE	8
-
-static int add_slot(struct pci_dev *dev)
+static void remove_callback(void *data)
 {
 {
-	struct dummy_slot *dslot;
-	struct hotplug_slot *slot;
-	char name[SLOT_NAME_SIZE];
-	int retval = -ENOMEM;
-	static int count = 1;
-
-	slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
-	if (!slot)
-		goto error;
-
-	slot->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL);
-	if (!slot->info)
-		goto error_slot;
-
-	slot->info->power_status = 1;
-	slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
-	slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
-
-	dslot = kzalloc(sizeof(struct dummy_slot), GFP_KERNEL);
-	if (!dslot)
-		goto error_info;
-
-	if (dup_slots)
-		snprintf(name, SLOT_NAME_SIZE, "fake");
-	else
-		snprintf(name, SLOT_NAME_SIZE, "fake%d", count++);
-	dbg("slot->name = %s\n", name);
-	slot->ops = &dummy_hotplug_slot_ops;
-	slot->release = &dummy_release;
-	slot->private = dslot;
-
-	retval = pci_hp_register(slot, dev->bus, PCI_SLOT(dev->devfn), name);
-	if (retval) {
-		err("pci_hp_register failed with error %d\n", retval);
-		goto error_dslot;
-	}
-
-	dbg("slot->name = %s\n", hotplug_slot_name(slot));
-	dslot->slot = slot;
-	dslot->dev = pci_dev_get(dev);
-	list_add (&dslot->node, &slot_list);
-	return retval;
-
-error_dslot:
-	kfree(dslot);
-error_info:
-	kfree(slot->info);
-error_slot:
-	kfree(slot);
-error:
-	return retval;
+	pci_remove_bus_device((struct pci_dev *)data);
 }
 }
 
 
-static int __init pci_scan_buses(void)
+static ssize_t legacy_store(struct kobject *kobj, struct attribute *attr,
+			    const char *buf, size_t len)
 {
 {
-	struct pci_dev *dev = NULL;
-	int lastslot = 0;
+	struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
+	unsigned long val;
 
 
-	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		if (PCI_FUNC(dev->devfn) > 0 &&
-				lastslot == PCI_SLOT(dev->devfn))
-			continue;
-		lastslot = PCI_SLOT(dev->devfn);
-		add_slot(dev);
-	}
+	if (strict_strtoul(buf, 0, &val) < 0)
+		return -EINVAL;
 
 
-	return 0;
+	if (val)
+		pci_rescan_bus(slot->dev->bus);
+	else
+		sysfs_schedule_callback(&slot->dev->dev.kobj, remove_callback,
+					slot->dev, THIS_MODULE);
+	return len;
 }
 }
 
 
-static void remove_slot(struct dummy_slot *dslot)
-{
-	int retval;
-
-	dbg("removing slot %s\n", hotplug_slot_name(dslot->slot));
-	retval = pci_hp_deregister(dslot->slot);
-	if (retval)
-		err("Problem unregistering a slot %s\n",
-			hotplug_slot_name(dslot->slot));
-}
+static struct attribute *legacy_attrs[] = {
+	&(struct attribute){ .name = "power", .mode = 0644 },
+	NULL,
+};
 
 
-/* called from the single-threaded workqueue handler to remove a slot */
-static void remove_slot_worker(struct work_struct *work)
+static void legacy_release(struct kobject *kobj)
 {
 {
-	struct dummy_slot *dslot =
-		container_of(work, struct dummy_slot, remove_work);
-	remove_slot(dslot);
-}
+	struct legacy_slot *slot = container_of(kobj, typeof(*slot), kobj);
 
 
-/**
- * pci_rescan_slot - Rescan slot
- * @temp: Device template. Should be set: bus and devfn.
- *
- * Tries hard not to re-enable already existing devices;
- * also handles scanning of subfunctions.
- */
-static int pci_rescan_slot(struct pci_dev *temp)
-{
-	struct pci_bus *bus = temp->bus;
-	struct pci_dev *dev;
-	int func;
-	u8 hdr_type;
-	int count = 0;
-
-	if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) {
-		temp->hdr_type = hdr_type & 0x7f;
-		if ((dev = pci_get_slot(bus, temp->devfn)) != NULL)
-			pci_dev_put(dev);
-		else {
-			dev = pci_scan_single_device(bus, temp->devfn);
-			if (dev) {
-				dbg("New device on %s function %x:%x\n",
-					bus->name, temp->devfn >> 3,
-					temp->devfn & 7);
-				count++;
-			}
-		}
-		/* multifunction device? */
-		if (!(hdr_type & 0x80))
-			return count;
-
-		/* continue scanning for other functions */
-		for (func = 1, temp->devfn++; func < 8; func++, temp->devfn++) {
-			if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type))
-				continue;
-			temp->hdr_type = hdr_type & 0x7f;
-
-			if ((dev = pci_get_slot(bus, temp->devfn)) != NULL)
-				pci_dev_put(dev);
-			else {
-				dev = pci_scan_single_device(bus, temp->devfn);
-				if (dev) {
-					dbg("New device on %s function %x:%x\n",
-						bus->name, temp->devfn >> 3,
-						temp->devfn & 7);
-					count++;
-				}
-			}
-		}
-	}
-
-	return count;
+	pci_dev_put(slot->dev);
+	kfree(slot);
 }
 }
 
 
+static struct kobj_type legacy_ktype = {
+	.sysfs_ops = &(struct sysfs_ops){
+		.store = legacy_store, .show = legacy_show
+	},
+	.release = &legacy_release,
+	.default_attrs = legacy_attrs,
+};
 
 
-/**
- * pci_rescan_bus - Rescan PCI bus
- * @bus: the PCI bus to rescan
- *
- * Call pci_rescan_slot for each possible function of the bus.
- */
-static void pci_rescan_bus(const struct pci_bus *bus)
+static int legacy_add_slot(struct pci_dev *pdev)
 {
 {
-	unsigned int devfn;
-	struct pci_dev *dev;
-	int retval;
-	int found = 0;
-	dev = alloc_pci_dev();
-	if (!dev)
-		return;
+	struct legacy_slot *slot = kzalloc(sizeof(*slot), GFP_KERNEL);
 
 
-	dev->bus = (struct pci_bus*)bus;
-	dev->sysdata = bus->sysdata;
-	for (devfn = 0; devfn < 0x100; devfn += 8) {
-		dev->devfn = devfn;
-		found += pci_rescan_slot(dev);
-	}
-
-	if (found) {
-		pci_bus_assign_resources(bus);
-		list_for_each_entry(dev, &bus->devices, bus_list) {
-			/* Skip already-added devices */
-			if (dev->is_added)
-					continue;
-			retval = pci_bus_add_device(dev);
-			if (retval)
-				dev_err(&dev->dev,
-					"Error adding device, continuing\n");
-			else
-				add_slot(dev);
-		}
-		pci_bus_add_devices(bus);
-	}
-	kfree(dev);
-}
+	if (!slot)
+		return -ENOMEM;
 
 
-/* recursively scan all buses */
-static void pci_rescan_buses(const struct list_head *list)
-{
-	const struct list_head *l;
-	list_for_each(l,list) {
-		const struct pci_bus *b = pci_bus_b(l);
-		pci_rescan_bus(b);
-		pci_rescan_buses(&b->children);
+	if (kobject_init_and_add(&slot->kobj, &legacy_ktype,
+				 &pci_slots_kset->kobj, "%s",
+				 dev_name(&pdev->dev))) {
+		dev_warn(&pdev->dev, "Failed to create legacy fake slot\n");
+		return -EINVAL;
 	}
 	}
-}
+	slot->dev = pci_dev_get(pdev);
 
 
-/* initiate rescan of all pci buses */
-static inline void pci_rescan(void) {
-	pci_rescan_buses(&pci_root_buses);
-}
-
-/* called from the single-threaded workqueue handler to rescan all pci buses */
-static void pci_rescan_worker(struct work_struct *work)
-{
-	pci_rescan();
-}
+	list_add(&slot->list, &legacy_list);
 
 
-static int enable_slot(struct hotplug_slot *hotplug_slot)
-{
-	/* mis-use enable_slot for rescanning of the pci bus */
-	cancel_work_sync(&pci_rescan_work);
-	queue_work(dummyphp_wq, &pci_rescan_work);
 	return 0;
 	return 0;
 }
 }
 
 
-static int disable_slot(struct hotplug_slot *slot)
+static int legacy_notify(struct notifier_block *nb,
+			 unsigned long action, void *data)
 {
 {
-	struct dummy_slot *dslot;
-	struct pci_dev *dev;
-	int func;
-
-	if (!slot)
-		return -ENODEV;
-	dslot = slot->private;
-
-	dbg("%s - physical_slot = %s\n", __func__, hotplug_slot_name(slot));
+	struct pci_dev *pdev = to_pci_dev(data);
 
 
-	for (func = 7; func >= 0; func--) {
-		dev = pci_get_slot(dslot->dev->bus, dslot->dev->devfn + func);
-		if (!dev)
-			continue;
+	if (action == BUS_NOTIFY_ADD_DEVICE) {
+		legacy_add_slot(pdev);
+	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
+		struct legacy_slot *slot;
 
 
-		if (test_and_set_bit(0, &dslot->removed)) {
-			dbg("Slot already scheduled for removal\n");
-			pci_dev_put(dev);
-			return -ENODEV;
-		}
+		list_for_each_entry(slot, &legacy_list, list)
+			if (slot->dev == pdev)
+				goto found;
 
 
-		/* remove the device from the pci core */
-		pci_remove_bus_device(dev);
-
-		/* queue work item to blow away this sysfs entry and other
-		 * parts.
-		 */
-		INIT_WORK(&dslot->remove_work, remove_slot_worker);
-		queue_work(dummyphp_wq, &dslot->remove_work);
-
-		pci_dev_put(dev);
+		dev_warn(&pdev->dev, "Missing legacy fake slot?");
+		return -ENODEV;
+found:
+		kobject_del(&slot->kobj);
+		list_del(&slot->list);
+		kobject_put(&slot->kobj);
 	}
 	}
+
 	return 0;
 	return 0;
 }
 }
 
 
-static void cleanup_slots (void)
-{
-	struct list_head *tmp;
-	struct list_head *next;
-	struct dummy_slot *dslot;
-
-	destroy_workqueue(dummyphp_wq);
-	list_for_each_safe (tmp, next, &slot_list) {
-		dslot = list_entry (tmp, struct dummy_slot, node);
-		remove_slot(dslot);
-	}
-	
-}
+static struct notifier_block legacy_notifier = {
+	.notifier_call = legacy_notify
+};
 
 
-static int __init dummyphp_init(void)
+static int __init init_legacy(void)
 {
 {
-	info(DRIVER_DESC "\n");
+	struct pci_dev *pdev = NULL;
 
 
-	dummyphp_wq = create_singlethread_workqueue(MY_NAME);
-	if (!dummyphp_wq)
-		return -ENOMEM;
+	/* Add existing devices */
+	while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev)))
+		legacy_add_slot(pdev);
 
 
-	return pci_scan_buses();
+	/* Be alerted of any new ones */
+	bus_register_notifier(&pci_bus_type, &legacy_notifier);
+	return 0;
 }
 }
+module_init(init_legacy);
 
 
-
-static void __exit dummyphp_exit(void)
+static void __exit remove_legacy(void)
 {
 {
-	cleanup_slots();
+	struct legacy_slot *slot, *tmp;
+
+	bus_unregister_notifier(&pci_bus_type, &legacy_notifier);
+
+	list_for_each_entry_safe(slot, tmp, &legacy_list, list) {
+		list_del(&slot->list);
+		kobject_del(&slot->kobj);
+		kobject_put(&slot->kobj);
+	}
 }
+module_exit(remove_legacy);
 
-module_init(dummyphp_init);
-module_exit(dummyphp_exit);
 
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Trent Piepho <xyzzy@speakeasy.org>");
+MODULE_DESCRIPTION("Legacy version of the fakephp interface");
 MODULE_LICENSE("GPL");
-module_param(debug, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
-module_param(dup_slots, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(dup_slots, "Force duplicate slot names for debugging");

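The reimplementation above keys everything off a PCI bus notifier: legacy_add_slot() runs once for every device already present and again from legacy_notify() whenever BUS_NOTIFY_ADD_DEVICE fires, and the slot kobject is torn down on BUS_NOTIFY_DEL_DEVICE. As a hedged illustration of that notifier pattern only (the module, callback and message names below are invented and are not part of this patch), a minimal watcher on pci_bus_type could look like this:

#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/pci.h>

/* Called for every device added to or removed from the PCI bus type. */
static int pci_watch_notify(struct notifier_block *nb,
			    unsigned long action, void *data)
{
	struct pci_dev *pdev = to_pci_dev(data);

	if (action == BUS_NOTIFY_ADD_DEVICE)
		dev_info(&pdev->dev, "device added\n");
	else if (action == BUS_NOTIFY_DEL_DEVICE)
		dev_info(&pdev->dev, "device going away\n");

	return NOTIFY_DONE;
}

static struct notifier_block pci_watch_nb = {
	.notifier_call = pci_watch_notify,
};

static int __init pci_watch_init(void)
{
	/* Like legacy_notifier above, this only sees devices enumerated from now on. */
	return bus_register_notifier(&pci_bus_type, &pci_watch_nb);
}
module_init(pci_watch_init);

static void __exit pci_watch_exit(void)
{
	bus_unregister_notifier(&pci_bus_type, &pci_watch_nb);
}
module_exit(pci_watch_exit);

MODULE_LICENSE("GPL");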
+ 7 - 6
drivers/pci/hotplug/pciehp.h

@@ -46,10 +46,10 @@ extern int pciehp_force;
 extern struct workqueue_struct *pciehp_wq;
 
 #define dbg(format, arg...)						\
-	do {								\
-		if (pciehp_debug)					\
-			printk("%s: " format, MY_NAME , ## arg);	\
-	} while (0)
+do {									\
+	if (pciehp_debug)						\
+		printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg);	\
+} while (0)
 #define err(format, arg...)						\
 	printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
 #define info(format, arg...)						\
@@ -60,7 +60,7 @@ extern struct workqueue_struct *pciehp_wq;
 #define ctrl_dbg(ctrl, format, arg...)					\
 	do {								\
 		if (pciehp_debug)					\
-			dev_printk(, &ctrl->pcie->device,		\
+			dev_printk(KERN_DEBUG, &ctrl->pcie->device,	\
 					format, ## arg);		\
 	} while (0)
 #define ctrl_err(ctrl, format, arg...)					\
@@ -108,10 +108,11 @@ struct controller {
 	u32 slot_cap;
 	u8 cap_base;
 	struct timer_list poll_timer;
-	int cmd_busy;
+	unsigned int cmd_busy:1;
 	unsigned int no_cmd_complete:1;
 	unsigned int link_active_reporting:1;
 	unsigned int notification_enabled:1;
+	unsigned int power_fault_detected;
 };
 
 #define INT_BUTTON_IGNORE		0

+ 6 - 15
drivers/pci/hotplug/pciehp_acpi.c

@@ -67,37 +67,27 @@ static int __init parse_detect_mode(void)
 	return PCIEHP_DETECT_DEFAULT;
 }
 
-static struct pcie_port_service_id __initdata port_pci_ids[] = {
-	{
-		.vendor = PCI_ANY_ID,
-		.device = PCI_ANY_ID,
-		.port_type = PCIE_ANY_PORT,
-		.service_type = PCIE_PORT_SERVICE_HP,
-		.driver_data =  0,
-        }, { /* end: all zeroes */ }
-};
-
 static int __initdata dup_slot_id;
 static int __initdata acpi_slot_detected;
 static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots);
 
 /* Dummy driver for dumplicate name detection */
-static int __init dummy_probe(struct pcie_device *dev,
-			      const struct pcie_port_service_id *id)
+static int __init dummy_probe(struct pcie_device *dev)
 {
 	int pos;
 	u32 slot_cap;
 	struct slot *slot, *tmp;
 	struct pci_dev *pdev = dev->port;
 	struct pci_bus *pbus = pdev->subordinate;
-	if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL)))
-		return -ENOMEM;
 	/* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
 	if (pciehp_get_hp_hw_control_from_firmware(pdev))
 		return -ENODEV;
 	if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP)))
 		return -ENODEV;
 	pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap);
+	slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+	if (!slot)
+		return -ENOMEM;
 	slot->number = slot_cap >> 19;
 	list_for_each_entry(tmp, &dummy_slots, slot_list) {
 		if (tmp->number == slot->number)
@@ -111,7 +101,8 @@ static int __init dummy_probe(struct pcie_device *dev,
 
 static struct pcie_port_service_driver __initdata dummy_driver = {
         .name           = "pciehp_dummy",
-        .id_table       = port_pci_ids,
+	.port_type	= PCIE_ANY_PORT,
+	.service	= PCIE_PORT_SERVICE_HP,
         .probe          = dummy_probe,
 };
 

+ 5 - 13
drivers/pci/hotplug/pciehp_core.c

@@ -401,7 +401,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
 	return 0;
 }
 
-static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_id *id)
+static int pciehp_probe(struct pcie_device *dev)
 {
 	int rc;
 	struct controller *ctrl;
@@ -475,7 +475,7 @@ static void pciehp_remove (struct pcie_device *dev)
 }
 
 #ifdef CONFIG_PM
-static int pciehp_suspend (struct pcie_device *dev, pm_message_t state)
+static int pciehp_suspend (struct pcie_device *dev)
 {
 	dev_info(&dev->device, "%s ENTRY\n", __func__);
 	return 0;
@@ -503,20 +503,12 @@ static int pciehp_resume (struct pcie_device *dev)
 	}
 	return 0;
 }
-#endif
-
-static struct pcie_port_service_id port_pci_ids[] = { {
-	.vendor = PCI_ANY_ID,
-	.device = PCI_ANY_ID,
-	.port_type = PCIE_ANY_PORT,
-	.service_type = PCIE_PORT_SERVICE_HP,
-	.driver_data =	0,
-	}, { /* end: all zeroes */ }
-};
+#endif /* PM */
 
 static struct pcie_port_service_driver hpdriver_portdrv = {
 	.name		= PCIE_MODULE_NAME,
-	.id_table	= &port_pci_ids[0],
+	.port_type	= PCIE_ANY_PORT,
+	.service	= PCIE_PORT_SERVICE_HP,
 
 	.probe		= pciehp_probe,
 	.remove		= pciehp_remove,

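With the pcie_port_service_id tables removed, a PCIe port service driver now states the port type and service it handles directly in struct pcie_port_service_driver, and its probe callback no longer takes an id argument. A hedged sketch of the new shape, not taken from this patch (the example_* names are invented; registration would go through pcie_port_service_register() from module init, as pciehp does):

static int example_probe(struct pcie_device *dev)
{
	/* dev->port is the pci_dev of the PCIe port this service was matched to. */
	dev_info(&dev->device, "bound to %s\n", pci_name(dev->port));
	return 0;
}

static void example_remove(struct pcie_device *dev)
{
	dev_info(&dev->device, "unbound\n");
}

static struct pcie_port_service_driver example_service_driver = {
	.name		= "example_service",
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_HP,
	.probe		= example_probe,
	.remove		= example_remove,
};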
+ 13 - 21
drivers/pci/hotplug/pciehp_hpc.c

@@ -548,23 +548,21 @@ static int hpc_power_on_slot(struct slot * slot)
 
 	slot_cmd = POWER_ON;
 	cmd_mask = PCI_EXP_SLTCTL_PCC;
-	/* Enable detection that we turned off at slot power-off time */
 	if (!pciehp_poll_mode) {
-		slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
-			     PCI_EXP_SLTCTL_PDCE);
-		cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
-			     PCI_EXP_SLTCTL_PDCE);
+		/* Enable power fault detection turned off at power off time */
+		slot_cmd |= PCI_EXP_SLTCTL_PFDE;
+		cmd_mask |= PCI_EXP_SLTCTL_PFDE;
 	}
 
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
-
 	if (retval) {
 		ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd);
-		return -1;
+		return retval;
 	}
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
 		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 
+	ctrl->power_fault_detected = 0;
 	return retval;
 }
 
@@ -621,18 +619,10 @@ static int hpc_power_off_slot(struct slot * slot)
 
 	slot_cmd = POWER_OFF;
 	cmd_mask = PCI_EXP_SLTCTL_PCC;
-	/*
-	 * If we get MRL or presence detect interrupts now, the isr
-	 * will notice the sticky power-fault bit too and issue power
-	 * indicator change commands. This will lead to an endless loop
-	 * of command completions, since the power-fault bit remains on
-	 * till the slot is powered on again.
-	 */
 	if (!pciehp_poll_mode) {
-		slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
-			      PCI_EXP_SLTCTL_PDCE);
-		cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
-			     PCI_EXP_SLTCTL_PDCE);
+		/* Disable power fault detection */
+		slot_cmd &= ~PCI_EXP_SLTCTL_PFDE;
+		cmd_mask |= PCI_EXP_SLTCTL_PFDE;
 	}
 
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
@@ -672,10 +662,11 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 		detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
 			     PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC |
 			     PCI_EXP_SLTSTA_CC);
+		detected &= ~intr_loc;
 		intr_loc |= detected;
 		if (!intr_loc)
 			return IRQ_NONE;
-		if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) {
+		if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, intr_loc)) {
 			ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
 				 __func__);
 			return IRQ_NONE;
@@ -709,9 +700,10 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 		pciehp_handle_presence_change(p_slot);
 
 	/* Check Power Fault Detected */
-	if (intr_loc & PCI_EXP_SLTSTA_PFD)
+	if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
+		ctrl->power_fault_detected = 1;
 		pciehp_handle_power_fault(p_slot);
-
+	}
 	return IRQ_HANDLED;
 }
 

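The ISR change above works together with the new power_fault_detected flag in struct controller: the Power Fault Detected status bit stays asserted until the slot is powered up again, so the handler acts only on the first occurrence and the flag is re-armed when power is turned back on. Restated as a hedged fragment (same logic as the hunks above, with the surrounding code omitted):

	/* in pcie_isr(): react once per power-on cycle */
	if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
		ctrl->power_fault_detected = 1;
		pciehp_handle_power_fault(p_slot);
	}

	/* in hpc_power_on_slot(), after the power-on command succeeds */
	ctrl->power_fault_detected = 0;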
+ 5 - 5
drivers/pci/hotplug/shpchp.h

@@ -48,10 +48,10 @@ extern int shpchp_debug;
 extern struct workqueue_struct *shpchp_wq;
 
 #define dbg(format, arg...)						\
-	do {								\
-		if (shpchp_debug)					\
-			printk("%s: " format, MY_NAME , ## arg);	\
-	} while (0)
+do {									\
+	if (shpchp_debug)						\
+		printk(KERN_DEBUG "%s: " format, MY_NAME , ## arg);	\
+} while (0)
 #define err(format, arg...)						\
 	printk(KERN_ERR "%s: " format, MY_NAME , ## arg)
 #define info(format, arg...)						\
@@ -62,7 +62,7 @@ extern struct workqueue_struct *shpchp_wq;
 #define ctrl_dbg(ctrl, format, arg...)					\
 	do {								\
 		if (shpchp_debug)					\
-			dev_printk(, &ctrl->pci_dev->dev,		\
+			dev_printk(KERN_DEBUG, &ctrl->pci_dev->dev,	\
 					format, ## arg);		\
 	} while (0)
 #define ctrl_err(ctrl, format, arg...)					\

+ 1 - 1
drivers/pci/hotplug/shpchp_pci.c

@@ -137,7 +137,7 @@ int __ref shpchp_configure_device(struct slot *p_slot)
 							busnr))
 					break;
 			}
-			if (busnr >= end) {
+			if (busnr > end) {
 				ctrl_err(ctrl,
 					 "No free bus for hot-added bridge\n");
 				pci_dev_put(dev);

+ 1 - 1
drivers/pci/intel-iommu.c

@@ -1782,7 +1782,7 @@ static inline void iommu_prepare_isa(void)
 	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
 
 	if (ret)
-		printk("IOMMU: Failed to create 0-64M identity map, "
+		printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, "
 			"floppy might not work\n");
 
 }

+ 680 - 0
drivers/pci/iov.c

@@ -0,0 +1,680 @@
+/*
+ * drivers/pci/iov.c
+ *
+ * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
+ *
+ * PCI Express I/O Virtualization (IOV) support.
+ *   Single Root IOV 1.0
+ */
+
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include "pci.h"
+
+#define VIRTFN_ID_LEN	16
+
+static inline u8 virtfn_bus(struct pci_dev *dev, int id)
+{
+	return dev->bus->number + ((dev->devfn + dev->sriov->offset +
+				    dev->sriov->stride * id) >> 8);
+}
+
+static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
+{
+	return (dev->devfn + dev->sriov->offset +
+		dev->sriov->stride * id) & 0xff;
+}
+
+static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
+{
+	int rc;
+	struct pci_bus *child;
+
+	if (bus->number == busnr)
+		return bus;
+
+	child = pci_find_bus(pci_domain_nr(bus), busnr);
+	if (child)
+		return child;
+
+	child = pci_add_new_bus(bus, NULL, busnr);
+	if (!child)
+		return NULL;
+
+	child->subordinate = busnr;
+	child->dev.parent = bus->bridge;
+	rc = pci_bus_add_child(child);
+	if (rc) {
+		pci_remove_bus(child);
+		return NULL;
+	}
+
+	return child;
+}
+
+static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
+{
+	struct pci_bus *child;
+
+	if (bus->number == busnr)
+		return;
+
+	child = pci_find_bus(pci_domain_nr(bus), busnr);
+	BUG_ON(!child);
+
+	if (list_empty(&child->devices))
+		pci_remove_bus(child);
+}
+
+static int virtfn_add(struct pci_dev *dev, int id, int reset)
+{
+	int i;
+	int rc;
+	u64 size;
+	char buf[VIRTFN_ID_LEN];
+	struct pci_dev *virtfn;
+	struct resource *res;
+	struct pci_sriov *iov = dev->sriov;
+
+	virtfn = alloc_pci_dev();
+	if (!virtfn)
+		return -ENOMEM;
+
+	mutex_lock(&iov->dev->sriov->lock);
+	virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
+	if (!virtfn->bus) {
+		kfree(virtfn);
+		mutex_unlock(&iov->dev->sriov->lock);
+		return -ENOMEM;
+	}
+	virtfn->devfn = virtfn_devfn(dev, id);
+	virtfn->vendor = dev->vendor;
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
+	pci_setup_device(virtfn);
+	virtfn->dev.parent = dev->dev.parent;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = dev->resource + PCI_IOV_RESOURCES + i;
+		if (!res->parent)
+			continue;
+		virtfn->resource[i].name = pci_name(virtfn);
+		virtfn->resource[i].flags = res->flags;
+		size = resource_size(res);
+		do_div(size, iov->total);
+		virtfn->resource[i].start = res->start + size * id;
+		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
+		rc = request_resource(res, &virtfn->resource[i]);
+		BUG_ON(rc);
+	}
+
+	if (reset)
+		pci_execute_reset_function(virtfn);
+
+	pci_device_add(virtfn, virtfn->bus);
+	mutex_unlock(&iov->dev->sriov->lock);
+
+	virtfn->physfn = pci_dev_get(dev);
+	virtfn->is_virtfn = 1;
+
+	rc = pci_bus_add_device(virtfn);
+	if (rc)
+		goto failed1;
+	sprintf(buf, "virtfn%u", id);
+	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
+	if (rc)
+		goto failed1;
+	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
+	if (rc)
+		goto failed2;
+
+	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+
+	return 0;
+
+failed2:
+	sysfs_remove_link(&dev->dev.kobj, buf);
+failed1:
+	pci_dev_put(dev);
+	mutex_lock(&iov->dev->sriov->lock);
+	pci_remove_bus_device(virtfn);
+	virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+	mutex_unlock(&iov->dev->sriov->lock);
+
+	return rc;
+}
+
+static void virtfn_remove(struct pci_dev *dev, int id, int reset)
+{
+	char buf[VIRTFN_ID_LEN];
+	struct pci_bus *bus;
+	struct pci_dev *virtfn;
+	struct pci_sriov *iov = dev->sriov;
+
+	bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
+	if (!bus)
+		return;
+
+	virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
+	if (!virtfn)
+		return;
+
+	pci_dev_put(virtfn);
+
+	if (reset) {
+		device_release_driver(&virtfn->dev);
+		pci_execute_reset_function(virtfn);
+	}
+
+	sprintf(buf, "virtfn%u", id);
+	sysfs_remove_link(&dev->dev.kobj, buf);
+	sysfs_remove_link(&virtfn->dev.kobj, "physfn");
+
+	mutex_lock(&iov->dev->sriov->lock);
+	pci_remove_bus_device(virtfn);
+	virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
+	mutex_unlock(&iov->dev->sriov->lock);
+
+	pci_dev_put(dev);
+}
+
+static int sriov_migration(struct pci_dev *dev)
+{
+	u16 status;
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!iov->nr_virtfn)
+		return 0;
+
+	if (!(iov->cap & PCI_SRIOV_CAP_VFM))
+		return 0;
+
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status);
+	if (!(status & PCI_SRIOV_STATUS_VFM))
+		return 0;
+
+	schedule_work(&iov->mtask);
+
+	return 1;
+}
+
+static void sriov_migration_task(struct work_struct *work)
+{
+	int i;
+	u8 state;
+	u16 status;
+	struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask);
+
+	for (i = iov->initial; i < iov->nr_virtfn; i++) {
+		state = readb(iov->mstate + i);
+		if (state == PCI_SRIOV_VFM_MI) {
+			writeb(PCI_SRIOV_VFM_AV, iov->mstate + i);
+			state = readb(iov->mstate + i);
+			if (state == PCI_SRIOV_VFM_AV)
+				virtfn_add(iov->self, i, 1);
+		} else if (state == PCI_SRIOV_VFM_MO) {
+			virtfn_remove(iov->self, i, 1);
+			writeb(PCI_SRIOV_VFM_UA, iov->mstate + i);
+			state = readb(iov->mstate + i);
+			if (state == PCI_SRIOV_VFM_AV)
+				virtfn_add(iov->self, i, 0);
+		}
+	}
+
+	pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status);
+	status &= ~PCI_SRIOV_STATUS_VFM;
+	pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status);
+}
+
+static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn)
+{
+	int bir;
+	u32 table;
+	resource_size_t pa;
+	struct pci_sriov *iov = dev->sriov;
+
+	if (nr_virtfn <= iov->initial)
+		return 0;
+
+	pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table);
+	bir = PCI_SRIOV_VFM_BIR(table);
+	if (bir > PCI_STD_RESOURCE_END)
+		return -EIO;
+
+	table = PCI_SRIOV_VFM_OFFSET(table);
+	if (table + nr_virtfn > pci_resource_len(dev, bir))
+		return -EIO;
+
+	pa = pci_resource_start(dev, bir) + table;
+	iov->mstate = ioremap(pa, nr_virtfn);
+	if (!iov->mstate)
+		return -ENOMEM;
+
+	INIT_WORK(&iov->mtask, sriov_migration_task);
+
+	iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR;
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+
+	return 0;
+}
+
+static void sriov_disable_migration(struct pci_dev *dev)
+{
+	struct pci_sriov *iov = dev->sriov;
+
+	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+
+	cancel_work_sync(&iov->mtask);
+	iounmap(iov->mstate);
+}
+
+static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
+{
+	int rc;
+	int i, j;
+	int nres;
+	u16 offset, stride, initial;
+	struct resource *res;
+	struct pci_dev *pdev;
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!nr_virtfn)
+		return 0;
+
+	if (iov->nr_virtfn)
+		return -EINVAL;
+
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
+	if (initial > iov->total ||
+	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
+		return -EIO;
+
+	if (nr_virtfn < 0 || nr_virtfn > iov->total ||
+	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
+		return -EINVAL;
+
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
+	if (!offset || (nr_virtfn > 1 && !stride))
+		return -EIO;
+
+	nres = 0;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = dev->resource + PCI_IOV_RESOURCES + i;
+		if (res->parent)
+			nres++;
+	}
+	if (nres != iov->nres) {
+		dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
+		return -ENOMEM;
+	}
+
+	iov->offset = offset;
+	iov->stride = stride;
+
+	if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
+		dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
+		return -ENOMEM;
+	}
+
+	if (iov->link != dev->devfn) {
+		pdev = pci_get_slot(dev->bus, iov->link);
+		if (!pdev)
+			return -ENODEV;
+
+		pci_dev_put(pdev);
+
+		if (!pdev->is_physfn)
+			return -ENODEV;
+
+		rc = sysfs_create_link(&dev->dev.kobj,
+					&pdev->dev.kobj, "dep_link");
+		if (rc)
+			return rc;
+	}
+
+	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
+	pci_block_user_cfg_access(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	msleep(100);
+	pci_unblock_user_cfg_access(dev);
+
+	iov->initial = initial;
+	if (nr_virtfn < initial)
+		initial = nr_virtfn;
+
+	for (i = 0; i < initial; i++) {
+		rc = virtfn_add(dev, i, 0);
+		if (rc)
+			goto failed;
+	}
+
+	if (iov->cap & PCI_SRIOV_CAP_VFM) {
+		rc = sriov_enable_migration(dev, nr_virtfn);
+		if (rc)
+			goto failed;
+	}
+
+	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
+	iov->nr_virtfn = nr_virtfn;
+
+	return 0;
+
+failed:
+	for (j = 0; j < i; j++)
+		virtfn_remove(dev, j, 0);
+
+	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+	pci_block_user_cfg_access(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	ssleep(1);
+	pci_unblock_user_cfg_access(dev);
+
+	if (iov->link != dev->devfn)
+		sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+	return rc;
+}
+
+static void sriov_disable(struct pci_dev *dev)
+{
+	int i;
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!iov->nr_virtfn)
+		return;
+
+	if (iov->cap & PCI_SRIOV_CAP_VFM)
+		sriov_disable_migration(dev);
+
+	for (i = 0; i < iov->nr_virtfn; i++)
+		virtfn_remove(dev, i, 0);
+
+	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
+	pci_block_user_cfg_access(dev);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	ssleep(1);
+	pci_unblock_user_cfg_access(dev);
+
+	if (iov->link != dev->devfn)
+		sysfs_remove_link(&dev->dev.kobj, "dep_link");
+
+	iov->nr_virtfn = 0;
+}
+
+static int sriov_init(struct pci_dev *dev, int pos)
+{
+	int i;
+	int rc;
+	int nres;
+	u32 pgsz;
+	u16 ctrl, total, offset, stride;
+	struct pci_sriov *iov;
+	struct resource *res;
+	struct pci_dev *pdev;
+
+	if (dev->pcie_type != PCI_EXP_TYPE_RC_END &&
+	    dev->pcie_type != PCI_EXP_TYPE_ENDPOINT)
+		return -ENODEV;
+
+	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
+	if (ctrl & PCI_SRIOV_CTRL_VFE) {
+		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
+		ssleep(1);
+	}
+
+	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
+	if (!total)
+		return 0;
+
+	ctrl = 0;
+	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
+		if (pdev->is_physfn)
+			goto found;
+
+	pdev = NULL;
+	if (pci_ari_enabled(dev->bus))
+		ctrl |= PCI_SRIOV_CTRL_ARI;
+
+found:
+	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
+	pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
+	pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
+	if (!offset || (total > 1 && !stride))
+		return -EIO;
+
+	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
+	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
+	pgsz &= ~((1 << i) - 1);
+	if (!pgsz)
+		return -EIO;
+
+	pgsz &= ~(pgsz - 1);
+	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
+
+	nres = 0;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = dev->resource + PCI_IOV_RESOURCES + i;
+		i += __pci_read_base(dev, pci_bar_unknown, res,
+				     pos + PCI_SRIOV_BAR + i * 4);
+		if (!res->flags)
+			continue;
+		if (resource_size(res) & (PAGE_SIZE - 1)) {
+			rc = -EIO;
+			goto failed;
+		}
+		res->end = res->start + resource_size(res) * total - 1;
+		nres++;
+	}
+
+	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+	if (!iov) {
+		rc = -ENOMEM;
+		goto failed;
+	}
+
+	iov->pos = pos;
+	iov->nres = nres;
+	iov->ctrl = ctrl;
+	iov->total = total;
+	iov->offset = offset;
+	iov->stride = stride;
+	iov->pgsz = pgsz;
+	iov->self = dev;
+	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
+	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
+
+	if (pdev)
+		iov->dev = pci_dev_get(pdev);
+	else {
+		iov->dev = dev;
+		mutex_init(&iov->lock);
+	}
+
+	dev->sriov = iov;
+	dev->is_physfn = 1;
+
+	return 0;
+
+failed:
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = dev->resource + PCI_IOV_RESOURCES + i;
+		res->flags = 0;
+	}
+
+	return rc;
+}
+
+static void sriov_release(struct pci_dev *dev)
+{
+	BUG_ON(dev->sriov->nr_virtfn);
+
+	if (dev == dev->sriov->dev)
+		mutex_destroy(&dev->sriov->lock);
+	else
+		pci_dev_put(dev->sriov->dev);
+
+	kfree(dev->sriov);
+	dev->sriov = NULL;
+}
+
+static void sriov_restore_state(struct pci_dev *dev)
+{
+	int i;
+	u16 ctrl;
+	struct pci_sriov *iov = dev->sriov;
+
+	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
+	if (ctrl & PCI_SRIOV_CTRL_VFE)
+		return;
+
+	for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
+		pci_update_resource(dev, i);
+
+	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
+	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
+	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
+		msleep(100);
+}
+
+/**
+ * pci_iov_init - initialize the IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_iov_init(struct pci_dev *dev)
+{
+	int pos;
+
+	if (!dev->is_pcie)
+		return -ENODEV;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+	if (pos)
+		return sriov_init(dev, pos);
+
+	return -ENODEV;
+}
+
+/**
+ * pci_iov_release - release resources used by the IOV capability
+ * @dev: the PCI device
+ */
+void pci_iov_release(struct pci_dev *dev)
+{
+	if (dev->is_physfn)
+		sriov_release(dev);
+}
+
+/**
+ * pci_iov_resource_bar - get position of the SR-IOV BAR
+ * @dev: the PCI device
+ * @resno: the resource number
+ * @type: the BAR type to be filled in
+ *
+ * Returns position of the BAR encapsulated in the SR-IOV capability.
+ */
+int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+			 enum pci_bar_type *type)
+{
+	if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
+		return 0;
+
+	BUG_ON(!dev->is_physfn);
+
+	*type = pci_bar_unknown;
+
+	return dev->sriov->pos + PCI_SRIOV_BAR +
+		4 * (resno - PCI_IOV_RESOURCES);
+}
+
+/**
+ * pci_restore_iov_state - restore the state of the IOV capability
+ * @dev: the PCI device
+ */
+void pci_restore_iov_state(struct pci_dev *dev)
+{
+	if (dev->is_physfn)
+		sriov_restore_state(dev);
+}
+
+/**
+ * pci_iov_bus_range - find bus range used by Virtual Function
+ * @bus: the PCI bus
+ *
+ * Returns max number of buses (exclude current one) used by Virtual
+ * Functions.
+ */
+int pci_iov_bus_range(struct pci_bus *bus)
+{
+	int max = 0;
+	u8 busnr;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (!dev->is_physfn)
+			continue;
+		busnr = virtfn_bus(dev, dev->sriov->total - 1);
+		if (busnr > max)
+			max = busnr;
+	}
+
+	return max ? max - bus->number : 0;
+}
+
+/**
+ * pci_enable_sriov - enable the SR-IOV capability
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+	might_sleep();
+
+	if (!dev->is_physfn)
+		return -ENODEV;
+
+	return sriov_enable(dev, nr_virtfn);
+}
+EXPORT_SYMBOL_GPL(pci_enable_sriov);
+
+/**
+ * pci_disable_sriov - disable the SR-IOV capability
+ * @dev: the PCI device
+ */
+void pci_disable_sriov(struct pci_dev *dev)
+{
+	might_sleep();
+
+	if (!dev->is_physfn)
+		return;
+
+	sriov_disable(dev);
+}
+EXPORT_SYMBOL_GPL(pci_disable_sriov);
+
+/**
+ * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration
+ * @dev: the PCI device
+ *
+ * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not.
+ *
+ * Physical Function driver is responsible to register IRQ handler using
+ * VF Migration Interrupt Message Number, and call this function when the
+ * interrupt is generated by the hardware.
+ */
+irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+{
+	if (!dev->is_physfn)
+		return IRQ_NONE;
+
+	return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_migration);

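The pci_enable_sriov(), pci_disable_sriov() and pci_sriov_migration() entry points exported above are meant to be called by a Physical Function driver. A hedged sketch of how such a driver might use them (the foo_* wrappers and the request for four VFs are invented for the example; only the three PCI calls come from this file):

/* From the PF driver's probe path, once the device itself is set up. */
static int foo_enable_vfs(struct pci_dev *pdev)
{
	int rc;

	rc = pci_enable_sriov(pdev, 4);		/* ask for four Virtual Functions */
	if (rc)
		dev_err(&pdev->dev, "failed to enable SR-IOV: %d\n", rc);
	return rc;
}

/* From the PF driver's remove path, before the device goes away. */
static void foo_disable_vfs(struct pci_dev *pdev)
{
	pci_disable_sriov(pdev);
}

/* If the device supports VF migration, the PF driver registers a handler on the
 * VF Migration Interrupt Message Number and forwards the event to the core. */
static irqreturn_t foo_migration_irq(int irq, void *data)
{
	struct pci_dev *pdev = data;

	return pci_sriov_migration(pdev);
}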
+ 212 - 214
drivers/pci/msi.c

@@ -27,48 +27,53 @@ static int pci_msi_enable = 1;
 
 
 /* Arch hooks */
 /* Arch hooks */
 
 
-int __attribute__ ((weak))
-arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
+#ifndef arch_msi_check_device
+int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
 {
 {
 	return 0;
 	return 0;
 }
 }
+#endif
 
 
-int __attribute__ ((weak))
-arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry)
-{
-	return 0;
-}
-
-int __attribute__ ((weak))
-arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+#ifndef arch_setup_msi_irqs
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 {
 	struct msi_desc *entry;
 	struct msi_desc *entry;
 	int ret;
 	int ret;
 
 
+	/*
+	 * If an architecture wants to support multiple MSI, it needs to
+	 * override arch_setup_msi_irqs()
+	 */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
 	list_for_each_entry(entry, &dev->msi_list, list) {
 	list_for_each_entry(entry, &dev->msi_list, list) {
 		ret = arch_setup_msi_irq(dev, entry);
 		ret = arch_setup_msi_irq(dev, entry);
-		if (ret)
+		if (ret < 0)
 			return ret;
 			return ret;
+		if (ret > 0)
+			return -ENOSPC;
 	}
 	}
 
 
 	return 0;
 	return 0;
 }
 }
+#endif
 
 
-void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq)
-{
-	return;
-}
-
-void __attribute__ ((weak))
-arch_teardown_msi_irqs(struct pci_dev *dev)
+#ifndef arch_teardown_msi_irqs
+void arch_teardown_msi_irqs(struct pci_dev *dev)
 {
 {
 	struct msi_desc *entry;
 	struct msi_desc *entry;
 
 
 	list_for_each_entry(entry, &dev->msi_list, list) {
 	list_for_each_entry(entry, &dev->msi_list, list) {
-		if (entry->irq != 0)
-			arch_teardown_msi_irq(entry->irq);
+		int i, nvec;
+		if (entry->irq == 0)
+			continue;
+		nvec = 1 << entry->msi_attrib.multiple;
+		for (i = 0; i < nvec; i++)
+			arch_teardown_msi_irq(entry->irq + i);
 	}
 	}
 }
 }
+#endif
 
 
 static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
 static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
 {
 {
@@ -111,27 +116,14 @@ static inline __attribute_const__ u32 msi_mask(unsigned x)
 	return (1 << (1 << x)) - 1;
 	return (1 << (1 << x)) - 1;
 }
 }
 
 
-static void msix_flush_writes(struct irq_desc *desc)
+static inline __attribute_const__ u32 msi_capable_mask(u16 control)
 {
 {
-	struct msi_desc *entry;
+	return msi_mask((control >> 1) & 7);
+}
 
 
-	entry = get_irq_desc_msi(desc);
-	BUG_ON(!entry || !entry->dev);
-	switch (entry->msi_attrib.type) {
-	case PCI_CAP_ID_MSI:
-		/* nothing to do */
-		break;
-	case PCI_CAP_ID_MSIX:
-	{
-		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
-		readl(entry->mask_base + offset);
-		break;
-	}
-	default:
-		BUG();
-		break;
-	}
+static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
+{
+	return msi_mask((control >> 4) & 7);
 }
 }
 
 
 /*
 /*
@@ -143,49 +135,71 @@ static void msix_flush_writes(struct irq_desc *desc)
  * Returns 1 if it succeeded in masking the interrupt and 0 if the device
  * Returns 1 if it succeeded in masking the interrupt and 0 if the device
  * doesn't support MSI masking.
  * doesn't support MSI masking.
  */
  */
-static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag)
+static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 {
 {
-	struct msi_desc *entry;
+	u32 mask_bits = desc->masked;
 
 
-	entry = get_irq_desc_msi(desc);
-	BUG_ON(!entry || !entry->dev);
-	switch (entry->msi_attrib.type) {
-	case PCI_CAP_ID_MSI:
-		if (entry->msi_attrib.maskbit) {
-			int pos;
-			u32 mask_bits;
-
-			pos = (long)entry->mask_base;
-			pci_read_config_dword(entry->dev, pos, &mask_bits);
-			mask_bits &= ~(mask);
-			mask_bits |= flag & mask;
-			pci_write_config_dword(entry->dev, pos, mask_bits);
-		} else {
-			return 0;
-		}
-		break;
-	case PCI_CAP_ID_MSIX:
-	{
-		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
-			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
-		writel(flag, entry->mask_base + offset);
-		readl(entry->mask_base + offset);
-		break;
-	}
-	default:
-		BUG();
-		break;
+	if (!desc->msi_attrib.maskbit)
+		return;
+
+	mask_bits &= ~mask;
+	mask_bits |= flag;
+	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
+	desc->masked = mask_bits;
+}
+
+/*
+ * This internal function does not flush PCI writes to the device.
+ * All users must ensure that they read from the device before either
+ * assuming that the device state is up to date, or returning out of this
+ * file.  This saves a few milliseconds when initialising devices with lots
+ * of MSI-X interrupts.
+ */
+static void msix_mask_irq(struct msi_desc *desc, u32 flag)
+{
+	u32 mask_bits = desc->masked;
+	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+					PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+	mask_bits &= ~1;
+	mask_bits |= flag;
+	writel(mask_bits, desc->mask_base + offset);
+	desc->masked = mask_bits;
+}
+
+static void msi_set_mask_bit(unsigned irq, u32 flag)
+{
+	struct msi_desc *desc = get_irq_msi(irq);
+
+	if (desc->msi_attrib.is_msix) {
+		msix_mask_irq(desc, flag);
+		readl(desc->mask_base);		/* Flush write to device */
+	} else {
+		unsigned offset = irq - desc->dev->irq;
+		msi_mask_irq(desc, 1 << offset, flag << offset);
 	}
 	}
-	entry->msi_attrib.masked = !!flag;
-	return 1;
+}
+
+void mask_msi_irq(unsigned int irq)
+{
+	msi_set_mask_bit(irq, 1);
+}
+
+void unmask_msi_irq(unsigned int irq)
+{
+	msi_set_mask_bit(irq, 0);
 }
 }
 
 
 void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 {
 {
 	struct msi_desc *entry = get_irq_desc_msi(desc);
 	struct msi_desc *entry = get_irq_desc_msi(desc);
-	switch(entry->msi_attrib.type) {
-	case PCI_CAP_ID_MSI:
-	{
+	if (entry->msi_attrib.is_msix) {
+		void __iomem *base = entry->mask_base +
+			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
+	} else {
 		struct pci_dev *dev = entry->dev;
 		struct pci_dev *dev = entry->dev;
 		int pos = entry->msi_attrib.pos;
 		int pos = entry->msi_attrib.pos;
 		u16 data;
 		u16 data;
@@ -201,21 +215,6 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
 			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
 		}
 		}
 		msg->data = data;
 		msg->data = data;
-		break;
-	}
-	case PCI_CAP_ID_MSIX:
-	{
-		void __iomem *base;
-		base = entry->mask_base +
-			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
-
-		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
- 		break;
- 	}
- 	default:
-		BUG();
 	}
 	}
 }
 }
 
 
@@ -229,11 +228,25 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
 void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 {
 {
 	struct msi_desc *entry = get_irq_desc_msi(desc);
 	struct msi_desc *entry = get_irq_desc_msi(desc);
-	switch (entry->msi_attrib.type) {
-	case PCI_CAP_ID_MSI:
-	{
+	if (entry->msi_attrib.is_msix) {
+		void __iomem *base;
+		base = entry->mask_base +
+			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+		writel(msg->address_lo,
+			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+		writel(msg->address_hi,
+			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
+	} else {
 		struct pci_dev *dev = entry->dev;
 		struct pci_dev *dev = entry->dev;
 		int pos = entry->msi_attrib.pos;
 		int pos = entry->msi_attrib.pos;
+		u16 msgctl;
+
+		pci_read_config_word(dev, msi_control_reg(pos), &msgctl);
+		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
+		msgctl |= entry->msi_attrib.multiple << 4;
+		pci_write_config_word(dev, msi_control_reg(pos), msgctl);
 
 
 		pci_write_config_dword(dev, msi_lower_address_reg(pos),
 		pci_write_config_dword(dev, msi_lower_address_reg(pos),
 					msg->address_lo);
 					msg->address_lo);
@@ -246,23 +259,6 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 			pci_write_config_word(dev, msi_data_reg(pos, 0),
 			pci_write_config_word(dev, msi_data_reg(pos, 0),
 						msg->data);
 						msg->data);
 		}
 		}
-		break;
-	}
-	case PCI_CAP_ID_MSIX:
-	{
-		void __iomem *base;
-		base = entry->mask_base +
-			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
-
-		writel(msg->address_lo,
-			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-		writel(msg->address_hi,
-			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
-		break;
-	}
-	default:
-		BUG();
 	}
 	}
 	entry->msg = *msg;
 	entry->msg = *msg;
 }
 }
@@ -274,37 +270,18 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
 	write_msi_msg_desc(desc, msg);
 	write_msi_msg_desc(desc, msg);
 }
 }
 
 
-void mask_msi_irq(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	msi_set_mask_bits(desc, 1, 1);
-	msix_flush_writes(desc);
-}
-
-void unmask_msi_irq(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	msi_set_mask_bits(desc, 1, 0);
-	msix_flush_writes(desc);
-}
-
 static int msi_free_irqs(struct pci_dev* dev);
 static int msi_free_irqs(struct pci_dev* dev);
 
 
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
 {
 {
-	struct msi_desc *entry;
-
-	entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL);
-	if (!entry)
+	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
 		return NULL;
 		return NULL;
 
 
-	INIT_LIST_HEAD(&entry->list);
-	entry->irq = 0;
-	entry->dev = NULL;
+	INIT_LIST_HEAD(&desc->list);
+	desc->dev = dev;
 
 
-	return entry;
+	return desc;
 }
 }
 
 
 static void pci_intx_for_msi(struct pci_dev *dev, int enable)
 static void pci_intx_for_msi(struct pci_dev *dev, int enable)
@@ -328,15 +305,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
 	pci_intx_for_msi(dev, 0);
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, 0);
 	msi_set_enable(dev, 0);
 	write_msi_msg(dev->irq, &entry->msg);
 	write_msi_msg(dev->irq, &entry->msg);
-	if (entry->msi_attrib.maskbit) {
-		struct irq_desc *desc = irq_to_desc(dev->irq);
-		msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask,
-				  entry->msi_attrib.masked);
-	}
 
 
 	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
 	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
+	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
 	control &= ~PCI_MSI_FLAGS_QSIZE;
 	control &= ~PCI_MSI_FLAGS_QSIZE;
-	control |= PCI_MSI_FLAGS_ENABLE;
+	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
 	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
 	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
 }
 }
 
 
@@ -354,9 +327,8 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
 	msix_set_enable(dev, 0);
 	msix_set_enable(dev, 0);
 
 
 	list_for_each_entry(entry, &dev->msi_list, list) {
 	list_for_each_entry(entry, &dev->msi_list, list) {
-		struct irq_desc *desc = irq_to_desc(entry->irq);
 		write_msi_msg(entry->irq, &entry->msg);
 		write_msi_msg(entry->irq, &entry->msg);
-		msi_set_mask_bits(desc, 1, entry->msi_attrib.masked);
+		msix_mask_irq(entry, entry->masked);
 	}
 	}
 
 
 	BUG_ON(list_empty(&dev->msi_list));
 	BUG_ON(list_empty(&dev->msi_list));
@@ -378,52 +350,48 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 /**
 /**
  * msi_capability_init - configure device's MSI capability structure
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
  * @dev: pointer to the pci_dev data structure of MSI device function
+ * @nvec: number of interrupts to allocate
  *
  *
- * Setup the MSI capability structure of device function with a single
- * MSI irq, regardless of device function is capable of handling
- * multiple messages. A return of zero indicates the successful setup
- * of an entry zero with the new MSI irq or non-zero for otherwise.
- **/
-static int msi_capability_init(struct pci_dev *dev)
+ * Setup the MSI capability structure of the device with the requested
+ * number of interrupts.  A return value of zero indicates the successful
+ * setup of an entry with the new MSI irq.  A negative return value indicates
+ * an error, and a positive return value indicates the number of interrupts
+ * which could have been allocated.
+ */
+static int msi_capability_init(struct pci_dev *dev, int nvec)
 {
 {
 	struct msi_desc *entry;
 	struct msi_desc *entry;
 	int pos, ret;
 	int pos, ret;
 	u16 control;
 	u16 control;
+	unsigned mask;
 
 
 	msi_set_enable(dev, 0);	/* Ensure msi is disabled as I set it up */
 	msi_set_enable(dev, 0);	/* Ensure msi is disabled as I set it up */
 
 
    	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
    	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
 	/* MSI Entry Initialization */
 	/* MSI Entry Initialization */
-	entry = alloc_msi_entry();
+	entry = alloc_msi_entry(dev);
 	if (!entry)
 	if (!entry)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	entry->msi_attrib.type = PCI_CAP_ID_MSI;
+	entry->msi_attrib.is_msix = 0;
 	entry->msi_attrib.is_64 = is_64bit_address(control);
 	entry->msi_attrib.is_64 = is_64bit_address(control);
 	entry->msi_attrib.entry_nr = 0;
 	entry->msi_attrib.entry_nr = 0;
 	entry->msi_attrib.maskbit = is_mask_bit_support(control);
 	entry->msi_attrib.maskbit = is_mask_bit_support(control);
-	entry->msi_attrib.masked = 1;
 	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.pos = pos;
 	entry->msi_attrib.pos = pos;
-	entry->dev = dev;
-	if (entry->msi_attrib.maskbit) {
-		unsigned int base, maskbits, temp;
-
-		base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
-		entry->mask_base = (void __iomem *)(long)base;
-
-		/* All MSIs are unmasked by default, Mask them all */
-		pci_read_config_dword(dev, base, &maskbits);
-		temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1);
-		maskbits |= temp;
-		pci_write_config_dword(dev, base, maskbits);
-		entry->msi_attrib.maskbits_mask = temp;
-	}
+
+	entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
+	/* All MSIs are unmasked by default, Mask them all */
+	if (entry->msi_attrib.maskbit)
+		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
+	mask = msi_capable_mask(control);
+	msi_mask_irq(entry, mask, mask);
+
 	list_add_tail(&entry->list, &dev->msi_list);
 	list_add_tail(&entry->list, &dev->msi_list);
 
 
 	/* Configure MSI capability structure */
 	/* Configure MSI capability structure */
-	ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI);
+	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
 	if (ret) {
 	if (ret) {
 		msi_free_irqs(dev);
 		msi_free_irqs(dev);
 		return ret;
 		return ret;
@@ -476,26 +444,28 @@ static int msix_capability_init(struct pci_dev *dev,
 
 
 	/* MSI-X Table Initialization */
 	/* MSI-X Table Initialization */
 	for (i = 0; i < nvec; i++) {
 	for (i = 0; i < nvec; i++) {
-		entry = alloc_msi_entry();
+		entry = alloc_msi_entry(dev);
 		if (!entry)
 		if (!entry)
 			break;
 			break;
 
 
  		j = entries[i].entry;
  		j = entries[i].entry;
-		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
+		entry->msi_attrib.is_msix = 1;
 		entry->msi_attrib.is_64 = 1;
 		entry->msi_attrib.is_64 = 1;
 		entry->msi_attrib.entry_nr = j;
 		entry->msi_attrib.entry_nr = j;
-		entry->msi_attrib.maskbit = 1;
-		entry->msi_attrib.masked = 1;
 		entry->msi_attrib.default_irq = dev->irq;
 		entry->msi_attrib.default_irq = dev->irq;
 		entry->msi_attrib.pos = pos;
 		entry->msi_attrib.pos = pos;
-		entry->dev = dev;
 		entry->mask_base = base;
 		entry->mask_base = base;
+		entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE +
+					PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+		msix_mask_irq(entry, 1);
 
 
 		list_add_tail(&entry->list, &dev->msi_list);
 		list_add_tail(&entry->list, &dev->msi_list);
 	}
 	}
 
 
 	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
 	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
-	if (ret) {
+	if (ret < 0) {
+		/* If we had some success report the number of irqs
+		 * we succeeded in setting up. */
 		int avail = 0;
 		int avail = 0;
 		list_for_each_entry(entry, &dev->msi_list, list) {
 		list_for_each_entry(entry, &dev->msi_list, list) {
 			if (entry->irq != 0) {
 			if (entry->irq != 0) {
@@ -503,14 +473,13 @@ static int msix_capability_init(struct pci_dev *dev,
 			}
 			}
 		}
 		}
 
 
-		msi_free_irqs(dev);
+		if (avail != 0)
+			ret = avail;
+	}
 
 
-		/* If we had some success report the number of irqs
-		 * we succeeded in setting up.
-		 */
-		if (avail == 0)
-			avail = ret;
-		return avail;
+	if (ret) {
+		msi_free_irqs(dev);
+		return ret;
 	}
 	}
 
 
 	i = 0;
 	i = 0;
@@ -575,39 +544,54 @@ static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
 }
 }
 
 
 /**
 /**
- * pci_enable_msi - configure device's MSI capability structure
- * @dev: pointer to the pci_dev data structure of MSI device function
+ * pci_enable_msi_block - configure device's MSI capability structure
+ * @dev: device to configure
+ * @nvec: number of interrupts to configure
  *
  *
- * Setup the MSI capability structure of device function with
- * a single MSI irq upon its software driver call to request for
- * MSI mode enabled on its hardware device function. A return of zero
- * indicates the successful setup of an entry zero with the new MSI
- * irq or non-zero for otherwise.
- **/
-int pci_enable_msi(struct pci_dev* dev)
+ * Allocate IRQs for a device with the MSI capability.
+ * This function returns a negative errno if an error occurs.  If it
+ * is unable to allocate the number of interrupts requested, it returns
+ * the number of interrupts it might be able to allocate.  If it successfully
+ * allocates at least the number of interrupts requested, it returns 0 and
+ * updates the @dev's irq member to the lowest new interrupt number; the
+ * other interrupt numbers allocated to this device are consecutive.
+ */
+int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 {
 {
-	int status;
+	int status, pos, maxvec;
+	u16 msgctl;
 
 
-	status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
+	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	if (!pos)
+		return -EINVAL;
+	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
+	maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+	if (nvec > maxvec)
+		return maxvec;
+
+	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
 	if (status)
 	if (status)
 		return status;
 		return status;
 
 
 	WARN_ON(!!dev->msi_enabled);
 	WARN_ON(!!dev->msi_enabled);
 
 
-	/* Check whether driver already requested for MSI-X irqs */
+	/* Check whether driver already requested MSI-X irqs */
 	if (dev->msix_enabled) {
 	if (dev->msix_enabled) {
 		dev_info(&dev->dev, "can't enable MSI "
 		dev_info(&dev->dev, "can't enable MSI "
 			 "(MSI-X already enabled)\n");
 			 "(MSI-X already enabled)\n");
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
-	status = msi_capability_init(dev);
+
+	status = msi_capability_init(dev, nvec);
 	return status;
 	return status;
 }
 }
-EXPORT_SYMBOL(pci_enable_msi);
+EXPORT_SYMBOL(pci_enable_msi_block);
 
 
-void pci_msi_shutdown(struct pci_dev* dev)
+void pci_msi_shutdown(struct pci_dev *dev)
 {
 {
-	struct msi_desc *entry;
+	struct msi_desc *desc;
+	u32 mask;
+	u16 ctrl;
 
 
 	if (!pci_msi_enable || !dev || !dev->msi_enabled)
 	if (!pci_msi_enable || !dev || !dev->msi_enabled)
 		return;
 		return;
@@ -617,19 +601,15 @@ void pci_msi_shutdown(struct pci_dev* dev)
 	dev->msi_enabled = 0;
 	dev->msi_enabled = 0;
 
 
 	BUG_ON(list_empty(&dev->msi_list));
 	BUG_ON(list_empty(&dev->msi_list));
-	entry = list_entry(dev->msi_list.next, struct msi_desc, list);
-	/* Return the the pci reset with msi irqs unmasked */
-	if (entry->msi_attrib.maskbit) {
-		u32 mask = entry->msi_attrib.maskbits_mask;
-		struct irq_desc *desc = irq_to_desc(dev->irq);
-		msi_set_mask_bits(desc, mask, ~mask);
-	}
-	if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
-		return;
+	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
+	pci_read_config_word(dev, desc->msi_attrib.pos + PCI_MSI_FLAGS, &ctrl);
+	mask = msi_capable_mask(ctrl);
+	msi_mask_irq(desc, mask, ~mask);
 
 
 	/* Restore dev->irq to its default pin-assertion irq */
 	/* Restore dev->irq to its default pin-assertion irq */
-	dev->irq = entry->msi_attrib.default_irq;
+	dev->irq = desc->msi_attrib.default_irq;
 }
 }
+
 void pci_disable_msi(struct pci_dev* dev)
 void pci_disable_msi(struct pci_dev* dev)
 {
 {
 	struct msi_desc *entry;
 	struct msi_desc *entry;
@@ -640,7 +620,7 @@ void pci_disable_msi(struct pci_dev* dev)
 	pci_msi_shutdown(dev);
 	pci_msi_shutdown(dev);
 
 
 	entry = list_entry(dev->msi_list.next, struct msi_desc, list);
 	entry = list_entry(dev->msi_list.next, struct msi_desc, list);
-	if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
+	if (entry->msi_attrib.is_msix)
 		return;
 		return;
 
 
 	msi_free_irqs(dev);
 	msi_free_irqs(dev);
@@ -652,14 +632,18 @@ static int msi_free_irqs(struct pci_dev* dev)
 	struct msi_desc *entry, *tmp;
 	struct msi_desc *entry, *tmp;
 
 
 	list_for_each_entry(entry, &dev->msi_list, list) {
 	list_for_each_entry(entry, &dev->msi_list, list) {
-		if (entry->irq)
-			BUG_ON(irq_has_action(entry->irq));
+		int i, nvec;
+		if (!entry->irq)
+			continue;
+		nvec = 1 << entry->msi_attrib.multiple;
+		for (i = 0; i < nvec; i++)
+			BUG_ON(irq_has_action(entry->irq + i));
 	}
 	}
 
 
 	arch_teardown_msi_irqs(dev);
 	arch_teardown_msi_irqs(dev);
 
 
 	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
 	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
-		if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) {
+		if (entry->msi_attrib.is_msix) {
 			writel(1, entry->mask_base + entry->msi_attrib.entry_nr
 			writel(1, entry->mask_base + entry->msi_attrib.entry_nr
 				  * PCI_MSIX_ENTRY_SIZE
 				  * PCI_MSIX_ENTRY_SIZE
 				  + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
 				  + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
@@ -674,6 +658,23 @@ static int msi_free_irqs(struct pci_dev* dev)
 	return 0;
 	return 0;
 }
 }
 
 
+/**
+ * pci_msix_table_size - return the number of device's MSI-X table entries
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ */
+int pci_msix_table_size(struct pci_dev *dev)
+{
+	int pos;
+	u16 control;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+	if (!pos)
+		return 0;
+
+	pci_read_config_word(dev, msi_control_reg(pos), &control);
+	return multi_msix_capable(control);
+}
+
 /**
 /**
  * pci_enable_msix - configure device's MSI-X capability structure
  * pci_enable_msix - configure device's MSI-X capability structure
  * @dev: pointer to the pci_dev data structure of MSI-X device function
  * @dev: pointer to the pci_dev data structure of MSI-X device function
@@ -691,9 +692,8 @@ static int msi_free_irqs(struct pci_dev* dev)
  **/
  **/
 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
 {
 {
-	int status, pos, nr_entries;
+	int status, nr_entries;
 	int i, j;
 	int i, j;
-	u16 control;
 
 
 	if (!entries)
 	if (!entries)
  		return -EINVAL;
  		return -EINVAL;
@@ -702,9 +702,7 @@ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
 	if (status)
 	if (status)
 		return status;
 		return status;
 
 
-	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-	pci_read_config_word(dev, msi_control_reg(pos), &control);
-	nr_entries = multi_msix_capable(control);
+	nr_entries = pci_msix_table_size(dev);
 	if (nvec > nr_entries)
 	if (nvec > nr_entries)
 		return -EINVAL;
 		return -EINVAL;
 
 

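pci_enable_msi_block() reports partial success through a positive return value: the number of vectors the device or architecture might actually grant. A driver that wants multiple MSI vectors therefore retries with the hinted count until the call either succeeds or fails outright. A hedged sketch of that loop (the foo_setup_msi() wrapper and the initial request of eight vectors are invented for the example):

static int foo_setup_msi(struct pci_dev *pdev)
{
	int nvec = 8;	/* how many vectors this hypothetical device would like */
	int rc;

	do {
		rc = pci_enable_msi_block(pdev, nvec);
		if (rc > 0)
			nvec = rc;	/* retry with what could be granted */
	} while (rc > 0);

	if (rc < 0)
		return rc;	/* no MSI at all; fall back to pin-based INTx */

	/* Success: pdev->irq .. pdev->irq + nvec - 1 are consecutive MSI vectors. */
	return nvec;
}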
+ 0 - 6
drivers/pci/msi.h

@@ -20,14 +20,8 @@
 #define msi_mask_bits_reg(base, is64bit) \
 	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
 #define msi_disable(control)		control &= ~PCI_MSI_FLAGS_ENABLE
-#define multi_msi_capable(control) \
-	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
-#define multi_msi_enable(control, num) \
-	control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
 #define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
 #define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
-#define msi_enable(control, num) multi_msi_enable(control, num); \
-	control |= PCI_MSI_FLAGS_ENABLE
 
 #define msix_table_offset_reg(base)	(base + 0x04)
 #define msix_pba_offset_reg(base)	(base + 0x08)

+ 0 - 215
drivers/pci/pci-acpi.c

@@ -18,221 +18,6 @@
 #include <linux/pci-acpi.h>
 #include "pci.h"
 
-struct acpi_osc_data {
-	acpi_handle handle;
-	u32 support_set;
-	u32 control_set;
-	u32 control_query;
-	int is_queried;
-	struct list_head sibiling;
-};
-static LIST_HEAD(acpi_osc_data_list);
-
-struct acpi_osc_args {
-	u32 capbuf[3];
-};
-
-static DEFINE_MUTEX(pci_acpi_lock);
-
-static struct acpi_osc_data *acpi_get_osc_data(acpi_handle handle)
-{
-	struct acpi_osc_data *data;
-
-	list_for_each_entry(data, &acpi_osc_data_list, sibiling) {
-		if (data->handle == handle)
-			return data;
-	}
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return NULL;
-	INIT_LIST_HEAD(&data->sibiling);
-	data->handle = handle;
-	list_add_tail(&data->sibiling, &acpi_osc_data_list);
-	return data;
-}
-
-static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
-			  0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
-
-static acpi_status acpi_run_osc(acpi_handle handle,
-				struct acpi_osc_args *osc_args, u32 *retval)
-{
-	acpi_status status;
-	struct acpi_object_list input;
-	union acpi_object in_params[4];
-	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
-	union acpi_object *out_obj;
-	u32 errors, flags = osc_args->capbuf[OSC_QUERY_TYPE];
-
-	/* Setting up input parameters */
-	input.count = 4;
-	input.pointer = in_params;
-	in_params[0].type 		= ACPI_TYPE_BUFFER;
-	in_params[0].buffer.length 	= 16;
-	in_params[0].buffer.pointer	= OSC_UUID;
-	in_params[1].type 		= ACPI_TYPE_INTEGER;
-	in_params[1].integer.value 	= 1;
-	in_params[2].type 		= ACPI_TYPE_INTEGER;
-	in_params[2].integer.value	= 3;
-	in_params[3].type		= ACPI_TYPE_BUFFER;
-	in_params[3].buffer.length 	= 12;
-	in_params[3].buffer.pointer 	= (u8 *)osc_args->capbuf;
-
-	status = acpi_evaluate_object(handle, "_OSC", &input, &output);
-	if (ACPI_FAILURE(status))
-		return status;
-
-	if (!output.length)
-		return AE_NULL_OBJECT;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		printk(KERN_DEBUG "Evaluate _OSC returns wrong type\n");
-		status = AE_TYPE;
-		goto out_kfree;
-	}
-	/* Need to ignore the bit0 in result code */
-	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
-	if (errors) {
-		if (errors & OSC_REQUEST_ERROR)
-			printk(KERN_DEBUG "_OSC request fails\n"); 
-		if (errors & OSC_INVALID_UUID_ERROR)
-			printk(KERN_DEBUG "_OSC invalid UUID\n"); 
-		if (errors & OSC_INVALID_REVISION_ERROR)
-			printk(KERN_DEBUG "_OSC invalid revision\n"); 
-		if (errors & OSC_CAPABILITIES_MASK_ERROR) {
-			if (flags & OSC_QUERY_ENABLE)
-				goto out_success;
-			printk(KERN_DEBUG "_OSC FW not grant req. control\n");
-			status = AE_SUPPORT;
-			goto out_kfree;
-		}
-		status = AE_ERROR;
-		goto out_kfree;
-	}
-out_success:
-	*retval = *((u32 *)(out_obj->buffer.pointer + 8));
-	status = AE_OK;
-
-out_kfree:
-	kfree(output.pointer);
-	return status;
-}
-
-static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data)
-{
-	acpi_status status;
-	u32 support_set, result;
-	struct acpi_osc_args osc_args;
-
-	/* do _OSC query for all possible controls */
-	support_set = osc_data->support_set | (flags & OSC_SUPPORT_MASKS);
-	osc_args.capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
-	osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set;
-	osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
-
-	status = acpi_run_osc(osc_data->handle, &osc_args, &result);
-	if (ACPI_SUCCESS(status)) {
-		osc_data->support_set = support_set;
-		osc_data->control_query = result;
-		osc_data->is_queried = 1;
-	}
-
-	return status;
-}
-
-/*
- * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature
- * @flags: Bitmask of flags to support
- *
- * See the ACPI spec for the definition of the flags
- */
-int pci_acpi_osc_support(acpi_handle handle, u32 flags)
-{
-	acpi_status status;
-	acpi_handle tmp;
-	struct acpi_osc_data *osc_data;
-	int rc = 0;
-
-	status = acpi_get_handle(handle, "_OSC", &tmp);
-	if (ACPI_FAILURE(status))
-		return -ENOTTY;
-
-	mutex_lock(&pci_acpi_lock);
-	osc_data = acpi_get_osc_data(handle);
-	if (!osc_data) {
-		printk(KERN_ERR "acpi osc data array is full\n");
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	__acpi_query_osc(flags, osc_data);
-out:
-	mutex_unlock(&pci_acpi_lock);
-	return rc;
-}
-
-/**
- * pci_osc_control_set - commit requested control to Firmware
- * @handle: acpi_handle for the target ACPI object
- * @flags: driver's requested control bits
- *
- * Attempt to take control from Firmware on requested control bits.
- **/
-acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
-{
-	acpi_status status;
-	u32 control_req, control_set, result;
-	acpi_handle tmp;
-	struct acpi_osc_data *osc_data;
-	struct acpi_osc_args osc_args;
-
-	status = acpi_get_handle(handle, "_OSC", &tmp);
-	if (ACPI_FAILURE(status))
-		return status;
-
-	mutex_lock(&pci_acpi_lock);
-	osc_data = acpi_get_osc_data(handle);
-	if (!osc_data) {
-		printk(KERN_ERR "acpi osc data array is full\n");
-		status = AE_ERROR;
-		goto out;
-	}
-
-	control_req = (flags & OSC_CONTROL_MASKS);
-	if (!control_req) {
-		status = AE_TYPE;
-		goto out;
-	}
-
-	/* No need to evaluate _OSC if the control was already granted. */
-	if ((osc_data->control_set & control_req) == control_req)
-		goto out;
-
-	if (!osc_data->is_queried) {
-		status = __acpi_query_osc(osc_data->support_set, osc_data);
-		if (ACPI_FAILURE(status))
-			goto out;
-	}
-
-	if ((osc_data->control_query & control_req) != control_req) {
-		status = AE_SUPPORT;
-		goto out;
-	}
-
-	control_set = osc_data->control_set | control_req;
-	osc_args.capbuf[OSC_QUERY_TYPE] = 0;
-	osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set;
-	osc_args.capbuf[OSC_CONTROL_TYPE] = control_set;
-	status = acpi_run_osc(handle, &osc_args, &result);
-	if (ACPI_SUCCESS(status))
-		osc_data->control_set = result;
-out:
-	mutex_unlock(&pci_acpi_lock);
-	return status;
-}
-EXPORT_SYMBOL(pci_osc_control_set);
-
 /*
  * _SxD returns the D-state with the highest power
  * (lowest D-state number) supported in the S-state "x".

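The code removed above (its job moves to the _OSC support added in drivers/acpi/pci_root.c) built the three-dword _OSC argument buffer by hand: one word of query flags, one of supported features, one of requested controls. Below is a minimal stand-alone sketch of that buffer layout; the OSC_* index and flag values are copied by hand and the support/control masks are made-up examples, so check them against <linux/pci-acpi.h> rather than treating this as the in-kernel call path.

/* Illustrative only: mirrors the 3-dword _OSC argument buffer the removed
 * code built (query word, supported-features word, requested-control word).
 * Index/flag values are assumptions, not taken from this patch. */
#include <stdint.h>
#include <stdio.h>

#define OSC_QUERY_TYPE     0
#define OSC_SUPPORT_TYPE   1
#define OSC_CONTROL_TYPE   2
#define OSC_QUERY_ENABLE   1

static void fill_osc_capbuf(uint32_t capbuf[3], uint32_t support,
			    uint32_t control, int query)
{
	/* query run: ask firmware which controls it would grant */
	capbuf[OSC_QUERY_TYPE]   = query ? OSC_QUERY_ENABLE : 0;
	/* features the OS claims to support (always cumulative) */
	capbuf[OSC_SUPPORT_TYPE] = support;
	/* controls the OS wants to own (all controls when querying) */
	capbuf[OSC_CONTROL_TYPE] = control;
}

int main(void)
{
	uint32_t capbuf[3];

	fill_osc_capbuf(capbuf, 0x1f /* example support mask */,
			0x1d /* example control mask */, 1);
	printf("%#x %#x %#x\n", capbuf[0], capbuf[1], capbuf[2]);
	return 0;
}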
+ 79 - 2
drivers/pci/pci-driver.c

@@ -99,6 +99,52 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count)
 }
 static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
 
 
+/**
+ * store_remove_id - remove a PCI device ID from this driver
+ * @driver: target device driver
+ * @buf: buffer for scanning device ID data
+ * @count: input size
+ *
+ * Removes a dynamic pci device ID to this driver.
+ */
+static ssize_t
+store_remove_id(struct device_driver *driver, const char *buf, size_t count)
+{
+	struct pci_dynid *dynid, *n;
+	struct pci_driver *pdrv = to_pci_driver(driver);
+	__u32 vendor, device, subvendor = PCI_ANY_ID,
+		subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
+	int fields = 0;
+	int retval = -ENODEV;
+
+	fields = sscanf(buf, "%x %x %x %x %x %x",
+			&vendor, &device, &subvendor, &subdevice,
+			&class, &class_mask);
+	if (fields < 2)
+		return -EINVAL;
+
+	spin_lock(&pdrv->dynids.lock);
+	list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) {
+		struct pci_device_id *id = &dynid->id;
+		if ((id->vendor == vendor) &&
+		    (id->device == device) &&
+		    (subvendor == PCI_ANY_ID || id->subvendor == subvendor) &&
+		    (subdevice == PCI_ANY_ID || id->subdevice == subdevice) &&
+		    !((id->class ^ class) & class_mask)) {
+			list_del(&dynid->node);
+			kfree(dynid);
+			retval = 0;
+			break;
+		}
+	}
+	spin_unlock(&pdrv->dynids.lock);
+
+	if (retval)
+		return retval;
+	return count;
+}
+static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id);
+
 static void
 pci_free_dynids(struct pci_driver *drv)
 {
@@ -125,6 +171,20 @@ static void pci_remove_newid_file(struct pci_driver *drv)
 {
 {
 	driver_remove_file(&drv->driver, &driver_attr_new_id);
 	driver_remove_file(&drv->driver, &driver_attr_new_id);
 }
 }
+
+static int
+pci_create_removeid_file(struct pci_driver *drv)
+{
+	int error = 0;
+	if (drv->probe != NULL)
+		error = driver_create_file(&drv->driver,&driver_attr_remove_id);
+	return error;
+}
+
+static void pci_remove_removeid_file(struct pci_driver *drv)
+{
+	driver_remove_file(&drv->driver, &driver_attr_remove_id);
+}
 #else /* !CONFIG_HOTPLUG */
 static inline void pci_free_dynids(struct pci_driver *drv) {}
 static inline int pci_create_newid_file(struct pci_driver *drv)
@@ -132,6 +192,11 @@ static inline int pci_create_newid_file(struct pci_driver *drv)
 	return 0;
 	return 0;
 }
 }
 static inline void pci_remove_newid_file(struct pci_driver *drv) {}
 static inline void pci_remove_newid_file(struct pci_driver *drv) {}
+static inline int pci_create_removeid_file(struct pci_driver *drv)
+{
+	return 0;
+}
+static inline void pci_remove_removeid_file(struct pci_driver *drv) {}
 #endif
 
 /**
@@ -899,13 +964,23 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
 	/* register with core */
 	error = driver_register(&drv->driver);
 	if (error)
-		return error;
+		goto out;
 
 
 	error = pci_create_newid_file(drv);
 	error = pci_create_newid_file(drv);
 	if (error)
 	if (error)
-		driver_unregister(&drv->driver);
+		goto out_newid;
 
 
+	error = pci_create_removeid_file(drv);
+	if (error)
+		goto out_removeid;
+out:
 	return error;
 	return error;
+
+out_removeid:
+	pci_remove_newid_file(drv);
+out_newid:
+	driver_unregister(&drv->driver);
+	goto out;
 }
 }
 
 /**
 void
 void
 pci_unregister_driver(struct pci_driver *drv)
 {
 	pci_remove_newid_file(drv);
 	pci_remove_newid_file(drv);
 	driver_unregister(&drv->driver);
 	pci_free_dynids(drv);
 	.remove		= pci_device_remove,
 	.remove		= pci_device_remove,
 	.shutdown	= pci_device_shutdown,
 	.dev_attrs	= pci_dev_attrs,
 	.pm		= PCI_PM_OPS_PTR,
 	.pm		= PCI_PM_OPS_PTR,
 };
 

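The new remove_id attribute mirrors new_id: it takes the same space-separated hex fields (vendor and device are mandatory; subvendor, subdevice, class and class_mask are optional) and deletes the first matching dynamic ID. A hedged user-space sketch follows; the driver name e1000e and the 8086:10f5 ID are placeholders, not part of the patch.

/* Sketch: drop a dynamic ID previously added through new_id.
 * Driver name and IDs below are invented for illustration. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/pci/drivers/e1000e/remove_id", "w");

	if (!f) {
		perror("remove_id");
		return 1;
	}
	/* at least vendor and device must be given, as with new_id */
	fprintf(f, "8086 10f5\n");
	fclose(f);
	return 0;
}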
+ 120 - 4
drivers/pci/pci-sysfs.c

@@ -219,6 +219,83 @@ msi_bus_store(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
 
+#ifdef CONFIG_HOTPLUG
+static DEFINE_MUTEX(pci_remove_rescan_mutex);
+static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf,
+				size_t count)
+{
+	unsigned long val;
+	struct pci_bus *b = NULL;
+
+	if (strict_strtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val) {
+		mutex_lock(&pci_remove_rescan_mutex);
+		while ((b = pci_find_next_bus(b)) != NULL)
+			pci_rescan_bus(b);
+		mutex_unlock(&pci_remove_rescan_mutex);
+	}
+	return count;
+}
+
+struct bus_attribute pci_bus_attrs[] = {
+	__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, bus_rescan_store),
+	__ATTR_NULL
+};
+
+static ssize_t
+dev_rescan_store(struct device *dev, struct device_attribute *attr,
+		 const char *buf, size_t count)
+{
+	unsigned long val;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (strict_strtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val) {
+		mutex_lock(&pci_remove_rescan_mutex);
+		pci_rescan_bus(pdev->bus);
+		mutex_unlock(&pci_remove_rescan_mutex);
+	}
+	return count;
+}
+
+static void remove_callback(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	mutex_lock(&pci_remove_rescan_mutex);
+	pci_remove_bus_device(pdev);
+	mutex_unlock(&pci_remove_rescan_mutex);
+}
+
+static ssize_t
+remove_store(struct device *dev, struct device_attribute *dummy,
+	     const char *buf, size_t count)
+{
+	int ret = 0;
+	unsigned long val;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (strict_strtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (pci_is_root_bus(pdev->bus))
+		return -EBUSY;
+
+	/* An attribute cannot be unregistered by one of its own methods,
+	 * so we have to use this roundabout approach.
+	 */
+	if (val)
+		ret = device_schedule_callback(dev, remove_callback);
+	if (ret)
+		count = ret;
+	return count;
+}
+#endif
+
 struct device_attribute pci_dev_attrs[] = {
 	__ATTR_RO(resource),
 	__ATTR_RO(vendor),
@@ -237,9 +314,24 @@ struct device_attribute pci_dev_attrs[] = {
 	__ATTR(broken_parity_status,(S_IRUGO|S_IWUSR),
 	__ATTR(broken_parity_status,(S_IRUGO|S_IWUSR),
 		broken_parity_status_show,broken_parity_status_store),
 		broken_parity_status_show,broken_parity_status_store),
 	__ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store),
 	__ATTR(msi_bus, 0644, msi_bus_show, msi_bus_store),
+#ifdef CONFIG_HOTPLUG
+	__ATTR(remove, (S_IWUSR|S_IWGRP), NULL, remove_store),
+	__ATTR(rescan, (S_IWUSR|S_IWGRP), NULL, dev_rescan_store),
+#endif
 	__ATTR_NULL,
 	__ATTR_NULL,
 };
 };
 
 
+static ssize_t
+boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sprintf(buf, "%u\n",
+		!!(pdev->resource[PCI_ROM_RESOURCE].flags &
+		   IORESOURCE_ROM_SHADOW));
+}
+struct device_attribute vga_attr = __ATTR_RO(boot_vga);
+
 static ssize_t
 pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
@@ -492,6 +584,19 @@ pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
         return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
         return pci_mmap_legacy_page_range(bus, vma, pci_mmap_io);
 }
 }
 
 
+/**
+ * pci_adjust_legacy_attr - adjustment of legacy file attributes
+ * @b: bus to create files under
+ * @mmap_type: I/O port or memory
+ *
+ * Stub implementation. Can be overridden by arch if necessary.
+ */
+void __weak
+pci_adjust_legacy_attr(struct pci_bus *b, enum pci_mmap_state mmap_type)
+{
+	return;
+}
+
 /**
 /**
  * pci_create_legacy_files - create legacy I/O port and memory files
  * pci_create_legacy_files - create legacy I/O port and memory files
  * @b: bus to create files under
  * @b: bus to create files under
@@ -518,6 +623,7 @@ void pci_create_legacy_files(struct pci_bus *b)
 	b->legacy_io->read = pci_read_legacy_io;
 	b->legacy_io->read = pci_read_legacy_io;
 	b->legacy_io->write = pci_write_legacy_io;
 	b->legacy_io->write = pci_write_legacy_io;
 	b->legacy_io->mmap = pci_mmap_legacy_io;
 	b->legacy_io->mmap = pci_mmap_legacy_io;
+	pci_adjust_legacy_attr(b, pci_mmap_io);
 	error = device_create_bin_file(&b->dev, b->legacy_io);
 	if (error)
 		goto legacy_io_err;
@@ -528,6 +634,7 @@ void pci_create_legacy_files(struct pci_bus *b)
 	b->legacy_mem->size = 1024*1024;
 	b->legacy_mem->size = 1024*1024;
 	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
 	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
 	b->legacy_mem->mmap = pci_mmap_legacy_mem;
 	b->legacy_mem->mmap = pci_mmap_legacy_mem;
+	pci_adjust_legacy_attr(b, pci_mmap_mem);
 	error = device_create_bin_file(&b->dev, b->legacy_mem);
 	error = device_create_bin_file(&b->dev, b->legacy_mem);
 	if (error)
 	if (error)
 		goto legacy_mem_err;
 		goto legacy_mem_err;
@@ -719,8 +826,8 @@ static int pci_create_resource_files(struct pci_dev *pdev)
 	return 0;
 }
 #else /* !HAVE_PCI_MMAP */
-static inline int pci_create_resource_files(struct pci_dev *dev) { return 0; }
-static inline void pci_remove_resource_files(struct pci_dev *dev) { return; }
+int __weak pci_create_resource_files(struct pci_dev *dev) { return 0; }
+void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
 #endif /* HAVE_PCI_MMAP */
 
 /**
@@ -884,18 +991,27 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 		pdev->rom_attr = attr;
 		pdev->rom_attr = attr;
 	}
 	}
 
 
+	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) {
+		retval = device_create_file(&pdev->dev, &vga_attr);
+		if (retval)
+			goto err_rom_file;
+	}
+
 	/* add platform-specific attributes */
 	/* add platform-specific attributes */
 	retval = pcibios_add_platform_entries(pdev);
 	retval = pcibios_add_platform_entries(pdev);
 	if (retval)
 	if (retval)
-		goto err_rom_file;
+		goto err_vga_file;
 
 
 	/* add sysfs entries for various capabilities */
 	/* add sysfs entries for various capabilities */
 	retval = pci_create_capabilities_sysfs(pdev);
 	retval = pci_create_capabilities_sysfs(pdev);
 	if (retval)
 	if (retval)
-		goto err_rom_file;
+		goto err_vga_file;
 
 
 	return 0;
 	return 0;
 
 
+err_vga_file:
+	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+		device_remove_file(&pdev->dev, &vga_attr);
 err_rom_file:
 	if (rom_size) {
 		sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);

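Together these hunks add the /sys/bus/pci/rescan, per-device rescan and per-device remove files; remove refuses devices that sit directly on a root bus and defers the actual removal through device_schedule_callback(), because an attribute cannot unregister itself from its own store method. A small sketch of driving the new files from user space; the device address 0000:00:1c.0 is purely illustrative.

/* Sketch: trigger the new remove/rescan attributes from user space.
 * The device address below is a placeholder. */
#include <stdio.h>

static int write_one(const char *path)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs("1\n", f);	/* any non-zero value triggers the action */
	return fclose(f);
}

int main(void)
{
	/* detach one device (rejected with EBUSY for devices on a root bus)... */
	write_one("/sys/bus/pci/devices/0000:00:1c.0/remove");
	/* ...then rediscover everything behind every known bus */
	return write_one("/sys/bus/pci/rescan");
}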
+ 171 - 22
drivers/pci/pci.c

@@ -20,6 +20,8 @@
 #include <linux/pm_wakeup.h>
 #include <linux/interrupt.h>
 #include <asm/dma.h>	/* isa_dma_bridge_buggy */
+#include <linux/device.h>
+#include <asm/setup.h>
 #include "pci.h"
 #include "pci.h"
 
 
 unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
 unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
@@ -677,6 +679,8 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state)
 
 
 EXPORT_SYMBOL(pci_choose_state);
 EXPORT_SYMBOL(pci_choose_state);
 
 
+#define PCI_EXP_SAVE_REGS	7
+
 static int pci_save_pcie_state(struct pci_dev *dev)
 {
 	int pos, i = 0;
@@ -689,7 +693,7 @@ static int pci_save_pcie_state(struct pci_dev *dev)
 
 
 	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
 	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
 	if (!save_state) {
 	if (!save_state) {
-		dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__);
+		dev_err(&dev->dev, "buffer not found in %s\n", __func__);
 		return -ENOMEM;
 	}
 	cap = (u16 *)&save_state->data[0];
@@ -698,6 +702,9 @@ static int pci_save_pcie_state(struct pci_dev *dev)
 	pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]);
 	pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]);
 	pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]);
 	pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]);
 	pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]);
 	pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]);
+	pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]);
+	pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]);
+	pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -718,6 +725,9 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
 	pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]);
 	pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]);
 	pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]);
 	pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]);
 	pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]);
 	pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]);
+	pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]);
+	pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]);
+	pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]);
 }
 }
 
 
 
 
@@ -732,7 +742,7 @@ static int pci_save_pcix_state(struct pci_dev *dev)
 
 
 	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX);
 	if (!save_state) {
-		dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__);
+		dev_err(&dev->dev, "buffer not found in %s\n", __func__);
 		return -ENOMEM;
 	}
 
 
@@ -805,6 +815,7 @@ pci_restore_state(struct pci_dev *dev)
 	}
 	}
 	pci_restore_pcix_state(dev);
 	pci_restore_pcix_state(dev);
 	pci_restore_msi_state(dev);
 	pci_restore_msi_state(dev);
+	pci_restore_iov_state(dev);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -1401,7 +1412,8 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
 {
 {
 	int error;
 	int error;
 
 
-	error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16));
+	error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP,
+					PCI_EXP_SAVE_REGS * sizeof(u16));
 	if (error)
 	if (error)
 		dev_err(&dev->dev,
 		dev_err(&dev->dev,
 			"unable to preallocate PCI Express save buffer\n");
 			"unable to preallocate PCI Express save buffer\n");
@@ -1472,7 +1484,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 	if (!pin)
 	if (!pin)
 		return -1;
 		return -1;
 
 
-	while (dev->bus->self) {
+	while (dev->bus->parent) {
 		pin = pci_swizzle_interrupt_pin(dev, pin);
 		pin = pci_swizzle_interrupt_pin(dev, pin);
 		dev = dev->bus->self;
 		dev = dev->bus->self;
 	}
 	}
@@ -1492,7 +1504,7 @@ u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp)
 {
 {
 	u8 pin = *pinp;
 	u8 pin = *pinp;
 
 
-	while (dev->bus->self) {
+	while (dev->bus->parent) {
 		pin = pci_swizzle_interrupt_pin(dev, pin);
 		pin = pci_swizzle_interrupt_pin(dev, pin);
 		dev = dev->bus->self;
 		dev = dev->bus->self;
 	}
 	}
@@ -2016,18 +2028,24 @@ static int __pcie_flr(struct pci_dev *dev, int probe)
 	pci_block_user_cfg_access(dev);
 	pci_block_user_cfg_access(dev);
 
 
 	/* Wait for Transaction Pending bit clean */
 	/* Wait for Transaction Pending bit clean */
+	pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+	if (!(status & PCI_EXP_DEVSTA_TRPND))
+		goto transaction_done;
+
 	msleep(100);
 	msleep(100);
 	pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
 	pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
-	if (status & PCI_EXP_DEVSTA_TRPND) {
-		dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
+	if (!(status & PCI_EXP_DEVSTA_TRPND))
+		goto transaction_done;
+
+	dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
 			"sleeping for 1 second\n");
 			"sleeping for 1 second\n");
-		ssleep(1);
-		pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
-		if (status & PCI_EXP_DEVSTA_TRPND)
-			dev_info(&dev->dev, "Still busy after 1s; "
+	ssleep(1);
+	pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+	if (status & PCI_EXP_DEVSTA_TRPND)
+		dev_info(&dev->dev, "Still busy after 1s; "
 				"proceeding with reset anyway\n");
 				"proceeding with reset anyway\n");
-	}
 
 
+transaction_done:
 	pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL,
 	pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL,
 				PCI_EXP_DEVCTL_BCR_FLR);
 				PCI_EXP_DEVCTL_BCR_FLR);
 	mdelay(100);
 	mdelay(100);
@@ -2054,18 +2072,24 @@ static int __pci_af_flr(struct pci_dev *dev, int probe)
 	pci_block_user_cfg_access(dev);
 	pci_block_user_cfg_access(dev);
 
 
 	/* Wait for Transaction Pending bit clean */
 	/* Wait for Transaction Pending bit clean */
+	pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
+	if (!(status & PCI_AF_STATUS_TP))
+		goto transaction_done;
+
 	msleep(100);
 	msleep(100);
 	pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
 	pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
-	if (status & PCI_AF_STATUS_TP) {
-		dev_info(&dev->dev, "Busy after 100ms while trying to"
-				" reset; sleeping for 1 second\n");
-		ssleep(1);
-		pci_read_config_byte(dev,
-				cappos + PCI_AF_STATUS, &status);
-		if (status & PCI_AF_STATUS_TP)
-			dev_info(&dev->dev, "Still busy after 1s; "
-					"proceeding with reset anyway\n");
-	}
+	if (!(status & PCI_AF_STATUS_TP))
+		goto transaction_done;
+
+	dev_info(&dev->dev, "Busy after 100ms while trying to"
+			" reset; sleeping for 1 second\n");
+	ssleep(1);
+	pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
+	if (status & PCI_AF_STATUS_TP)
+		dev_info(&dev->dev, "Still busy after 1s; "
+				"proceeding with reset anyway\n");
+
+transaction_done:
 	pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
 	pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
 	mdelay(100);
 	mdelay(100);
 
 
@@ -2334,18 +2358,140 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags)
  */
 int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type)
 {
+	int reg;
+
 	if (resno < PCI_ROM_RESOURCE) {
 		*type = pci_bar_unknown;
 		return PCI_BASE_ADDRESS_0 + 4 * resno;
 	} else if (resno == PCI_ROM_RESOURCE) {
 		*type = pci_bar_mem32;
 		return dev->rom_base_reg;
+	} else if (resno < PCI_BRIDGE_RESOURCES) {
+		/* device specific resource */
+		reg = pci_iov_resource_bar(dev, resno, type);
+		if (reg)
+			return reg;
 	}
 
 	dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno);
 	return 0;
 }
 
 
+#define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
+static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
+spinlock_t resource_alignment_lock = SPIN_LOCK_UNLOCKED;
+
+/**
+ * pci_specified_resource_alignment - get resource alignment specified by user.
+ * @dev: the PCI device to get
+ *
+ * RETURNS: Resource alignment if it is specified.
+ *          Zero if it is not specified.
+ */
+resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
+{
+	int seg, bus, slot, func, align_order, count;
+	resource_size_t align = 0;
+	char *p;
+
+	spin_lock(&resource_alignment_lock);
+	p = resource_alignment_param;
+	while (*p) {
+		count = 0;
+		if (sscanf(p, "%d%n", &align_order, &count) == 1 &&
+							p[count] == '@') {
+			p += count + 1;
+		} else {
+			align_order = -1;
+		}
+		if (sscanf(p, "%x:%x:%x.%x%n",
+			&seg, &bus, &slot, &func, &count) != 4) {
+			seg = 0;
+			if (sscanf(p, "%x:%x.%x%n",
+					&bus, &slot, &func, &count) != 3) {
+				/* Invalid format */
+				printk(KERN_ERR "PCI: Can't parse resource_alignment parameter: %s\n",
+					p);
+				break;
+			}
+		}
+		p += count;
+		if (seg == pci_domain_nr(dev->bus) &&
+			bus == dev->bus->number &&
+			slot == PCI_SLOT(dev->devfn) &&
+			func == PCI_FUNC(dev->devfn)) {
+			if (align_order == -1) {
+				align = PAGE_SIZE;
+			} else {
+				align = 1 << align_order;
+			}
+			/* Found */
+			break;
+		}
+		if (*p != ';' && *p != ',') {
+			/* End of param or invalid format */
+			break;
+		}
+		p++;
+	}
+	spin_unlock(&resource_alignment_lock);
+	return align;
+}
+
+/**
+ * pci_is_reassigndev - check if specified PCI is target device to reassign
+ * @dev: the PCI device to check
+ *
+ * RETURNS: non-zero for PCI device is a target device to reassign,
+ *          or zero is not.
+ */
+int pci_is_reassigndev(struct pci_dev *dev)
+{
+	return (pci_specified_resource_alignment(dev) != 0);
+}
+
+ssize_t pci_set_resource_alignment_param(const char *buf, size_t count)
+{
+	if (count > RESOURCE_ALIGNMENT_PARAM_SIZE - 1)
+		count = RESOURCE_ALIGNMENT_PARAM_SIZE - 1;
+	spin_lock(&resource_alignment_lock);
+	strncpy(resource_alignment_param, buf, count);
+	resource_alignment_param[count] = '\0';
+	spin_unlock(&resource_alignment_lock);
+	return count;
+}
+
+ssize_t pci_get_resource_alignment_param(char *buf, size_t size)
+{
+	size_t count;
+	spin_lock(&resource_alignment_lock);
+	count = snprintf(buf, size, "%s", resource_alignment_param);
+	spin_unlock(&resource_alignment_lock);
+	return count;
+}
+
+static ssize_t pci_resource_alignment_show(struct bus_type *bus, char *buf)
+{
+	return pci_get_resource_alignment_param(buf, PAGE_SIZE);
+}
+
+static ssize_t pci_resource_alignment_store(struct bus_type *bus,
+					const char *buf, size_t count)
+{
+	return pci_set_resource_alignment_param(buf, count);
+}
+
+BUS_ATTR(resource_alignment, 0644, pci_resource_alignment_show,
+					pci_resource_alignment_store);
+
+static int __init pci_resource_alignment_sysfs_init(void)
+{
+	return bus_create_file(&pci_bus_type,
+					&bus_attr_resource_alignment);
+}
+
+late_initcall(pci_resource_alignment_sysfs_init);
+
 static void __devinit pci_no_domains(void)
 static void __devinit pci_no_domains(void)
 {
 {
 #ifdef CONFIG_PCI_DOMAINS
 #ifdef CONFIG_PCI_DOMAINS
@@ -2394,6 +2540,9 @@ static int __init pci_setup(char *str)
 				pci_cardbus_io_size = memparse(str + 9, &str);
 			} else if (!strncmp(str, "cbmemsize=", 10)) {
 				pci_cardbus_mem_size = memparse(str + 10, &str);
+			} else if (!strncmp(str, "resource_alignment=", 19)) {
+				pci_set_resource_alignment_param(str + 19,
+							strlen(str + 19));
 			} else {
 				printk(KERN_ERR "PCI: Unknown option `%s'\n",
 						str);

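pci_specified_resource_alignment() accepts a ';' or ',' separated list of "[<order>@][<domain>:]<bus>:<slot>.<func>" entries, a missing order meaning page alignment. The stand-alone program below repeats the same sscanf() sequence on one example entry so the accepted format can be checked outside the kernel; it mirrors, but is not, the kernel code.

/* Sketch: parse one resource_alignment entry the way the new code does.
 * The entry string is an invented example. */
#include <stdio.h>

int main(void)
{
	const char *p = "20@0000:00:1c.0";
	int seg = 0, bus, slot, func, align_order = -1, count = 0;

	/* optional "<order>@" prefix */
	if (sscanf(p, "%d%n", &align_order, &count) == 1 && p[count] == '@')
		p += count + 1;
	else
		align_order = -1;

	/* "<domain>:<bus>:<slot>.<func>" or "<bus>:<slot>.<func>" */
	if (sscanf(p, "%x:%x:%x.%x%n", &seg, &bus, &slot, &func, &count) != 4) {
		seg = 0;
		if (sscanf(p, "%x:%x.%x%n", &bus, &slot, &func, &count) != 3) {
			fprintf(stderr, "cannot parse '%s'\n", p);
			return 1;
		}
	}
	if (align_order < 0)
		printf("%04x:%02x:%02x.%x: page-sized alignment\n",
		       seg, bus, slot, func);
	else
		printf("%04x:%02x:%02x.%x: 2^%d alignment\n",
		       seg, bus, slot, func, align_order);
	return 0;
}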
+ 65 - 0
drivers/pci/pci.h

@@ -1,6 +1,8 @@
 #ifndef DRIVERS_PCI_H
 #define DRIVERS_PCI_H
 
 
+#include <linux/workqueue.h>
+
 #define PCI_CFG_SPACE_SIZE	256
 #define PCI_CFG_SPACE_SIZE	256
 #define PCI_CFG_SPACE_EXP_SIZE	4096
 #define PCI_CFG_SPACE_EXP_SIZE	4096
 
 
@@ -135,6 +137,12 @@ extern int pcie_mch_quirk;
 extern struct device_attribute pci_dev_attrs[];
 extern struct device_attribute dev_attr_cpuaffinity;
 extern struct device_attribute dev_attr_cpulistaffinity;
+#ifdef CONFIG_HOTPLUG
+extern struct bus_attribute pci_bus_attrs[];
+#else
+#define pci_bus_attrs	NULL
+#endif
+
 
 
 /**
 /**
  * pci_match_one_device - Tell if a PCI device structure has a matching
  * pci_match_one_device - Tell if a PCI device structure has a matching
@@ -177,6 +185,7 @@ enum pci_bar_type {
 	pci_bar_mem64,		/* A 64-bit memory BAR */
 	pci_bar_mem64,		/* A 64-bit memory BAR */
 };
 };
 
 
+extern int pci_setup_device(struct pci_dev *dev);
 extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 				struct resource *res, unsigned int reg);
 extern int pci_resource_bar(struct pci_dev *dev, int resno,
@@ -194,4 +203,60 @@ static inline int pci_ari_enabled(struct pci_bus *bus)
 	return bus->self && bus->self->ari_enabled;
 	return bus->self && bus->self->ari_enabled;
 }
 }
 
 
+#ifdef CONFIG_PCI_QUIRKS
+extern int pci_is_reassigndev(struct pci_dev *dev);
+resource_size_t pci_specified_resource_alignment(struct pci_dev *dev);
+extern void pci_disable_bridge_window(struct pci_dev *dev);
+#endif
+
+/* Single Root I/O Virtualization */
+struct pci_sriov {
+	int pos;		/* capability position */
+	int nres;		/* number of resources */
+	u32 cap;		/* SR-IOV Capabilities */
+	u16 ctrl;		/* SR-IOV Control */
+	u16 total;		/* total VFs associated with the PF */
+	u16 initial;		/* initial VFs associated with the PF */
+	u16 nr_virtfn;		/* number of VFs available */
+	u16 offset;		/* first VF Routing ID offset */
+	u16 stride;		/* following VF stride */
+	u32 pgsz;		/* page size for BAR alignment */
+	u8 link;		/* Function Dependency Link */
+	struct pci_dev *dev;	/* lowest numbered PF */
+	struct pci_dev *self;	/* this PF */
+	struct mutex lock;	/* lock for VF bus */
+	struct work_struct mtask; /* VF Migration task */
+	u8 __iomem *mstate;	/* VF Migration State Array */
+};
+
+#ifdef CONFIG_PCI_IOV
+extern int pci_iov_init(struct pci_dev *dev);
+extern void pci_iov_release(struct pci_dev *dev);
+extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+				enum pci_bar_type *type);
+extern void pci_restore_iov_state(struct pci_dev *dev);
+extern int pci_iov_bus_range(struct pci_bus *bus);
+#else
+static inline int pci_iov_init(struct pci_dev *dev)
+{
+	return -ENODEV;
+}
+static inline void pci_iov_release(struct pci_dev *dev)
+
+{
+}
+static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno,
+				       enum pci_bar_type *type)
+{
+	return 0;
+}
+static inline void pci_restore_iov_state(struct pci_dev *dev)
+{
+}
+static inline int pci_iov_bus_range(struct pci_bus *bus)
+{
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
 #endif /* DRIVERS_PCI_H */
 #endif /* DRIVERS_PCI_H */

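With the stubs above, code elsewhere in drivers/pci can call the SR-IOV hooks unconditionally: when CONFIG_PCI_IOV is off (or the device has no SR-IOV capability) pci_iov_init() just reports -ENODEV and the remaining helpers do nothing. A schematic caller follows, assuming it lives in a drivers/pci file that includes "pci.h"; demo_add_device() is an invented name, not something from the patch.

/*
 * Schematic caller (not from the patch): relies on the CONFIG_PCI_IOV
 * stubs so no #ifdef is needed at the call site.
 */
#include <linux/pci.h>
#include "pci.h"

static int demo_add_device(struct pci_dev *dev)
{
	int ret = pci_iov_init(dev);

	/* -ENODEV: no SR-IOV capability, or CONFIG_PCI_IOV disabled */
	if (ret && ret != -ENODEV)
		return ret;

	return 0;	/* the device is usable either way */
}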
+ 4 - 24
drivers/pci/pcie/aer/aerdrv.c

@@ -38,30 +38,13 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 MODULE_LICENSE("GPL");
 
 
-static int __devinit aer_probe (struct pcie_device *dev,
-	const struct pcie_port_service_id *id );
+static int __devinit aer_probe (struct pcie_device *dev);
 static void aer_remove(struct pcie_device *dev);
 static void aer_remove(struct pcie_device *dev);
-static int aer_suspend(struct pcie_device *dev, pm_message_t state)
-{return 0;}
-static int aer_resume(struct pcie_device *dev) {return 0;}
 static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
 	enum pci_channel_state error);
 static void aer_error_resume(struct pci_dev *dev);
 static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
 
 
-/*
- * PCI Express bus's AER Root service driver data structure
- */
-static struct pcie_port_service_id aer_id[] = {
-	{
-	.vendor 	= PCI_ANY_ID,
-	.device 	= PCI_ANY_ID,
-	.port_type 	= PCIE_RC_PORT,
-	.service_type 	= PCIE_PORT_SERVICE_AER,
-	},
-	{ /* end: all zeroes */ }
-};
-
 static struct pci_error_handlers aer_error_handlers = {
 	.error_detected = aer_error_detected,
 	.resume = aer_error_resume,
@@ -69,14 +52,12 @@ static struct pci_error_handlers aer_error_handlers = {
 
 
 static struct pcie_port_service_driver aerdriver = {
 static struct pcie_port_service_driver aerdriver = {
 	.name		= "aer",
 	.name		= "aer",
-	.id_table	= &aer_id[0],
+	.port_type	= PCIE_ANY_PORT,
+	.service	= PCIE_PORT_SERVICE_AER,
 
 
 	.probe		= aer_probe,
 	.probe		= aer_probe,
 	.remove		= aer_remove,
 	.remove		= aer_remove,
 
 
-	.suspend	= aer_suspend,
-	.resume		= aer_resume,
-
 	.err_handler	= &aer_error_handlers,
 	.err_handler	= &aer_error_handlers,
 
 
 	.reset_link	= aer_root_reset,
 	.reset_link	= aer_root_reset,
@@ -207,8 +188,7 @@ static void aer_remove(struct pcie_device *dev)
  *
  *
  * Invoked when PCI Express bus loads AER service driver.
  * Invoked when PCI Express bus loads AER service driver.
  **/
  **/
-static int __devinit aer_probe (struct pcie_device *dev,
-				const struct pcie_port_service_id *id )
+static int __devinit aer_probe (struct pcie_device *dev)
 {
 	int status;
 	struct aer_rpc *rpc;

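After this conversion a port service driver no longer carries a pcie_port_service_id table: it states its .port_type and a single .service bit directly, and probe() loses the id argument. A hedged skeleton in the new style; the demo_* names are invented and pcie_port_service_register()/unregister() are assumed to keep their existing prototypes from <linux/pcieport_if.h>.

/*
 * Skeleton of a port service driver in the new style; only the shape of
 * the structure is taken from the aerdriver conversion above.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pcieport_if.h>

static int __devinit demo_probe(struct pcie_device *dev)
{
	return 0;			/* claim this service device */
}

static void demo_remove(struct pcie_device *dev)
{
}

static struct pcie_port_service_driver demo_service_driver = {
	.name		= "demo_service",
	.port_type	= PCIE_ANY_PORT,	/* or one specific port type */
	.service	= PCIE_PORT_SERVICE_VC,	/* exactly one service bit */
	.probe		= demo_probe,
	.remove		= demo_remove,
};

static int __init demo_service_init(void)
{
	return pcie_port_service_register(&demo_service_driver);
}

static void __exit demo_service_exit(void)
{
	pcie_port_service_unregister(&demo_service_driver);
}

module_init(demo_service_init);
module_exit(demo_service_exit);
MODULE_LICENSE("GPL");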
+ 1 - 1
drivers/pci/pcie/aer/aerdrv_acpi.c

@@ -38,7 +38,7 @@ int aer_osc_setup(struct pcie_device *pciedev)
 
 
 	handle = acpi_find_root_bridge_handle(pdev);
 	handle = acpi_find_root_bridge_handle(pdev);
 	if (handle) {
 	if (handle) {
-		status = pci_osc_control_set(handle,
+		status = acpi_pci_osc_control_set(handle,
 					OSC_PCI_EXPRESS_AER_CONTROL |
 					OSC_PCI_EXPRESS_AER_CONTROL |
 					OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
 					OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
 	}
 	}

+ 5 - 5
drivers/pci/pcie/aer/aerdrv_core.c

@@ -351,21 +351,21 @@ static int find_aer_service_iter(struct device *device, void *data)
 {
 {
 	struct device_driver *driver;
 	struct device_driver *driver;
 	struct pcie_port_service_driver *service_driver;
 	struct pcie_port_service_driver *service_driver;
-	struct pcie_device *pcie_dev;
 	struct find_aer_service_data *result;
 	struct find_aer_service_data *result;
 
 
 	result = (struct find_aer_service_data *) data;
 	result = (struct find_aer_service_data *) data;
 
 
 	if (device->bus == &pcie_port_bus_type) {
 	if (device->bus == &pcie_port_bus_type) {
-		pcie_dev = to_pcie_device(device);
-		if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT)
+		struct pcie_port_data *port_data;
+
+		port_data = pci_get_drvdata(to_pcie_device(device)->port);
+		if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT)
 			result->is_downstream = 1;
 			result->is_downstream = 1;
 
 
 		driver = device->driver;
 		driver = device->driver;
 		if (driver) {
 		if (driver) {
 			service_driver = to_service_driver(driver);
 			service_driver = to_service_driver(driver);
-			if (service_driver->id_table->service_type ==
-					PCIE_PORT_SERVICE_AER) {
+			if (service_driver->service == PCIE_PORT_SERVICE_AER) {
 				result->aer_driver = service_driver;
 				result->aer_driver = service_driver;
 				return 1;
 				return 1;
 			}
 			}

+ 8 - 6
drivers/pci/pcie/portdrv.h

@@ -25,19 +25,21 @@
 #define PCIE_CAPABILITIES_REG		0x2
 #define PCIE_SLOT_CAPABILITIES_REG	0x14
 #define PCIE_PORT_DEVICE_MAXSERVICES	4
+#define PCIE_PORT_MSI_VECTOR_MASK	0x1f
+/*
+ * According to the PCI Express Base Specification 2.0, the indices of the MSI-X
+ * table entires used by port services must not exceed 31
+ */
+#define PCIE_PORT_MAX_MSIX_ENTRIES	32
 
 
 #define get_descriptor_id(type, service) (((type - 4) << 4) | service)
 #define get_descriptor_id(type, service) (((type - 4) << 4) | service)
 
 
-struct pcie_port_device_ext {
-	int interrupt_mode;	/* [0:INTx | 1:MSI | 2:MSI-X] */
-};
-
 extern struct bus_type pcie_port_bus_type;
 extern struct bus_type pcie_port_bus_type;
 extern int pcie_port_device_probe(struct pci_dev *dev);
 extern int pcie_port_device_probe(struct pci_dev *dev);
 extern int pcie_port_device_register(struct pci_dev *dev);
 extern int pcie_port_device_register(struct pci_dev *dev);
 #ifdef CONFIG_PM
 #ifdef CONFIG_PM
-extern int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state);
-extern int pcie_port_device_resume(struct pci_dev *dev);
+extern int pcie_port_device_suspend(struct device *dev);
+extern int pcie_port_device_resume(struct device *dev);
 #endif
 extern void pcie_port_device_remove(struct pci_dev *dev);
 extern int __must_check pcie_port_bus_register(void);

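PCIE_PORT_MSI_VECTOR_MASK isolates the Interrupt Message Number field (bits 13:9) of the PCI Express Capabilities register, and PCIE_PORT_MAX_MSIX_ENTRIES caps the MSI-X table size the port driver will ever program, since that field can only name entries 0-31. A stand-alone sketch of the extraction with a fabricated register value; the kernel then compares the result against the table size it actually allocated.

/* Sketch: extracting the Interrupt Message Number with the new constants.
 * The register value 0x0642 is fabricated (bits 13:9 = 3). */
#include <stdint.h>
#include <stdio.h>

#define PCIE_PORT_MSI_VECTOR_MASK	0x1f
#define PCIE_PORT_MAX_MSIX_ENTRIES	32

int main(void)
{
	uint16_t reg16 = 0x0642;
	int entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK;

	printf("PME/hot-plug interrupt message number: %d (at most %d entries used)\n",
	       entry, PCIE_PORT_MAX_MSIX_ENTRIES);
	return 0;
}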
+ 10 - 8
drivers/pci/pcie/portdrv_bus.c

@@ -26,20 +26,22 @@ EXPORT_SYMBOL_GPL(pcie_port_bus_type);
 static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
 static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
 {
 	struct pcie_device *pciedev;
 	struct pcie_port_service_driver *driver;
 	struct pcie_port_service_driver *driver;
 
 	if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type)
 		return 0;
+
 	pciedev = to_pcie_device(dev);
 	pciedev = to_pcie_device(dev);
 	driver = to_service_driver(drv);
 	driver = to_service_driver(drv);
-	if (   (driver->id_table->vendor != PCI_ANY_ID && 
-		driver->id_table->vendor != pciedev->id.vendor) ||
-	       (driver->id_table->device != PCI_ANY_ID &&
-		driver->id_table->device != pciedev->id.device) ||	
-	       (driver->id_table->port_type != PCIE_ANY_PORT &&
-		driver->id_table->port_type != pciedev->id.port_type) ||
-		driver->id_table->service_type != pciedev->id.service_type )
+
+	if (driver->service != pciedev->service)
+		return 0;
+
+	port_data = pci_get_drvdata(pciedev->port);
+
+	if (driver->port_type != PCIE_ANY_PORT
+	     && driver->port_type != port_data->port_type)
 		return 0;
 		return 0;
 
 
 	return 1;
 	return 1;

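The rewritten match routine reduces to: the driver's service bit must equal the device's, and the driver's port_type must be PCIE_ANY_PORT or equal to the port type read from pcie_port_data. Below is a stand-alone restatement of that predicate with fabricated values; the real code takes them from the structures shown above.

/* Stand-alone restatement of the new match rule (illustration only). */
#include <stdbool.h>
#include <stdio.h>

static bool service_matches(unsigned int drv_service, int drv_port_type,
			    bool drv_any_port,
			    unsigned int dev_service, int dev_port_type)
{
	if (drv_service != dev_service)		/* service bits must be equal */
		return false;
	/* a driver registered for PCIE_ANY_PORT matches every port type */
	return drv_any_port || drv_port_type == dev_port_type;
}

int main(void)
{
	/* e.g. a driver for any port type, device on port type 4: matches */
	printf("%d\n", service_matches(1, 0, true, 1, 4));
	return 0;
}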
+ 248 - 131
drivers/pci/pcie/portdrv_core.c

@@ -15,10 +15,9 @@
 #include <linux/slab.h>
 #include <linux/slab.h>
 #include <linux/pcieport_if.h>
 #include <linux/pcieport_if.h>
 
 
+#include "../pci.h"
 #include "portdrv.h"
 #include "portdrv.h"
 
 
-extern int pcie_mch_quirk;	/* MSI-quirk Indicator */
-
 /**
  * release_pcie_device - free PCI Express port service device structure
  * @dev: Port service device to release
@@ -31,26 +30,150 @@ static void release_pcie_device(struct device *dev)
 	kfree(to_pcie_device(dev));			
 	kfree(to_pcie_device(dev));			
 }
 }
 
 
-static int is_msi_quirked(struct pci_dev *dev)
+/**
+ * pcie_port_msix_add_entry - add entry to given array of MSI-X entries
+ * @entries: Array of MSI-X entries
+ * @new_entry: Index of the entry to add to the array
+ * @nr_entries: Number of entries aleady in the array
+ *
+ * Return value: Position of the added entry in the array
+ */
+static int pcie_port_msix_add_entry(
+	struct msix_entry *entries, int new_entry, int nr_entries)
 {
 {
-	int port_type, quirk = 0;
+	int j;
+
+	for (j = 0; j < nr_entries; j++)
+		if (entries[j].entry == new_entry)
+			return j;
+
+	entries[j].entry = new_entry;
+	return j;
+}
+
+/**
+ * pcie_port_enable_msix - try to set up MSI-X as interrupt mode for given port
+ * @dev: PCI Express port to handle
+ * @vectors: Array of interrupt vectors to populate
+ * @mask: Bitmask of port capabilities returned by get_port_device_capability()
+ *
+ * Return value: 0 on success, error code on failure
+ */
+static int pcie_port_enable_msix(struct pci_dev *dev, int *vectors, int mask)
+{
+	struct msix_entry *msix_entries;
+	int idx[PCIE_PORT_DEVICE_MAXSERVICES];
+	int nr_entries, status, pos, i, nvec;
 	u16 reg16;
 	u16 reg16;
+	u32 reg32;
 
 
-	pci_read_config_word(dev, 
-		pci_find_capability(dev, PCI_CAP_ID_EXP) + 
-		PCIE_CAPABILITIES_REG, &reg16);
-	port_type = (reg16 >> 4) & PORT_TYPE_MASK;
-	switch(port_type) {
-	case PCIE_RC_PORT:
-		if (pcie_mch_quirk == 1)
-			quirk = 1;
-		break;
-	case PCIE_SW_UPSTREAM_PORT:
-	case PCIE_SW_DOWNSTREAM_PORT:
-	default:
-		break;	
+	nr_entries = pci_msix_table_size(dev);
+	if (!nr_entries)
+		return -EINVAL;
+	if (nr_entries > PCIE_PORT_MAX_MSIX_ENTRIES)
+		nr_entries = PCIE_PORT_MAX_MSIX_ENTRIES;
+
+	msix_entries = kzalloc(sizeof(*msix_entries) * nr_entries, GFP_KERNEL);
+	if (!msix_entries)
+		return -ENOMEM;
+
+	/*
+	 * Allocate as many entries as the port wants, so that we can check
+	 * which of them will be useful.  Moreover, if nr_entries is correctly
+	 * equal to the number of entries this port actually uses, we'll happily
+	 * go through without any tricks.
+	 */
+	for (i = 0; i < nr_entries; i++)
+		msix_entries[i].entry = i;
+
+	status = pci_enable_msix(dev, msix_entries, nr_entries);
+	if (status)
+		goto Exit;
+
+	for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+		idx[i] = -1;
+	status = -EIO;
+	nvec = 0;
+
+	if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) {
+		int entry;
+
+		/*
+		 * The code below follows the PCI Express Base Specification 2.0
+		 * stating in Section 6.1.6 that "PME and Hot-Plug Event
+		 * interrupts (when both are implemented) always share the same
+		 * MSI or MSI-X vector, as indicated by the Interrupt Message
+		 * Number field in the PCI Express Capabilities register", where
+		 * according to Section 7.8.2 of the specification "For MSI-X,
+		 * the value in this field indicates which MSI-X Table entry is
+		 * used to generate the interrupt message."
+		 */
+		pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+		pci_read_config_word(dev, pos + PCIE_CAPABILITIES_REG, &reg16);
+		entry = (reg16 >> 9) & PCIE_PORT_MSI_VECTOR_MASK;
+		if (entry >= nr_entries)
+			goto Error;
+
+		i = pcie_port_msix_add_entry(msix_entries, entry, nvec);
+		if (i == nvec)
+			nvec++;
+
+		idx[PCIE_PORT_SERVICE_PME_SHIFT] = i;
+		idx[PCIE_PORT_SERVICE_HP_SHIFT] = i;
+	}
+
+	if (mask & PCIE_PORT_SERVICE_AER) {
+		int entry;
+
+		/*
+		 * The code below follows Section 7.10.10 of the PCI Express
+		 * Base Specification 2.0 stating that bits 31-27 of the Root
+		 * Error Status Register contain a value indicating which of the
+		 * MSI/MSI-X vectors assigned to the port is going to be used
+		 * for AER, where "For MSI-X, the value in this register
+		 * indicates which MSI-X Table entry is used to generate the
+		 * interrupt message."
+		 */
+		pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32);
+		entry = reg32 >> 27;
+		if (entry >= nr_entries)
+			goto Error;
+
+		i = pcie_port_msix_add_entry(msix_entries, entry, nvec);
+		if (i == nvec)
+			nvec++;
+
+		idx[PCIE_PORT_SERVICE_AER_SHIFT] = i;
 	}
 	}
-	return quirk;
+
+	/*
+	 * If nvec is equal to the allocated number of entries, we can just use
+	 * what we have.  Otherwise, the port has some extra entries not for the
+	 * services we know and we need to work around that.
+	 */
+	if (nvec == nr_entries) {
+		status = 0;
+	} else {
+		/* Drop the temporary MSI-X setup */
+		pci_disable_msix(dev);
+
+		/* Now allocate the MSI-X vectors for real */
+		status = pci_enable_msix(dev, msix_entries, nvec);
+		if (status)
+			goto Exit;
+	}
+
+	for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+		vectors[i] = idx[i] >= 0 ? msix_entries[idx[i]].vector : -1;
+
+ Exit:
+	kfree(msix_entries);
+	return status;
+
+ Error:
+	pci_disable_msix(dev);
+	goto Exit;
 }
 }
 
 
 /**
 /**
@@ -64,47 +187,32 @@ static int is_msi_quirked(struct pci_dev *dev)
  */
 static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask)
 {
-	int i, pos, nvec, status = -EINVAL;
-	int interrupt_mode = PCIE_PORT_INTx_MODE;
+	struct pcie_port_data *port_data = pci_get_drvdata(dev);
+	int irq, interrupt_mode = PCIE_PORT_NO_IRQ;
+	int i;
 
 
-	/* Set INTx as default */
-	for (i = 0, nvec = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
-		if (mask & (1 << i)) 
-			nvec++;
-		vectors[i] = dev->irq;
-	}
-	
 	/* Check MSI quirk */
 	/* Check MSI quirk */
-	if (is_msi_quirked(dev))
-		return interrupt_mode;
-
-	/* Select MSI-X over MSI if supported */		
-	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-	if (pos) {
-		struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = 
-			{{0, 0}, {0, 1}, {0, 2}, {0, 3}};
-		status = pci_enable_msix(dev, msix_entries, nvec);
-		if (!status) {
-			int j = 0;
-
-			interrupt_mode = PCIE_PORT_MSIX_MODE;
-			for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
-				if (mask & (1 << i)) 
-					vectors[i] = msix_entries[j++].vector;
-			}
-		}
-	} 
-	if (status) {
-		pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
-		if (pos) {
-			status = pci_enable_msi(dev);
-			if (!status) {
-				interrupt_mode = PCIE_PORT_MSI_MODE;
-				for (i = 0;i < PCIE_PORT_DEVICE_MAXSERVICES;i++)
-					vectors[i] = dev->irq;
-			}
-		}
-	} 
+	if (port_data->port_type == PCIE_RC_PORT && pcie_mch_quirk)
+		goto Fallback;
+
+	/* Try to use MSI-X if supported */
+	if (!pcie_port_enable_msix(dev, vectors, mask))
+		return PCIE_PORT_MSIX_MODE;
+
+	/* We're not going to use MSI-X, so try MSI and fall back to INTx */
+	if (!pci_enable_msi(dev))
+		interrupt_mode = PCIE_PORT_MSI_MODE;
+
+ Fallback:
+	if (interrupt_mode == PCIE_PORT_NO_IRQ && dev->pin)
+		interrupt_mode = PCIE_PORT_INTx_MODE;
+
+	irq = interrupt_mode != PCIE_PORT_NO_IRQ ? dev->irq : -1;
+	for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++)
+		vectors[i] = irq;
+
+	vectors[PCIE_PORT_SERVICE_VC_SHIFT] = -1;
+
 	return interrupt_mode;
 	return interrupt_mode;
 }
 }
 
 
@@ -132,13 +240,11 @@ static int get_port_device_capability(struct pci_dev *dev)
 			pos + PCIE_SLOT_CAPABILITIES_REG, &reg32);
 			pos + PCIE_SLOT_CAPABILITIES_REG, &reg32);
 		if (reg32 & SLOT_HP_CAPABLE_MASK)
 		if (reg32 & SLOT_HP_CAPABLE_MASK)
 			services |= PCIE_PORT_SERVICE_HP;
 			services |= PCIE_PORT_SERVICE_HP;
-	} 
-	/* PME Capable - root port capability */
-	if (((reg16 >> 4) & PORT_TYPE_MASK) == PCIE_RC_PORT)
-		services |= PCIE_PORT_SERVICE_PME;
-
+	}
+	/* AER capable */
 	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
 		services |= PCIE_PORT_SERVICE_AER;
+	/* VC support */
 	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC))
 		services |= PCIE_PORT_SERVICE_VC;
 
 
@@ -152,20 +258,17 @@ static int get_port_device_capability(struct pci_dev *dev)
  * @port_type: Type of the port
  * @port_type: Type of the port
  * @service_type: Type of service to associate with the service device
  * @service_type: Type of service to associate with the service device
  * @irq: Interrupt vector to associate with the service device
  * @irq: Interrupt vector to associate with the service device
- * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI)
  */
  */
 static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, 
 static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, 
-	int port_type, int service_type, int irq, int irq_mode)
+	int service_type, int irq)
 {
 {
+	struct pcie_port_data *port_data = pci_get_drvdata(parent);
 	struct device *device;
 	struct device *device;
+	int port_type = port_data->port_type;
 
 
 	dev->port = parent;
 	dev->port = parent;
-	dev->interrupt_mode = irq_mode;
 	dev->irq = irq;
 	dev->irq = irq;
-	dev->id.vendor = parent->vendor;
-	dev->id.device = parent->device;
-	dev->id.port_type = port_type;
-	dev->id.service_type = (1 << service_type);
+	dev->service = service_type;
 
 
 	/* Initialize generic device interface */
 	/* Initialize generic device interface */
 	device = &dev->device;
 	device = &dev->device;
@@ -185,10 +288,9 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev,
  * @port_type: Type of the port
  * @port_type: Type of the port
  * @service_type: Type of service to associate with the service device
  * @service_type: Type of service to associate with the service device
  * @irq: Interrupt vector to associate with the service device
  * @irq: Interrupt vector to associate with the service device
- * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI)
  */
 static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
-	int port_type, int service_type, int irq, int irq_mode)
+	int service_type, int irq)
 {
 	struct pcie_device *device;
 
 
@@ -196,7 +298,7 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent,
 	if (!device)
 	if (!device)
 		return NULL;
 		return NULL;
 
 
-	pcie_device_init(parent, device, port_type, service_type, irq,irq_mode);
+	pcie_device_init(parent, device, service_type, irq);
 	return device;
 	return device;
 }
 }
 
 
@@ -230,63 +332,90 @@ int pcie_port_device_probe(struct pci_dev *dev)
  */
 int pcie_port_device_register(struct pci_dev *dev)
 {
-	struct pcie_port_device_ext *p_ext;
-	int status, type, capabilities, irq_mode, i;
+	struct pcie_port_data *port_data;
+	int status, capabilities, irq_mode, i, nr_serv;
 	int vectors[PCIE_PORT_DEVICE_MAXSERVICES];
 	int vectors[PCIE_PORT_DEVICE_MAXSERVICES];
 	u16 reg16;
 	u16 reg16;
 
 
-	/* Allocate port device extension */
-	if (!(p_ext = kmalloc(sizeof(struct pcie_port_device_ext), GFP_KERNEL)))
+	port_data = kzalloc(sizeof(*port_data), GFP_KERNEL);
+	if (!port_data)
 		return -ENOMEM;
 		return -ENOMEM;
-
-	pci_set_drvdata(dev, p_ext);
+	pci_set_drvdata(dev, port_data);
 
 
 	/* Get port type */
 	/* Get port type */
 	pci_read_config_word(dev,
 	pci_read_config_word(dev,
 		pci_find_capability(dev, PCI_CAP_ID_EXP) +
 		pci_find_capability(dev, PCI_CAP_ID_EXP) +
 		PCIE_CAPABILITIES_REG, &reg16);
 		PCIE_CAPABILITIES_REG, &reg16);
-	type = (reg16 >> 4) & PORT_TYPE_MASK;
+	port_data->port_type = (reg16 >> 4) & PORT_TYPE_MASK;
 
 
-	/* Now get port services */
 	capabilities = get_port_device_capability(dev);
 	capabilities = get_port_device_capability(dev);
+	/* Root ports are capable of generating PME too */
+	if (port_data->port_type == PCIE_RC_PORT)
+		capabilities |= PCIE_PORT_SERVICE_PME;
+
 	irq_mode = assign_interrupt_mode(dev, vectors, capabilities);
 	irq_mode = assign_interrupt_mode(dev, vectors, capabilities);
-	p_ext->interrupt_mode = irq_mode;
+	if (irq_mode == PCIE_PORT_NO_IRQ) {
+		/*
+		 * Don't use service devices that require interrupts if there is
+		 * no way to generate them.
+		 */
+		if (!(capabilities & PCIE_PORT_SERVICE_VC)) {
+			status = -ENODEV;
+			goto Error;
+		}
+		capabilities = PCIE_PORT_SERVICE_VC;
+	}
+	port_data->port_irq_mode = irq_mode;
+
+	status = pci_enable_device(dev);
+	if (status)
+		goto Error;
+	pci_set_master(dev);
 
 
 	/* Allocate child services if any */
 	/* Allocate child services if any */
-	for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
+	for (i = 0, nr_serv = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
 		struct pcie_device *child;
 		struct pcie_device *child;
+		int service = 1 << i;
+
+		if (!(capabilities & service))
+			continue;
 
 
-		if (capabilities & (1 << i)) {
-			child = alloc_pcie_device(
-				dev, 		/* parent */
-				type,		/* port type */
-				i,		/* service type */
-				vectors[i],	/* irq */
-				irq_mode	/* interrupt mode */);
-			if (child) {
-				status = device_register(&child->device);
-				if (status) {
-					kfree(child);
-					continue;
-				}
-				get_device(&child->device);
-			}
+		child = alloc_pcie_device(dev, service, vectors[i]);
+		if (!child)
+			continue;
+
+		status = device_register(&child->device);
+		if (status) {
+			kfree(child);
+			continue;
 		}
 		}
+
+		get_device(&child->device);
+		nr_serv++;
+	}
+	if (!nr_serv) {
+		pci_disable_device(dev);
+		status = -ENODEV;
+		goto Error;
 	}
 	}
+
 	return 0;
 	return 0;
+
+ Error:
+	kfree(port_data);
+	return status;
 }
 }
 
 
 #ifdef CONFIG_PM
 #ifdef CONFIG_PM
 static int suspend_iter(struct device *dev, void *data)
 static int suspend_iter(struct device *dev, void *data)
 {
 {
 	struct pcie_port_service_driver *service_driver;
 	struct pcie_port_service_driver *service_driver;
-	pm_message_t state = * (pm_message_t *) data;
 
 
  	if ((dev->bus == &pcie_port_bus_type) &&
  	if ((dev->bus == &pcie_port_bus_type) &&
  	    (dev->driver)) {
  	    (dev->driver)) {
  		service_driver = to_service_driver(dev->driver);
  		service_driver = to_service_driver(dev->driver);
  		if (service_driver->suspend)
  		if (service_driver->suspend)
- 			service_driver->suspend(to_pcie_device(dev), state);
+ 			service_driver->suspend(to_pcie_device(dev));
   	}
   	}
 	return 0;
 	return 0;
 }
 }
@@ -294,11 +423,10 @@ static int suspend_iter(struct device *dev, void *data)
 /**
 /**
  * pcie_port_device_suspend - suspend port services associated with a PCIe port
  * pcie_port_device_suspend - suspend port services associated with a PCIe port
  * @dev: PCI Express port to handle
  * @dev: PCI Express port to handle
- * @state: Representation of system power management transition in progress
  */
  */
-int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state)
+int pcie_port_device_suspend(struct device *dev)
 {
 {
-	return device_for_each_child(&dev->dev, &state, suspend_iter);
+	return device_for_each_child(dev, NULL, suspend_iter);
 }
 }
 
 
 static int resume_iter(struct device *dev, void *data)
 static int resume_iter(struct device *dev, void *data)
@@ -318,24 +446,17 @@ static int resume_iter(struct device *dev, void *data)
  * pcie_port_device_suspend - resume port services associated with a PCIe port
  * pcie_port_device_suspend - resume port services associated with a PCIe port
  * @dev: PCI Express port to handle
  * @dev: PCI Express port to handle
  */
  */
-int pcie_port_device_resume(struct pci_dev *dev)
+int pcie_port_device_resume(struct device *dev)
 {
 {
-	return device_for_each_child(&dev->dev, NULL, resume_iter);
+	return device_for_each_child(dev, NULL, resume_iter);
 }
 }
-#endif
+#endif /* PM */
 
 
 static int remove_iter(struct device *dev, void *data)
 static int remove_iter(struct device *dev, void *data)
 {
 {
-	struct pcie_port_service_driver *service_driver;
-
 	if (dev->bus == &pcie_port_bus_type) {
 	if (dev->bus == &pcie_port_bus_type) {
-		if (dev->driver) {
-			service_driver = to_service_driver(dev->driver);
-			if (service_driver->remove)
-				service_driver->remove(to_pcie_device(dev));
-		}
-		*(unsigned long*)data = (unsigned long)dev;
-		return 1;
+		put_device(dev);
+		device_unregister(dev);
 	}
 	}
 	return 0;
 	return 0;
 }
 }
@@ -349,25 +470,21 @@ static int remove_iter(struct device *dev, void *data)
  */
  */
 void pcie_port_device_remove(struct pci_dev *dev)
 void pcie_port_device_remove(struct pci_dev *dev)
 {
 {
-	struct device *device;
-	unsigned long device_addr;
-	int interrupt_mode = PCIE_PORT_INTx_MODE;
-	int status;
+	struct pcie_port_data *port_data = pci_get_drvdata(dev);
 
 
-	do {
-		status = device_for_each_child(&dev->dev, &device_addr, remove_iter);
-		if (status) {
-			device = (struct device*)device_addr;
-			interrupt_mode = (to_pcie_device(device))->interrupt_mode;
-			put_device(device);
-			device_unregister(device);
-		}
-	} while (status);
-	/* Switch to INTx by default if MSI enabled */
-	if (interrupt_mode == PCIE_PORT_MSIX_MODE)
+	device_for_each_child(&dev->dev, NULL, remove_iter);
+	pci_disable_device(dev);
+
+	switch (port_data->port_irq_mode) {
+	case PCIE_PORT_MSIX_MODE:
 		pci_disable_msix(dev);
 		pci_disable_msix(dev);
-	else if (interrupt_mode == PCIE_PORT_MSI_MODE)
+		break;
+	case PCIE_PORT_MSI_MODE:
 		pci_disable_msi(dev);
 		pci_disable_msi(dev);
+		break;
+	}
+
+	kfree(port_data);
 }
 }
 
 
 /**
 /**
@@ -392,7 +509,7 @@ static int pcie_port_probe_service(struct device *dev)
 		return -ENODEV;
 		return -ENODEV;
 
 
 	pciedev = to_pcie_device(dev);
 	pciedev = to_pcie_device(dev);
-	status = driver->probe(pciedev, driver->id_table);
+	status = driver->probe(pciedev);
 	if (!status) {
 	if (!status) {
 		dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n",
 		dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n",
 			driver->name);
 			driver->name);

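pcie_port_enable_msix() reads the message numbers reported for PME/hot-plug and for AER and lets services that name the same MSI-X table entry share one vector; pcie_port_msix_add_entry() is the small de-duplication step. The following self-contained program replays that behaviour with made-up entry numbers to show how two services collapse onto one entry.

/* Sketch: the entry de-duplication used by pcie_port_enable_msix(),
 * as a stand-alone program with fabricated message numbers. */
#include <stdio.h>

struct msix_entry { unsigned int vector; unsigned int entry; };

static int add_entry(struct msix_entry *entries, int new_entry, int nr)
{
	int j;

	for (j = 0; j < nr; j++)
		if (entries[j].entry == (unsigned int)new_entry)
			return j;	/* already requested: share it */
	entries[j].entry = new_entry;	/* append at position nr */
	return j;
}

int main(void)
{
	struct msix_entry e[4];
	int nvec = 0, i;

	/* PME/hot-plug and AER happen to report the same table entry (5) */
	if (add_entry(e, 5, nvec) == nvec) nvec++;
	if (add_entry(e, 5, nvec) == nvec) nvec++;
	if (add_entry(e, 9, nvec) == nvec) nvec++;

	printf("%d distinct MSI-X entries:", nvec);
	for (i = 0; i < nvec; i++)
		printf(" %u", e[i].entry);
	printf("\n");	/* prints: 2 distinct MSI-X entries: 5 9 */
	return 0;
}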
+ 19 - 31
drivers/pci/pcie/portdrv_pci.c

@@ -32,11 +32,6 @@ MODULE_LICENSE("GPL");
 /* global data */
 /* global data */
 static const char device_name[] = "pcieport-driver";
 static const char device_name[] = "pcieport-driver";
 
 
-static int pcie_portdrv_save_config(struct pci_dev *dev)
-{
-	return pci_save_state(dev);
-}
-
 static int pcie_portdrv_restore_config(struct pci_dev *dev)
 {
 	int retval;
@@ -49,21 +44,21 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev)
 }
 }
 
 
 #ifdef CONFIG_PM
 #ifdef CONFIG_PM
-static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state)
-{
-	return pcie_port_device_suspend(dev, state);
+static struct dev_pm_ops pcie_portdrv_pm_ops = {
+	.suspend	= pcie_port_device_suspend,
+	.resume		= pcie_port_device_resume,
+	.freeze		= pcie_port_device_suspend,
+	.thaw		= pcie_port_device_resume,
+	.poweroff	= pcie_port_device_suspend,
+	.restore	= pcie_port_device_resume,
+};
 
 
-}
+#define PCIE_PORTDRV_PM_OPS	(&pcie_portdrv_pm_ops)
 
 
-static int pcie_portdrv_resume(struct pci_dev *dev)
-{
-	pci_set_master(dev);
-	return pcie_port_device_resume(dev);
-}
-#else
-#define pcie_portdrv_suspend NULL
-#define pcie_portdrv_resume NULL
-#endif
+#else /* !PM */
+
+#define PCIE_PORTDRV_PM_OPS	NULL
+#endif /* !PM */
 
 
 /*
 /*
  * pcie_portdrv_probe - Probe PCI-Express port devices
  * pcie_portdrv_probe - Probe PCI-Express port devices
@@ -82,20 +77,15 @@ static int __devinit pcie_portdrv_probe (struct pci_dev *dev,
 	if (status)
 	if (status)
 		return status;
 		return status;
 
 
-	if (pci_enable_device(dev) < 0) 
-		return -ENODEV;
-	
-	pci_set_master(dev);
         if (!dev->irq && dev->pin) {
         if (!dev->irq && dev->pin) {
 		dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
 		dev_warn(&dev->dev, "device [%04x:%04x] has invalid IRQ; "
 			 "check vendor BIOS\n", dev->vendor, dev->device);
 			 "check vendor BIOS\n", dev->vendor, dev->device);
 	}
 	}
-	if (pcie_port_device_register(dev)) {
-		pci_disable_device(dev);
-		return -ENOMEM;
-	}
+	status = pcie_port_device_register(dev);
+	if (status)
+		return status;
 
 
-	pcie_portdrv_save_config(dev);
+	pci_save_state(dev);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -104,7 +94,6 @@ static void pcie_portdrv_remove (struct pci_dev *dev)
 {
 {
 	pcie_port_device_remove(dev);
 	pcie_port_device_remove(dev);
 	pci_disable_device(dev);
 	pci_disable_device(dev);
-	kfree(pci_get_drvdata(dev));
 }
 }
 
 
 static int error_detected_iter(struct device *device, void *data)
 static int error_detected_iter(struct device *device, void *data)
@@ -278,10 +267,9 @@ static struct pci_driver pcie_portdriver = {
 	.probe		= pcie_portdrv_probe,
 	.probe		= pcie_portdrv_probe,
 	.remove		= pcie_portdrv_remove,
 	.remove		= pcie_portdrv_remove,
 
 
-	.suspend	= pcie_portdrv_suspend,
-	.resume		= pcie_portdrv_resume,
-
 	.err_handler 	= &pcie_portdrv_err_handler,
 	.err_handler 	= &pcie_portdrv_err_handler,
+
+	.driver.pm 	= PCIE_PORTDRV_PM_OPS,
 };
 };
 
 
 static int __init pcie_portdrv_init(void)
 static int __init pcie_portdrv_init(void)

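For illustration only (not part of this series): with the port driver now hooked up through struct dev_pm_ops via .driver.pm, an ordinary PCI driver can be converted the same way. The sketch below assumes hypothetical callbacks my_dev_suspend()/my_dev_resume(); only the hook-up pattern mirrors the change above.

/* Sketch: wiring dev_pm_ops into a PCI driver, as done for pcieport above. */
static int my_dev_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	/* device-specific quiesce work would go here */
	pci_save_state(pdev);
	return 0;
}

static int my_dev_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	pci_restore_state(pdev);
	/* device-specific re-init work would go here */
	return 0;
}

static struct dev_pm_ops my_dev_pm_ops = {
	.suspend	= my_dev_suspend,
	.resume		= my_dev_resume,
	.freeze		= my_dev_suspend,
	.thaw		= my_dev_resume,
	.poweroff	= my_dev_suspend,
	.restore	= my_dev_resume,
};

static struct pci_driver my_pci_driver = {
	.name		= "my_pci_driver",
	/* .id_table, .probe, .remove elided */
	.driver.pm	= &my_dev_pm_ops,
};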
+ 130 - 80
drivers/pci/probe.c

@@ -287,7 +287,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 	struct resource *res;
 	int i;
 
-	if (!dev)		/* It's a host bus, nothing to read */
+	if (!child->parent)	/* It's a host bus, nothing to read */
 		return;
 
 	if (dev->transparent) {
@@ -511,21 +511,21 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
 
 		/*
 		 * If we already got to this bus through a different bridge,
-		 * ignore it.  This can happen with the i450NX chipset.
+		 * don't re-add it. This can happen with the i450NX chipset.
+		 *
+		 * However, we continue to descend down the hierarchy and
+		 * scan remaining child buses.
 		 */
-		if (pci_find_bus(pci_domain_nr(bus), busnr)) {
-			dev_info(&dev->dev, "bus %04x:%02x already known\n",
-				 pci_domain_nr(bus), busnr);
-			goto out;
+		child = pci_find_bus(pci_domain_nr(bus), busnr);
+		if (!child) {
+			child = pci_add_new_bus(bus, dev, busnr);
+			if (!child)
+				goto out;
+			child->primary = buses & 0xFF;
+			child->subordinate = (buses >> 16) & 0xFF;
+			child->bridge_ctl = bctl;
 		}
 
-		child = pci_add_new_bus(bus, dev, busnr);
-		if (!child)
-			goto out;
-		child->primary = buses & 0xFF;
-		child->subordinate = (buses >> 16) & 0xFF;
-		child->bridge_ctl = bctl;
-
 		cmax = pci_scan_child_bus(child);
 		if (cmax > max)
 			max = cmax;
@@ -674,6 +674,19 @@ static void pci_read_irq(struct pci_dev *dev)
 	dev->irq = irq;
 }
 
+static void set_pcie_port_type(struct pci_dev *pdev)
+{
+	int pos;
+	u16 reg16;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+	if (!pos)
+		return;
+	pdev->is_pcie = 1;
+	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
+	pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
+}
+
 #define LEGACY_IO_RESOURCE	(IORESOURCE_IO | IORESOURCE_PCI_FIXED)
 
 /**
@@ -683,12 +696,33 @@ static void pci_read_irq(struct pci_dev *dev)
 * Initialize the device structure with information about the device's 
 * vendor,class,memory and IO-space addresses,IRQ lines etc.
 * Called at initialisation of the PCI subsystem and by CardBus services.
- * Returns 0 on success and -1 if unknown type of device (not normal, bridge
- * or CardBus).
+ * Returns 0 on success and negative if unknown type of device (not normal,
+ * bridge or CardBus).
 */
-static int pci_setup_device(struct pci_dev * dev)
+int pci_setup_device(struct pci_dev *dev)
 {
 	u32 class;
+	u8 hdr_type;
+	struct pci_slot *slot;
+
+	if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
+		return -EIO;
+
+	dev->sysdata = dev->bus->sysdata;
+	dev->dev.parent = dev->bus->bridge;
+	dev->dev.bus = &pci_bus_type;
+	dev->hdr_type = hdr_type & 0x7f;
+	dev->multifunction = !!(hdr_type & 0x80);
+	dev->error_state = pci_channel_io_normal;
+	set_pcie_port_type(dev);
+
+	list_for_each_entry(slot, &dev->bus->slots, list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = slot;
+
+	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+	   set this higher, assuming the system even supports it.  */
+	dev->dma_mask = 0xffffffff;
 
 	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
 		     dev->bus->number, PCI_SLOT(dev->devfn),
@@ -703,12 +737,14 @@ static int pci_setup_device(struct pci_dev * dev)
 	dev_dbg(&dev->dev, "found [%04x:%04x] class %06x header type %02x\n",
 		 dev->vendor, dev->device, class, dev->hdr_type);
 
+	/* need to have dev->class ready */
+	dev->cfg_size = pci_cfg_space_size(dev);
+
 	/* "Unknown power state" */
 	dev->current_state = PCI_UNKNOWN;
 
 	/* Early fixups, before probing the BARs */
 	pci_fixup_device(pci_fixup_early, dev);
-	class = dev->class >> 8;
 
 	switch (dev->hdr_type) {		    /* header type */
 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
@@ -770,7 +806,7 @@ static int pci_setup_device(struct pci_dev * dev)
 	default:				    /* unknown header */
 		dev_err(&dev->dev, "unknown header type %02x, "
 			"ignoring device\n", dev->hdr_type);
-		return -1;
+		return -EIO;
 
 	bad:
 		dev_err(&dev->dev, "ignoring class %02x (doesn't match header "
@@ -785,6 +821,7 @@ static int pci_setup_device(struct pci_dev * dev)
 static void pci_release_capabilities(struct pci_dev *dev)
 {
 	pci_vpd_release(dev);
+	pci_iov_release(dev);
 }
 
 /**
@@ -803,19 +840,6 @@ static void pci_release_dev(struct device *dev)
 	kfree(pci_dev);
 }
 
-static void set_pcie_port_type(struct pci_dev *pdev)
-{
-	int pos;
-	u16 reg16;
-
-	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
-	if (!pos)
-		return;
-	pdev->is_pcie = 1;
-	pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
-	pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
-}
-
 /**
 * pci_cfg_space_size - get the configuration space size of the PCI device.
 * @dev: PCI device
@@ -847,6 +871,11 @@ int pci_cfg_space_size(struct pci_dev *dev)
 {
 	int pos;
 	u32 status;
+	u16 class;
+
+	class = dev->class >> 8;
+	if (class == PCI_CLASS_BRIDGE_HOST)
+		return pci_cfg_space_size_ext(dev);
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos) {
@@ -891,9 +920,7 @@ EXPORT_SYMBOL(alloc_pci_dev);
 static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 {
 	struct pci_dev *dev;
-	struct pci_slot *slot;
 	u32 l;
-	u8 hdr_type;
 	int delay = 1;
 
 	if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
@@ -920,34 +947,16 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 		}
 	}
 
-	if (pci_bus_read_config_byte(bus, devfn, PCI_HEADER_TYPE, &hdr_type))
-		return NULL;
-
 	dev = alloc_pci_dev();
 	if (!dev)
 		return NULL;
 
 	dev->bus = bus;
-	dev->sysdata = bus->sysdata;
-	dev->dev.parent = bus->bridge;
-	dev->dev.bus = &pci_bus_type;
 	dev->devfn = devfn;
-	dev->hdr_type = hdr_type & 0x7f;
-	dev->multifunction = !!(hdr_type & 0x80);
 	dev->vendor = l & 0xffff;
 	dev->device = (l >> 16) & 0xffff;
-	dev->cfg_size = pci_cfg_space_size(dev);
-	dev->error_state = pci_channel_io_normal;
-	set_pcie_port_type(dev);
-
-	list_for_each_entry(slot, &bus->slots, list)
-		if (PCI_SLOT(devfn) == slot->number)
-			dev->slot = slot;
 
-	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
-	   set this higher, assuming the system even supports it.  */
-	dev->dma_mask = 0xffffffff;
-	if (pci_setup_device(dev) < 0) {
+	if (pci_setup_device(dev)) {
 		kfree(dev);
 		return NULL;
 	}
@@ -972,6 +981,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
 
 	/* Alternative Routing-ID Forwarding */
 	pci_enable_ari(dev);
+
+	/* Single Root I/O Virtualization */
+	pci_iov_init(dev);
 }
 
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
@@ -1006,6 +1018,12 @@ struct pci_dev *__ref pci_scan_single_device(struct pci_bus *bus, int devfn)
 {
 	struct pci_dev *dev;
 
+	dev = pci_get_slot(bus, devfn);
+	if (dev) {
+		pci_dev_put(dev);
+		return dev;
+	}
+
 	dev = pci_scan_device(bus, devfn);
 	if (!dev)
 		return NULL;
@@ -1024,35 +1042,27 @@ EXPORT_SYMBOL(pci_scan_single_device);
 * Scan a PCI slot on the specified PCI bus for devices, adding
 * discovered devices to the @bus->devices list.  New devices
 * will not have is_added set.
+ *
+ * Returns the number of new devices found.
 */
 int pci_scan_slot(struct pci_bus *bus, int devfn)
 {
-	int func, nr = 0;
-	int scan_all_fns;
-
-	scan_all_fns = pcibios_scan_all_fns(bus, devfn);
-
-	for (func = 0; func < 8; func++, devfn++) {
-		struct pci_dev *dev;
-
-		dev = pci_scan_single_device(bus, devfn);
-		if (dev) {
-			nr++;
+	int fn, nr = 0;
+	struct pci_dev *dev;
 
-			/*
-		 	 * If this is a single function device,
-		 	 * don't scan past the first function.
-		 	 */
-			if (!dev->multifunction) {
-				if (func > 0) {
-					dev->multifunction = 1;
-				} else {
- 					break;
-				}
+	dev = pci_scan_single_device(bus, devfn);
+	if (dev && !dev->is_added)	/* new device? */
+		nr++;
+
+	if ((dev && dev->multifunction) ||
+	    (!dev && pcibios_scan_all_fns(bus, devfn))) {
+		for (fn = 1; fn < 8; fn++) {
+			dev = pci_scan_single_device(bus, devfn + fn);
+			if (dev) {
+				if (!dev->is_added)
+					nr++;
+				dev->multifunction = 1;
 			}
-		} else {
-			if (func == 0 && !scan_all_fns)
-				break;
 		}
 	}
 
@@ -1074,12 +1084,21 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
 	for (devfn = 0; devfn < 0x100; devfn += 8)
 		pci_scan_slot(bus, devfn);
 
+	/* Reserve buses for SR-IOV capability. */
+	max += pci_iov_bus_range(bus);
+
 	/*
 	 * After performing arch-dependent fixup of the bus, look behind
 	 * all PCI-to-PCI bridges on this bus.
 	 */
-	pr_debug("PCI: Fixups for bus %04x:%02x\n", pci_domain_nr(bus), bus->number);
-	pcibios_fixup_bus(bus);
+	if (!bus->is_added) {
+		pr_debug("PCI: Fixups for bus %04x:%02x\n",
+			 pci_domain_nr(bus), bus->number);
+		pcibios_fixup_bus(bus);
+		if (pci_is_root_bus(bus))
+			bus->is_added = 1;
+	}
+
 	for (pass=0; pass < 2; pass++)
 		list_for_each_entry(dev, &bus->devices, bus_list) {
 			if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
@@ -1114,7 +1133,7 @@ struct pci_bus * pci_create_bus(struct device *parent,
 	if (!b)
 		return NULL;
 
-	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev){
 		kfree(b);
 		return NULL;
@@ -1133,7 +1152,6 @@ struct pci_bus * pci_create_bus(struct device *parent,
 	list_add_tail(&b->node, &pci_root_buses);
 	up_write(&pci_bus_sem);
 
-	memset(dev, 0, sizeof(*dev));
 	dev->parent = parent;
 	dev->release = pci_release_bus_bridge_dev;
 	dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus);
@@ -1193,6 +1211,38 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
 EXPORT_SYMBOL(pci_scan_bus_parented);
 
 #ifdef CONFIG_HOTPLUG
+/**
+ * pci_rescan_bus - scan a PCI bus for devices.
+ * @bus: PCI bus to scan
+ *
+ * Scan a PCI bus and child buses for new devices, adds them,
+ * and enables them.
+ *
+ * Returns the max number of subordinate bus discovered.
+ */
+unsigned int __devinit pci_rescan_bus(struct pci_bus *bus)
+{
+	unsigned int max;
+	struct pci_dev *dev;
+
+	max = pci_scan_child_bus(bus);
+
+	down_read(&pci_bus_sem);
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+		    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+			if (dev->subordinate)
+				pci_bus_size_bridges(dev->subordinate);
+	up_read(&pci_bus_sem);
+
+	pci_bus_assign_resources(bus);
+	pci_enable_bridges(bus);
+	pci_bus_add_devices(bus);
+
+	return max;
+}
+EXPORT_SYMBOL_GPL(pci_rescan_bus);
+
 EXPORT_SYMBOL(pci_add_new_bus);
 EXPORT_SYMBOL(pci_scan_slot);
 EXPORT_SYMBOL(pci_scan_bridge);

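As a usage note (illustrative, not taken from this patch set): a hotplug-style caller that has just made new functions visible on a bridge's secondary bus could pick them up with the new helper. The bridge pointer below stands in for whatever device the caller already holds.

/* Sketch: picking up newly appeared devices below a known bridge. */
static void example_rescan_below_bridge(struct pci_dev *bridge)
{
	struct pci_bus *bus = bridge->subordinate;

	if (!bus)
		return;

	/* scans child buses, sizes/assigns resources, adds new devices */
	pci_rescan_bus(bus);
}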
+ 182 - 39
drivers/pci/quirks.c

@@ -24,6 +24,7 @@
 #include <linux/kallsyms.h>
 #include <linux/dmi.h>
 #include <linux/pci-aspm.h>
+#include <linux/ioport.h>
 #include "pci.h"
 
 int isa_dma_bridge_buggy;
@@ -34,6 +35,65 @@ int pcie_mch_quirk;
 EXPORT_SYMBOL(pcie_mch_quirk);
 
 #ifdef CONFIG_PCI_QUIRKS
+/*
+ * This quirk function disables the device and releases resources
+ * which is specified by kernel's boot parameter 'pci=resource_alignment='.
+ * It also rounds up size to specified alignment.
+ * Later on, the kernel will assign page-aligned memory resource back
+ * to that device.
+ */
+static void __devinit quirk_resource_alignment(struct pci_dev *dev)
+{
+	int i;
+	struct resource *r;
+	resource_size_t align, size;
+
+	if (!pci_is_reassigndev(dev))
+		return;
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) {
+		dev_warn(&dev->dev,
+			"Can't reassign resources to host bridge.\n");
+		return;
+	}
+
+	dev_info(&dev->dev, "Disabling device and release resources.\n");
+	pci_disable_device(dev);
+
+	align = pci_specified_resource_alignment(dev);
+	for (i=0; i < PCI_BRIDGE_RESOURCES; i++) {
+		r = &dev->resource[i];
+		if (!(r->flags & IORESOURCE_MEM))
+			continue;
+		size = resource_size(r);
+		if (size < align) {
+			size = align;
+			dev_info(&dev->dev,
+				"Rounding up size of resource #%d to %#llx.\n",
+				i, (unsigned long long)size);
+		}
+		r->end = size - 1;
+		r->start = 0;
+	}
+	/* Need to disable bridge's resource window,
+	 * to enable the kernel to reassign new resource
+	 * window later on.
+	 */
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
+			r = &dev->resource[i];
+			if (!(r->flags & IORESOURCE_MEM))
+				continue;
+			r->end = resource_size(r) - 1;
+			r->start = 0;
+		}
+		pci_disable_bridge_window(dev);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment);
+
 /* The Mellanox Tavor device gives false positive parity errors
 * Mark this device with a broken_parity_status, to allow
 * PCI scanning code to "skip" this now blacklisted device.
@@ -1126,10 +1186,15 @@ static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev)
 				 * its on-board VGA controller */
 				asus_hides_smbus = 1;
 			}
-		else if (dev->device == PCI_DEVICE_ID_INTEL_82845G_IG)
+		else if (dev->device == PCI_DEVICE_ID_INTEL_82801DB_2)
 			switch(dev->subsystem_device) {
 			case 0x00b8: /* Compaq Evo D510 CMT */
 			case 0x00b9: /* Compaq Evo D510 SFF */
+				/* Motherboard doesn't have Host bridge
+				 * subvendor/subdevice IDs and on-board VGA
+				 * controller is disabled if an AGP card is
+				 * inserted, therefore checking USB UHCI
+				 * Controller #1 */
 				asus_hides_smbus = 1;
 			}
 		else if (dev->device == PCI_DEVICE_ID_INTEL_82815_CGC)
@@ -1154,7 +1219,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82855GM_HB,	as
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82915GM_HB, asus_hides_smbus_hostbridge);
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82810_IG3,	asus_hides_smbus_hostbridge);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82845G_IG,	asus_hides_smbus_hostbridge);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82801DB_2,	asus_hides_smbus_hostbridge);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_82815_CGC,	asus_hides_smbus_hostbridge);
 
 static void asus_hides_smbus_lpc(struct pci_dev *dev)
@@ -1664,9 +1729,13 @@ static void __devinit quirk_netmos(struct pci_dev *dev)
 	 * of parallel ports and <S> is the number of serial ports.
 	 */
 	switch (dev->device) {
+	case PCI_DEVICE_ID_NETMOS_9835:
+		/* Well, this rule doesn't hold for the following 9835 device */
+		if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM &&
+				dev->subsystem_device == 0x0299)
+			return;
 	case PCI_DEVICE_ID_NETMOS_9735:
 	case PCI_DEVICE_ID_NETMOS_9745:
-	case PCI_DEVICE_ID_NETMOS_9835:
 	case PCI_DEVICE_ID_NETMOS_9845:
 	case PCI_DEVICE_ID_NETMOS_9855:
 		if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_SERIAL &&
@@ -2078,6 +2147,92 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
 			PCI_DEVICE_ID_NVIDIA_NVENET_15,
 			nvenet_msi_disable);
 
+static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
+{
+	int pos, ttl = 48;
+	int found = 0;
+
+	/* check if there is HT MSI cap or enabled on this device */
+	pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
+	while (pos && ttl--) {
+		u8 flags;
+
+		if (found < 1)
+			found = 1;
+		if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
+					 &flags) == 0) {
+			if (flags & HT_MSI_FLAGS_ENABLE) {
+				if (found < 2) {
+					found = 2;
+					break;
+				}
+			}
+		}
+		pos = pci_find_next_ht_capability(dev, pos,
+						  HT_CAPTYPE_MSI_MAPPING);
+	}
+
+	return found;
+}
+
+static int __devinit host_bridge_with_leaf(struct pci_dev *host_bridge)
+{
+	struct pci_dev *dev;
+	int pos;
+	int i, dev_no;
+	int found = 0;
+
+	dev_no = host_bridge->devfn >> 3;
+	for (i = dev_no + 1; i < 0x20; i++) {
+		dev = pci_get_slot(host_bridge->bus, PCI_DEVFN(i, 0));
+		if (!dev)
+			continue;
+
+		/* found next host bridge ?*/
+		pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
+		if (pos != 0) {
+			pci_dev_put(dev);
+			break;
+		}
+
+		if (ht_check_msi_mapping(dev)) {
+			found = 1;
+			pci_dev_put(dev);
+			break;
+		}
+		pci_dev_put(dev);
+	}
+
+	return found;
+}
+
+#define PCI_HT_CAP_SLAVE_CTRL0     4    /* link control */
+#define PCI_HT_CAP_SLAVE_CTRL1     8    /* link control to */
+
+static int __devinit is_end_of_ht_chain(struct pci_dev *dev)
+{
+	int pos, ctrl_off;
+	int end = 0;
+	u16 flags, ctrl;
+
+	pos = pci_find_ht_capability(dev, HT_CAPTYPE_SLAVE);
+
+	if (!pos)
+		goto out;
+
+	pci_read_config_word(dev, pos + PCI_CAP_FLAGS, &flags);
+
+	ctrl_off = ((flags >> 10) & 1) ?
+			PCI_HT_CAP_SLAVE_CTRL0 : PCI_HT_CAP_SLAVE_CTRL1;
+	pci_read_config_word(dev, pos + ctrl_off, &ctrl);
+
+	if (ctrl & (1 << 6))
+		end = 1;
+
+out:
+	return end;
+}
+
 static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
 {
 	struct pci_dev *host_bridge;
@@ -2102,6 +2257,11 @@ static void __devinit nv_ht_enable_msi_mapping(struct pci_dev *dev)
 	if (!found)
 		return;
 
+	/* don't enable end_device/host_bridge with leaf directly here */
+	if (host_bridge == dev && is_end_of_ht_chain(host_bridge) &&
+	    host_bridge_with_leaf(host_bridge))
+		goto out;
+
 	/* root did that ! */
 	if (msi_ht_cap_enabled(host_bridge))
 		goto out;
@@ -2132,44 +2292,12 @@ static void __devinit ht_disable_msi_mapping(struct pci_dev *dev)
 	}
 }
 
-static int __devinit ht_check_msi_mapping(struct pci_dev *dev)
-{
-	int pos, ttl = 48;
-	int found = 0;
-
-	/* check if there is HT MSI cap or enabled on this device */
-	pos = pci_find_ht_capability(dev, HT_CAPTYPE_MSI_MAPPING);
-	while (pos && ttl--) {
-		u8 flags;
-
-		if (found < 1)
-			found = 1;
-		if (pci_read_config_byte(dev, pos + HT_MSI_FLAGS,
-					 &flags) == 0) {
-			if (flags & HT_MSI_FLAGS_ENABLE) {
-				if (found < 2) {
-					found = 2;
-					break;
-				}
-			}
-		}
-		pos = pci_find_next_ht_capability(dev, pos,
-						  HT_CAPTYPE_MSI_MAPPING);
-	}
-
-	return found;
-}
-
-static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
+static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all)
 {
 	struct pci_dev *host_bridge;
 	int pos;
 	int found;
 
-	/* Enabling HT MSI mapping on this device breaks MCP51 */
-	if (dev->device == 0x270)
-		return;
-
 	/* check if there is HT MSI cap or enabled on this device */
 	found = ht_check_msi_mapping(dev);
 
@@ -2193,7 +2321,10 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
 		/* Host bridge is to HT */
 		if (found == 1) {
 			/* it is not enabled, try to enable it */
-			nv_ht_enable_msi_mapping(dev);
+			if (all)
+				ht_enable_msi_mapping(dev);
+			else
+				nv_ht_enable_msi_mapping(dev);
 		}
 		return;
 	}
@@ -2205,8 +2336,20 @@ static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
 	/* Host bridge is not to HT, disable HT MSI mapping on this device */
 	ht_disable_msi_mapping(dev);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk);
+
+static void __devinit nv_msi_ht_cap_quirk_all(struct pci_dev *dev)
+{
+	return __nv_msi_ht_cap_quirk(dev, 1);
+}
+
+static void __devinit nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev)
+{
+	return __nv_msi_ht_cap_quirk(dev, 0);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf);
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
 
 static void __devinit quirk_msi_intx_disable_bug(struct pci_dev *dev)
 {

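A small illustrative sketch (not part of this series) of the fixup registration pattern used throughout this file: the hook runs once per matching device at the stage named by the macro, here the header-fixup stage also used by quirk_resource_alignment() above. The vendor/device IDs and the quirk body below are hypothetical.

/* Sketch: a minimal header-stage quirk for a hypothetical device. */
static void __devinit quirk_example_disable_msi(struct pci_dev *dev)
{
	/* example action: mark the device as unable to use MSI */
	dev->no_msi = 1;
	dev_info(&dev->dev, "example quirk: MSI disabled\n");
}
DECLARE_PCI_FIXUP_HEADER(0x1234, 0x5678, quirk_example_disable_msi);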
+ 4 - 0
drivers/pci/remove.c

@@ -71,6 +71,9 @@ void pci_remove_bus(struct pci_bus *pci_bus)
 	down_write(&pci_bus_sem);
 	list_del(&pci_bus->node);
 	up_write(&pci_bus_sem);
+	if (!pci_bus->is_added)
+		return;
+
 	pci_remove_legacy_files(pci_bus);
 	device_remove_file(&pci_bus->dev, &dev_attr_cpuaffinity);
 	device_remove_file(&pci_bus->dev, &dev_attr_cpulistaffinity);
@@ -92,6 +95,7 @@ EXPORT_SYMBOL(pci_remove_bus);
 */
 void pci_remove_bus_device(struct pci_dev *dev)
 {
+	pci_stop_bus_device(dev);
 	if (dev->subordinate) {
 		struct pci_bus *b = dev->subordinate;
 

+ 1 - 1
drivers/pci/search.c

@@ -29,7 +29,7 @@ pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
 	if (pdev->is_pcie)
 		return NULL;
 	while (1) {
-		if (!pdev->bus->self)
+		if (!pdev->bus->parent)
 			break;
 		pdev = pdev->bus->self;
 		/* a p2p bridge */

+ 5 - 2
drivers/pci/setup-bus.c

@@ -27,7 +27,7 @@
 #include <linux/slab.h>
 
 
-static void pbus_assign_resources_sorted(struct pci_bus *bus)
+static void pbus_assign_resources_sorted(const struct pci_bus *bus)
 {
 	struct pci_dev *dev;
 	struct resource *res;
@@ -144,6 +144,9 @@ static void pci_setup_bridge(struct pci_bus *bus)
 	struct pci_bus_region region;
 	u32 l, bu, lu, io_upper16;
 
+	if (!pci_is_root_bus(bus) && bus->is_added)
+		return;
+
 	dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n",
 		 pci_domain_nr(bus), bus->number);
 
@@ -495,7 +498,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pci_bus_size_bridges);
 
-void __ref pci_bus_assign_resources(struct pci_bus *bus)
+void __ref pci_bus_assign_resources(const struct pci_bus *bus)
 {
 	struct pci_bus *b;
 	struct pci_dev *dev;

+ 15 - 0
drivers/pci/setup-res.c

@@ -120,6 +120,21 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
 	return err;
 }
 
+#ifdef CONFIG_PCI_QUIRKS
+void pci_disable_bridge_window(struct pci_dev *dev)
+{
+	dev_dbg(&dev->dev, "Disabling bridge window.\n");
+
+	/* MMIO Base/Limit */
+	pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
+
+	/* Prefetchable MMIO Base/Limit */
+	pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+	pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0x0000fff0);
+	pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0xffffffff);
+}
+#endif	/* CONFIG_PCI_QUIRKS */
+
 int pci_assign_resource(struct pci_dev *dev, int resno)
 {
 	struct pci_bus *bus = dev->bus;

+ 8 - 10
drivers/pci/slot.c

@@ -1,8 +1,8 @@
 /*
 * drivers/pci/slot.c
 * Copyright (C) 2006 Matthew Wilcox <matthew@wil.cx>
- * Copyright (C) 2006-2008 Hewlett-Packard Development Company, L.P.
- * 	Alex Chiang <achiang@hp.com>
+ * Copyright (C) 2006-2009 Hewlett-Packard Development Company, L.P.
+ *	Alex Chiang <achiang@hp.com>
 */
 
 #include <linux/kobject.h>
@@ -52,8 +52,8 @@ static void pci_slot_release(struct kobject *kobj)
 	struct pci_dev *dev;
 	struct pci_slot *slot = to_pci_slot(kobj);
 
-	pr_debug("%s: releasing pci_slot on %x:%d\n", __func__,
-		 slot->bus->number, slot->number);
+	dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n",
+		slot->number, pci_slot_name(slot));
 
 	list_for_each_entry(dev, &slot->bus->devices, bus_list)
 		if (PCI_SLOT(dev->devfn) == slot->number)
@@ -248,9 +248,8 @@ placeholder:
 		if (PCI_SLOT(dev->devfn) == slot_nr)
 			dev->slot = slot;
 
-	/* Don't care if debug printk has a -1 for slot_nr */
-	pr_debug("%s: created pci_slot on %04x:%02x:%02x\n",
-		 __func__, pci_domain_nr(parent), parent->number, slot_nr);
+	dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n",
+		slot_nr, pci_slot_name(slot));
 
 out:
 	kfree(slot_name);
@@ -299,9 +298,8 @@ EXPORT_SYMBOL_GPL(pci_renumber_slot);
 */
 void pci_destroy_slot(struct pci_slot *slot)
 {
-	pr_debug("%s: dec refcount to %d on %04x:%02x:%02x\n", __func__,
-		 atomic_read(&slot->kobj.kref.refcount) - 1,
-		 pci_domain_nr(slot->bus), slot->bus->number, slot->number);
+	dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n",
+		slot->number, atomic_read(&slot->kobj.kref.refcount) - 1);
 
 	down_write(&pci_bus_sem);
 	kobject_put(&slot->kobj);

+ 34 - 0
include/linux/acpi.h

@@ -257,6 +257,40 @@ void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
 void __init acpi_s4_no_nvs(void);
 #endif /* CONFIG_PM_SLEEP */
+
+#define OSC_QUERY_TYPE			0
+#define OSC_SUPPORT_TYPE 		1
+#define OSC_CONTROL_TYPE		2
+#define OSC_SUPPORT_MASKS		0x1f
+
+/* _OSC DW0 Definition */
+#define OSC_QUERY_ENABLE		1
+#define OSC_REQUEST_ERROR		2
+#define OSC_INVALID_UUID_ERROR		4
+#define OSC_INVALID_REVISION_ERROR	8
+#define OSC_CAPABILITIES_MASK_ERROR	16
+
+/* _OSC DW1 Definition (OS Support Fields) */
+#define OSC_EXT_PCI_CONFIG_SUPPORT		1
+#define OSC_ACTIVE_STATE_PWR_SUPPORT 		2
+#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT	4
+#define OSC_PCI_SEGMENT_GROUPS_SUPPORT		8
+#define OSC_MSI_SUPPORT				16
+
+/* _OSC DW1 Definition (OS Control Fields) */
+#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL	1
+#define OSC_SHPC_NATIVE_HP_CONTROL 		2
+#define OSC_PCI_EXPRESS_PME_CONTROL		4
+#define OSC_PCI_EXPRESS_AER_CONTROL		8
+#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL	16
+
+#define OSC_CONTROL_MASKS 	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | 	\
+				OSC_SHPC_NATIVE_HP_CONTROL | 		\
+				OSC_PCI_EXPRESS_PME_CONTROL |		\
+				OSC_PCI_EXPRESS_AER_CONTROL |		\
+				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
+
+extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags);
 #else	/* CONFIG_ACPI */
 
 static inline int early_acpi_boot_init(void)

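Illustrative usage sketch (an assumption, not taken from this series): a caller that already holds a PCI root bridge's ACPI handle could request native PCIe hotplug control with the newly exported helper; error handling is kept minimal here.

/* Sketch: asking the firmware for native PCIe hotplug control via _OSC. */
static int example_request_native_hotplug(acpi_handle root_bridge_handle)
{
	acpi_status status;

	status = acpi_pci_osc_control_set(root_bridge_handle,
					  OSC_PCI_EXPRESS_NATIVE_HP_CONTROL);
	if (ACPI_FAILURE(status))
		return -ENODEV;

	return 0;
}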
+ 8 - 5
include/linux/msi.h

@@ -20,20 +20,23 @@ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
 
 struct msi_desc {
 	struct {
-		__u8	type	: 5; 	/* {0: unused, 5h:MSI, 11h:MSI-X} */
+		__u8	is_msix	: 1;
+		__u8	multiple: 3;	/* log2 number of messages */
 		__u8	maskbit	: 1; 	/* mask-pending bit supported ?   */
-		__u8	masked	: 1;
 		__u8	is_64	: 1;	/* Address size: 0=32bit 1=64bit  */
 		__u8	pos;	 	/* Location of the msi capability */
-		__u32	maskbits_mask;  /* mask bits mask */
 		__u16	entry_nr;    	/* specific enabled entry 	  */
 		unsigned default_irq;	/* default pre-assigned irq	  */
-	}msi_attrib;
+	} msi_attrib;
 
+	u32 masked;			/* mask bits */
 	unsigned int irq;
 	struct list_head list;
 
-	void __iomem *mask_base;
+	union {
+		void __iomem *mask_base;
+		u8 mask_pos;
+	};
 	struct pci_dev *dev;
 
 	/* Last set MSI message */

+ 10 - 57
include/linux/pci-acpi.h

@@ -10,72 +10,25 @@
 
 #include <linux/acpi.h>
 
-#define OSC_QUERY_TYPE			0
-#define OSC_SUPPORT_TYPE 		1
-#define OSC_CONTROL_TYPE		2
-#define OSC_SUPPORT_MASKS		0x1f
-
-/*
- * _OSC DW0 Definition 
- */
-#define OSC_QUERY_ENABLE		1
-#define OSC_REQUEST_ERROR		2
-#define OSC_INVALID_UUID_ERROR		4
-#define OSC_INVALID_REVISION_ERROR	8
-#define OSC_CAPABILITIES_MASK_ERROR	16
-
-/*
- * _OSC DW1 Definition (OS Support Fields)
- */
-#define OSC_EXT_PCI_CONFIG_SUPPORT		1
-#define OSC_ACTIVE_STATE_PWR_SUPPORT 		2
-#define OSC_CLOCK_PWR_CAPABILITY_SUPPORT	4
-#define OSC_PCI_SEGMENT_GROUPS_SUPPORT		8
-#define OSC_MSI_SUPPORT				16
-
-/*
- * _OSC DW1 Definition (OS Control Fields)
- */
-#define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL	1
-#define OSC_SHPC_NATIVE_HP_CONTROL 		2
-#define OSC_PCI_EXPRESS_PME_CONTROL		4
-#define OSC_PCI_EXPRESS_AER_CONTROL		8
-#define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL	16
-
-#define OSC_CONTROL_MASKS 	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | 	\
-				OSC_SHPC_NATIVE_HP_CONTROL | 		\
-				OSC_PCI_EXPRESS_PME_CONTROL |		\
-				OSC_PCI_EXPRESS_AER_CONTROL |		\
-				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
-
 #ifdef CONFIG_ACPI
-extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags);
-int pci_acpi_osc_support(acpi_handle handle, u32 flags);
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 {
-	/* Find root host bridge */
-	while (pdev->bus->self)
-		pdev = pdev->bus->self;
-
-	return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus),
-			pdev->bus->number);
+	struct pci_bus *pbus = pdev->bus;
+	/* Find a PCI root bus */
+	while (pbus->parent)
+		pbus = pbus->parent;
+	return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
+					      pbus->number);
 }
 
 static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
 {
-	int seg = pci_domain_nr(pbus), busnr = pbus->number;
-	struct pci_dev *bridge = pbus->self;
-	if (bridge)
-		return DEVICE_ACPI_HANDLE(&(bridge->dev));
-	return acpi_get_pci_rootbridge_handle(seg, busnr);
+	if (pbus->parent)
+		return DEVICE_ACPI_HANDLE(&(pbus->self->dev));
+	return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
+					      pbus->number);
 }
 #else
-#if !defined(AE_ERROR)
-typedef u32 		acpi_status;
-#define AE_ERROR      	(acpi_status) (0x0001)
-#endif    
-static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
-{return AE_ERROR;}
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 { return NULL; }
 #endif

+ 57 - 4
include/linux/pci.h

@@ -52,6 +52,7 @@
 #include <asm/atomic.h>
 #include <linux/device.h>
 #include <linux/io.h>
+#include <linux/irqreturn.h>
 
 /* Include the ID list */
 #include <linux/pci_ids.h>
@@ -93,6 +94,12 @@ enum {
 	/* #6: expansion ROM resource */
 	PCI_ROM_RESOURCE,
 
+	/* device specific resources */
+#ifdef CONFIG_PCI_IOV
+	PCI_IOV_RESOURCES,
+	PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1,
+#endif
+
 	/* resources assigned to buses behind the bridge */
 #define PCI_BRIDGE_RESOURCE_NUM 4
 
@@ -180,6 +187,7 @@ struct pci_cap_saved_state {
 
 struct pcie_link_state;
 struct pci_vpd;
+struct pci_sriov;
 
 /*
 * The pci_dev structure is used to describe PCI devices.
@@ -257,6 +265,8 @@ struct pci_dev {
 	unsigned int	is_managed:1;
 	unsigned int	is_pcie:1;
 	unsigned int	state_saved:1;
+	unsigned int	is_physfn:1;
+	unsigned int	is_virtfn:1;
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
@@ -270,6 +280,12 @@ struct pci_dev {
 	struct list_head msi_list;
 #endif
 	struct pci_vpd *vpd;
+#ifdef CONFIG_PCI_IOV
+	union {
+		struct pci_sriov *sriov;	/* SR-IOV capability related */
+		struct pci_dev *physfn;	/* the PF this VF is associated with */
+	};
+#endif
 };
 
 extern struct pci_dev *alloc_pci_dev(void);
@@ -341,6 +357,15 @@ struct pci_bus {
 #define pci_bus_b(n)	list_entry(n, struct pci_bus, node)
 #define to_pci_bus(n)	container_of(n, struct pci_bus, dev)
 
+/*
+ * Returns true if the pci bus is root (behind host-pci bridge),
+ * false otherwise
+ */
+static inline bool pci_is_root_bus(struct pci_bus *pbus)
+{
+	return !(pbus->parent);
+}
+
 #ifdef CONFIG_PCI_MSI
 static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
 {
@@ -528,7 +553,7 @@ void pcibios_update_irq(struct pci_dev *, int irq);
 /* Generic PCI functions used internally */
 
 extern struct pci_bus *pci_find_bus(int domain, int busnr);
-void pci_bus_add_devices(struct pci_bus *bus);
+void pci_bus_add_devices(const struct pci_bus *bus);
 struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
 				      struct pci_ops *ops, void *sysdata);
 static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
@@ -702,6 +727,9 @@ int pci_back_from_sleep(struct pci_dev *dev);
 
 /* Functions for PCI Hotplug drivers to use */
 int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
+#ifdef CONFIG_HOTPLUG
+unsigned int pci_rescan_bus(struct pci_bus *bus);
+#endif
 
 /* Vital product data routines */
 ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
@@ -709,7 +737,7 @@ ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void
 int pci_vpd_truncate(struct pci_dev *dev, size_t size);
 
 /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
-void pci_bus_assign_resources(struct pci_bus *bus);
+void pci_bus_assign_resources(const struct pci_bus *bus);
 void pci_bus_size_bridges(struct pci_bus *bus);
 int pci_claim_resource(struct pci_dev *, int);
 void pci_assign_unassigned_resources(void);
@@ -790,7 +818,7 @@ struct msix_entry {
 
 
 #ifndef CONFIG_PCI_MSI
-static inline int pci_enable_msi(struct pci_dev *dev)
+static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 {
 	return -1;
 }
@@ -800,6 +828,10 @@ static inline void pci_msi_shutdown(struct pci_dev *dev)
 static inline void pci_disable_msi(struct pci_dev *dev)
 { }
 
+static inline int pci_msix_table_size(struct pci_dev *dev)
+{
+	return 0;
+}
 static inline int pci_enable_msix(struct pci_dev *dev,
 				  struct msix_entry *entries, int nvec)
 {
@@ -821,9 +853,10 @@ static inline int pci_msi_enabled(void)
 	return 0;
 }
 #else
-extern int pci_enable_msi(struct pci_dev *dev);
+extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
 extern void pci_msi_shutdown(struct pci_dev *dev);
 extern void pci_disable_msi(struct pci_dev *dev);
+extern int pci_msix_table_size(struct pci_dev *dev);
 extern int pci_enable_msix(struct pci_dev *dev,
 	struct msix_entry *entries, int nvec);
 extern void pci_msix_shutdown(struct pci_dev *dev);
@@ -842,6 +875,8 @@ static inline int pcie_aspm_enabled(void)
 extern int pcie_aspm_enabled(void);
 #endif
 
+#define pci_enable_msi(pdev)	pci_enable_msi_block(pdev, 1)
+
 #ifdef CONFIG_HT_IRQ
 /* The functions a driver should call */
 int  ht_create_irq(struct pci_dev *dev, int idx);
@@ -1195,5 +1230,23 @@ int pci_ext_cfg_avail(struct pci_dev *dev);
 
 void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
 
+#ifdef CONFIG_PCI_IOV
+extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+extern void pci_disable_sriov(struct pci_dev *dev);
+extern irqreturn_t pci_sriov_migration(struct pci_dev *dev);
+#else
+static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+	return -ENODEV;
+}
+static inline void pci_disable_sriov(struct pci_dev *dev)
+{
+}
+static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev)
+{
+	return IRQ_NONE;
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */

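Usage sketch (an illustrative assumption, not from this series): with pci_enable_msi() now a one-vector wrapper around pci_enable_msi_block(), a driver that wants several MSI vectors can ask for a block and fall back on a smaller grant. The convention assumed here is that a positive return value indicates how many vectors the function could have provided instead.

/* Sketch: requesting a block of MSI vectors with fallback to fewer or to INTx. */
static int example_setup_msi(struct pci_dev *pdev, unsigned int want)
{
	int rc;

	rc = pci_enable_msi_block(pdev, want);
	if (rc > 0)		/* fewer vectors available; retry with that many */
		rc = pci_enable_msi_block(pdev, rc);
	if (rc)			/* still no luck: caller falls back to INTx */
		return rc;

	return 0;
}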
+ 1 - 0
include/linux/pci_ids.h

@@ -2396,6 +2396,7 @@
 #define PCI_DEVICE_ID_INTEL_82801CA_12	0x248c
 #define PCI_DEVICE_ID_INTEL_82801DB_0	0x24c0
 #define PCI_DEVICE_ID_INTEL_82801DB_1	0x24c1
+#define PCI_DEVICE_ID_INTEL_82801DB_2	0x24c2
 #define PCI_DEVICE_ID_INTEL_82801DB_3	0x24c3
 #define PCI_DEVICE_ID_INTEL_82801DB_5	0x24c5
 #define PCI_DEVICE_ID_INTEL_82801DB_6	0x24c6

+ 36 - 1
include/linux/pci_regs.h

@@ -235,7 +235,7 @@
 #define  PCI_PM_CAP_PME_SHIFT	11	/* Start of the PME Mask in PMC */
 #define PCI_PM_CTRL		4	/* PM control and status register */
 #define  PCI_PM_CTRL_STATE_MASK	0x0003	/* Current power state (D0 to D3) */
-#define  PCI_PM_CTRL_NO_SOFT_RESET	0x0004	/* No reset for D3hot->D0 */
+#define  PCI_PM_CTRL_NO_SOFT_RESET	0x0008	/* No reset for D3hot->D0 */
 #define  PCI_PM_CTRL_PME_ENABLE	0x0100	/* PME pin enable */
 #define  PCI_PM_CTRL_DATA_SEL_MASK	0x1e00	/* Data select (??) */
 #define  PCI_PM_CTRL_DATA_SCALE_MASK	0x6000	/* Data scale (??) */
@@ -375,6 +375,7 @@
 #define  PCI_EXP_TYPE_UPSTREAM	0x5	/* Upstream Port */
 #define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
 #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
+#define  PCI_EXP_TYPE_RC_END	0x9	/* Root Complex Integrated Endpoint */
 #define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
 #define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
 #define PCI_EXP_DEVCAP		4	/* Device capabilities */
@@ -487,6 +488,8 @@
 #define  PCI_EXP_DEVCAP2_ARI	0x20	/* Alternative Routing-ID */
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_ARI	0x20	/* Alternative Routing-ID */
+#define PCI_EXP_LNKCTL2		48	/* Link Control 2 */
+#define PCI_EXP_SLTCTL2		56	/* Slot Control 2 */
 
 /* Extended Capabilities (PCI-X 2.0 and Express) */
 #define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
@@ -498,6 +501,7 @@
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
 #define PCI_EXT_CAP_ID_ARI	14
+#define PCI_EXT_CAP_ID_SRIOV	16
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -615,4 +619,35 @@
 #define  PCI_ARI_CTRL_ACS	0x0002	/* ACS Function Groups Enable */
 #define  PCI_ARI_CTRL_FG(x)	(((x) >> 4) & 7) /* Function Group */
 
+/* Single Root I/O Virtualization */
+#define PCI_SRIOV_CAP		0x04	/* SR-IOV Capabilities */
+#define  PCI_SRIOV_CAP_VFM	0x01	/* VF Migration Capable */
+#define  PCI_SRIOV_CAP_INTR(x)	((x) >> 21) /* Interrupt Message Number */
+#define PCI_SRIOV_CTRL		0x08	/* SR-IOV Control */
+#define  PCI_SRIOV_CTRL_VFE	0x01	/* VF Enable */
+#define  PCI_SRIOV_CTRL_VFM	0x02	/* VF Migration Enable */
+#define  PCI_SRIOV_CTRL_INTR	0x04	/* VF Migration Interrupt Enable */
+#define  PCI_SRIOV_CTRL_MSE	0x08	/* VF Memory Space Enable */
+#define  PCI_SRIOV_CTRL_ARI	0x10	/* ARI Capable Hierarchy */
+#define PCI_SRIOV_STATUS	0x0a	/* SR-IOV Status */
+#define  PCI_SRIOV_STATUS_VFM	0x01	/* VF Migration Status */
+#define PCI_SRIOV_INITIAL_VF	0x0c	/* Initial VFs */
+#define PCI_SRIOV_TOTAL_VF	0x0e	/* Total VFs */
+#define PCI_SRIOV_NUM_VF	0x10	/* Number of VFs */
+#define PCI_SRIOV_FUNC_LINK	0x12	/* Function Dependency Link */
+#define PCI_SRIOV_VF_OFFSET	0x14	/* First VF Offset */
+#define PCI_SRIOV_VF_STRIDE	0x16	/* Following VF Stride */
+#define PCI_SRIOV_VF_DID	0x1a	/* VF Device ID */
+#define PCI_SRIOV_SUP_PGSIZE	0x1c	/* Supported Page Sizes */
+#define PCI_SRIOV_SYS_PGSIZE	0x20	/* System Page Size */
+#define PCI_SRIOV_BAR		0x24	/* VF BAR0 */
+#define  PCI_SRIOV_NUM_BARS	6	/* Number of VF BARs */
+#define PCI_SRIOV_VFM		0x3c	/* VF Migration State Array Offset*/
+#define  PCI_SRIOV_VFM_BIR(x)	((x) & 7)	/* State BIR */
+#define  PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7)	/* State Offset */
+#define  PCI_SRIOV_VFM_UA	0x0	/* Inactive.Unavailable */
+#define  PCI_SRIOV_VFM_MI	0x1	/* Dormant.MigrateIn */
+#define  PCI_SRIOV_VFM_MO	0x2	/* Active.MigrateOut */
+#define  PCI_SRIOV_VFM_AV	0x3	/* Active.Available */
+
 #endif /* LINUX_PCI_REGS_H */

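Illustrative sketch (an assumption, not part of this series) tying the new SR-IOV register layout to the new pci_enable_sriov() API: a PF driver would normally know its VF count from its own configuration, so reading Total VFs directly as below is only meant to show the register offsets and the call together.

/* Sketch: enabling as many VFs as the PF advertises (PF driver context). */
static int example_enable_all_vfs(struct pci_dev *pf)
{
	int pos;
	u16 total;

	pos = pci_find_ext_capability(pf, PCI_EXT_CAP_ID_SRIOV);
	if (!pos)
		return -ENODEV;

	pci_read_config_word(pf, pos + PCI_SRIOV_TOTAL_VF, &total);
	if (!total)
		return -ENODEV;

	return pci_enable_sriov(pf, total);
}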
+ 19 - 17
include/linux/pcieport_if.h

@@ -16,29 +16,30 @@
 #define PCIE_ANY_PORT			7
 
 /* Service Type */
-#define PCIE_PORT_SERVICE_PME		1	/* Power Management Event */
-#define PCIE_PORT_SERVICE_AER		2	/* Advanced Error Reporting */
-#define PCIE_PORT_SERVICE_HP		4	/* Native Hotplug */
-#define PCIE_PORT_SERVICE_VC		8	/* Virtual Channel */
+#define PCIE_PORT_SERVICE_PME_SHIFT	0	/* Power Management Event */
+#define PCIE_PORT_SERVICE_PME		(1 << PCIE_PORT_SERVICE_PME_SHIFT)
+#define PCIE_PORT_SERVICE_AER_SHIFT	1	/* Advanced Error Reporting */
+#define PCIE_PORT_SERVICE_AER		(1 << PCIE_PORT_SERVICE_AER_SHIFT)
+#define PCIE_PORT_SERVICE_HP_SHIFT	2	/* Native Hotplug */
+#define PCIE_PORT_SERVICE_HP		(1 << PCIE_PORT_SERVICE_HP_SHIFT)
+#define PCIE_PORT_SERVICE_VC_SHIFT	3	/* Virtual Channel */
+#define PCIE_PORT_SERVICE_VC		(1 << PCIE_PORT_SERVICE_VC_SHIFT)
 
 /* Root/Upstream/Downstream Port's Interrupt Mode */
+#define PCIE_PORT_NO_IRQ		(-1)
 #define PCIE_PORT_INTx_MODE		0
 #define PCIE_PORT_MSI_MODE		1
 #define PCIE_PORT_MSIX_MODE		2
 
-struct pcie_port_service_id {
-	__u32 vendor, device;		/* Vendor and device ID or PCI_ANY_ID*/
-	__u32 subvendor, subdevice;	/* Subsystem ID's or PCI_ANY_ID */
-	__u32 class, class_mask;	/* (class,subclass,prog-if) triplet */
-	__u32 port_type, service_type;	/* Port Entity */
-	kernel_ulong_t driver_data;
+struct pcie_port_data {
+	int port_type;		/* Type of the port */
+	int port_irq_mode;	/* [0:INTx | 1:MSI | 2:MSI-X] */
 };
 
 struct pcie_device {
 	int 		irq;	    /* Service IRQ/MSI/MSI-X Vector */
-	int 		interrupt_mode;	/* [0:INTx | 1:MSI | 2:MSI-X] */
-	struct pcie_port_service_id id;	/* Service ID */
-	struct pci_dev	*port;	    /* Root/Upstream/Downstream Port */
+	struct pci_dev *port;	    /* Root/Upstream/Downstream Port */
+	u32		service;    /* Port service this device represents */
 	void		*priv_data; /* Service Private Data */
 	struct device	device;     /* Generic Device Interface */
 };
@@ -56,10 +57,9 @@ static inline void* get_service_data(struct pcie_device *dev)
 
 struct pcie_port_service_driver {
 	const char *name;
-	int (*probe) (struct pcie_device *dev, 
-		const struct pcie_port_service_id *id);
+	int (*probe) (struct pcie_device *dev);
 	void (*remove) (struct pcie_device *dev);
-	int (*suspend) (struct pcie_device *dev, pm_message_t state);
+	int (*suspend) (struct pcie_device *dev);
 	int (*resume) (struct pcie_device *dev);
 
 	/* Service Error Recovery Handler */
@@ -68,7 +68,9 @@ struct pcie_port_service_driver {
 	/* Link Reset Capability - AER service driver specific */
 	pci_ers_result_t (*reset_link) (struct pci_dev *dev);
 
-	const struct pcie_port_service_id *id_table;
+	int port_type;  /* Type of the port this driver can handle */
+	u32 service;    /* Port service this device represents */
+
 	struct device_driver driver;
 };
 #define to_service_driver(d) \
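
For illustration (an assumption, not code from this series): a port service driver now declares the port type and the service bit it handles directly in the driver structure, and its probe callback no longer takes an id table entry. Everything named example_* below is hypothetical.

/* Sketch: a PME service driver skeleton against the reworked interface. */
static int example_pme_probe(struct pcie_device *dev)
{
	/* dev->port is the Root/Upstream/Downstream port, dev->irq its vector */
	return 0;
}

static void example_pme_remove(struct pcie_device *dev)
{
}

static struct pcie_port_service_driver example_pme_driver = {
	.name		= "example_pme",
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_PME,
	.probe		= example_pme_probe,
	.remove		= example_pme_remove,
};

/* registered with pcie_port_service_register(&example_pme_driver) */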